diff --git a/services/capture/deltacast-bridge/main.c b/services/capture/deltacast-bridge/main.c index 8cd1fc2..9c12c54 100644 --- a/services/capture/deltacast-bridge/main.c +++ b/services/capture/deltacast-bridge/main.c @@ -24,7 +24,7 @@ * * For each port that acquires signal, emits one JSON line to stderr: * {"port":N,"width":W,"height":H,"fps_num":N,"fps_den":D, - * "pix_fmt":"uyvy422","audio_rate":48000,"audio_channels":16, + * "pix_fmt":"uyvy422","audio_rate":48000,"audio_channels":2, * "slot_id":"deltacast--"} * * Compile with -DLEGACY_FIFO=1 to disable shm writes and fall back to @@ -198,14 +198,8 @@ static void *audio_thread(void *arg) { PortState *ps = (PortState *)arg; const int AUDIO_RATE = 48000; - /* The bridge ALWAYS captures the full 16 embedded channels (4 SDI audio - * groups × 1 stereo pair each). Per-recorder channel selection (keep first - * N) happens downstream in the capture ffmpeg via a channelmap — the bridge - * publishes one consistent 16ch s16le interleaved stream per port so a - * single FIFO serves every consumer regardless of how many channels they - * want. */ - enum { GROUPS = 4, CH_PER_GROUP = 2, CHANNELS = GROUPS * CH_PER_GROUP }; /* = 16 */ - const size_t FRAME_BYTES = (size_t)CHANNELS * 2; /* s16le, 16ch */ + const int CHANNELS = 2; + const size_t FRAME_BYTES = (size_t)CHANNELS * 2; /* s16le stereo */ int fps_num = ps->vi.fps_num > 0 ? ps->vi.fps_num : 25; int fps_den = ps->vi.fps_den > 0 ? ps->vi.fps_den : 1; long samples_per_frame = ((long)AUDIO_RATE * fps_den + fps_num / 2) / fps_num; @@ -215,17 +209,10 @@ static void *audio_thread(void *arg) { ULONG max_samples = VHD_GetNbSamples((VHD_VIDEOSTANDARD)ps->video_std, (VHD_CLOCKDIVISOR)ps->clock_div, VHD_ASR_48000, 0); - /* Per-group capture buffer (2ch packed s16le) — one per SDI audio group. - * Sized for the SDK's stereo block size; we extract each group into its - * own gbuf[g] then interleave the 4 groups into the 16ch out buffer. */ ULONG block_size = VHD_GetBlockSize(VHD_AF_16, VHD_AM_STEREO); - size_t gbuf_sz = ((size_t)max_samples + 64) * (block_size ? block_size : 4); - unsigned char *gbuf[GROUPS]; - for (int g = 0; g < GROUPS; g++) { gbuf[g] = calloc(1, gbuf_sz); if (!gbuf[g]) return NULL; } - /* Interleaved 16ch output buffer (and the silence buffer reuses it). */ - size_t out_cap = (size_t)(max_samples + 64) * FRAME_BYTES; - if (out_cap < tick_bytes) out_cap = tick_bytes; - unsigned char *buf = calloc(1, out_cap); + size_t vhd_buf_sz = ((size_t)max_samples + 64) * (block_size ? block_size : FRAME_BYTES); + size_t buf_sz = vhd_buf_sz > tick_bytes ? vhd_buf_sz : tick_bytes; + unsigned char *buf = calloc(1, buf_sz); if (!buf) return NULL; /* Open the VHD audio stream once for the lifetime of the bridge. @@ -250,18 +237,14 @@ static void *audio_thread(void *arg) { VHD_SetStreamProperty(stream, VHD_CORE_SP_TRANSFER_SCHEME, VHD_TRANSFER_SLAVED); VHD_SetStreamProperty(stream, VHD_SDI_SP_INTERFACE, iface); - /* Configure all 4 audio groups as stereo pairs. Each group's packed - * L/R s16le samples land in pAudioGroups[g].pAudioChannels[0].pData; - * channel [1] must still declare Mode+BufferFormat so the SDK - * recognizes the pair. Groups with no embedded audio simply return 0 - * samples and are zero-filled during interleave. */ - for (int g = 0; g < GROUPS; g++) { - ai.pAudioGroups[g].pAudioChannels[0].Mode = VHD_AM_STEREO; - ai.pAudioGroups[g].pAudioChannels[0].BufferFormat = VHD_AF_16; - ai.pAudioGroups[g].pAudioChannels[0].pData = gbuf[g]; - ai.pAudioGroups[g].pAudioChannels[1].Mode = VHD_AM_STEREO; - ai.pAudioGroups[g].pAudioChannels[1].BufferFormat = VHD_AF_16; - } + /* Configure BOTH channels of the stereo pair (group 0). The actual PCM + * samples land in pAudioChannels[0].pData (packed L/R s16le). Channel + * [1] must declare Mode+BufferFormat so the SDK recognizes the pair. */ + ai.pAudioGroups[0].pAudioChannels[0].Mode = VHD_AM_STEREO; + ai.pAudioGroups[0].pAudioChannels[0].BufferFormat = VHD_AF_16; + ai.pAudioGroups[0].pAudioChannels[0].pData = buf; + ai.pAudioGroups[0].pAudioChannels[1].Mode = VHD_AM_STEREO; + ai.pAudioGroups[0].pAudioChannels[1].BufferFormat = VHD_AF_16; if (VHD_StartStream(stream) == VHDERR_NOERROR) { have_vhd_audio = 1; @@ -315,47 +298,10 @@ static void *audio_thread(void *arg) { * stream length diverge from the video stream length. */ r = VHD_LockSlotHandle(stream, &slot); if (r == VHDERR_NOERROR) { - /* Ask the SDK for up to gbuf_sz bytes per group. After - * extraction each group's DataSize holds the bytes actually - * written (2ch s16le). Group 0 paces the frame count; groups - * with no audio report 0 and are zero-filled. */ - for (int g = 0; g < GROUPS; g++) - ai.pAudioGroups[g].pAudioChannels[0].DataSize = (ULONG)gbuf_sz; + ai.pAudioGroups[0].pAudioChannels[0].DataSize = (ULONG)buf_sz; if (VHD_SlotExtractAudio(slot, &ai) == VHDERR_NOERROR) { - /* Group 0 is the AUTHORITATIVE sample count — it paces the - * audio timeline in lockstep with video (same SDI slot - * clock), exactly as the original 2ch path did. We must - * emit EXACTLY group 0's frame count per slot; taking a - * max across groups would occasionally emit extra frames - * and make the audio stream drift LONGER than the video - * (heard as a slight pitch-up). Groups 1-3 are sampled at - * the same rate; any that return fewer bytes are padded - * with silence to group 0's length, never extending it. */ - ULONG g0 = ai.pAudioGroups[0].pAudioChannels[0].DataSize; - size_t frames = (size_t)g0 / 4; /* 2ch * s16 = 4 bytes/frame */ - if (frames > 0) { - size_t need = frames * FRAME_BYTES; - if (need > out_cap) { frames = out_cap / FRAME_BYTES; need = frames * FRAME_BYTES; } - /* Interleave: for each sample frame, emit the 2 - * samples of each group in order → 16ch frame - * [G0L G0R G1L G1R G2L G2R G3L G3R]. Groups shorter - * than `frames` (or absent) contribute silence. */ - int16_t *out = (int16_t *)buf; - for (size_t f = 0; f < frames; f++) { - for (int g = 0; g < GROUPS; g++) { - size_t gframes = (size_t)ai.pAudioGroups[g].pAudioChannels[0].DataSize / 4; - const int16_t *gs = (const int16_t *)gbuf[g]; - if (f < gframes) { - out[f * CHANNELS + g * 2 + 0] = gs[f * 2 + 0]; - out[f * CHANNELS + g * 2 + 1] = gs[f * 2 + 1]; - } else { - out[f * CHANNELS + g * 2 + 0] = 0; - out[f * CHANNELS + g * 2 + 1] = 0; - } - } - } - out_bytes = need; - } + ULONG sz = ai.pAudioGroups[0].pAudioChannels[0].DataSize; + if (sz > 0 && (size_t)sz <= buf_sz) out_bytes = (size_t)sz; } VHD_UnlockSlotHandle(slot); @@ -414,7 +360,6 @@ static void *audio_thread(void *arg) { VHD_CloseStreamHandle(stream); } free(buf); - for (int g = 0; g < GROUPS; g++) free(gbuf[g]); return NULL; } @@ -815,7 +760,7 @@ int main(int argc, char *argv[]) { "\"fps_num\":%d,\"fps_den\":%d," "\"interlaced\":%s," "\"pix_fmt\":\"uyvy422\"," - "\"audio_channels\":16,\"audio_rate\":48000," + "\"audio_channels\":2,\"audio_rate\":48000," "\"device\":%u," "\"slot_id\":\"%s\"}\n", ports[pi], diff --git a/services/capture/src/capture-manager.js b/services/capture/src/capture-manager.js index 9e77cdc..29fe7ac 100644 --- a/services/capture/src/capture-manager.js +++ b/services/capture/src/capture-manager.js @@ -693,18 +693,7 @@ class CaptureManager { const fcFps = process.env.DELTACAST_FRAMERATE || '60000/1001'; const fcInterlaced = process.env.DELTACAST_INTERLACED === '1'; - // The deltacast bridge now publishes a fixed 16-channel s16le stream per - // port (all 4 SDI audio groups). The recorder selects how many of those - // channels to keep in the master — RECORDING_AUDIO_CHANNELS (2/8/16), - // injected by node-agent from the recorder config. We declare the FIFO as - // 16ch on input and KEEP THE FIRST N discrete channels downstream (no - // downmix) via an audio channel-map on the encode output. - const FIFO_CHANNELS = 16; - let wantCh = parseInt(process.env.RECORDING_AUDIO_CHANNELS || '2', 10); - if (!Number.isFinite(wantCh) || wantCh < 1) wantCh = 2; - if (wantCh > FIFO_CHANNELS) wantCh = FIFO_CHANNELS; - - console.log(`[framecache] slot=${slotId} size=${fcSize} fps=${fcFps} audio=${audioFifoPath} ch=${wantCh}/${FIFO_CHANNELS}`); + console.log(`[framecache] slot=${slotId} size=${fcSize} fps=${fcFps} audio=${audioFifoPath}`); // Spawn fc_pipe: opens the framecache slot with its own read cursor and // streams raw UYVY422 frames to stdout. ffmpeg reads from the pipe as @@ -724,37 +713,21 @@ class CaptureManager { return { inputArgs: [ - // fc_pipe stdout → ffmpeg rawvideo input 0 (video) - // DO NOT use -use_wallclock_as_timestamps here. The framecache ring - // delivers frame-accurate 60fps from the SDI clock, so -framerate - // produces correct CFR timestamps from frame 0, immune to ffmpeg - // startup jitter and NVENC cold-start. Wallclock timestamping caused - // wrong framerate in the recorded file (e.g. 56.06 instead of 59.94) - // because arrival-time jitter at ffmpeg startup skewed the PTS. + // fc_pipe stdout → ffmpeg rawvideo input 0 (video). '-thread_queue_size', '512', '-f', 'rawvideo', '-pix_fmt', 'uyvy422', '-video_size', fcSize, '-framerate', fcFps, '-i', 'pipe:0', - // Audio FIFO → ffmpeg input 1. Wall-clock timestamps on the audio - // input are REQUIRED for throughput: without them ffmpeg's audio - // reader has no rate reference on the raw s16le FIFO and the demux - // thread stalls the whole graph (NVENC sat idle at 9% while frames - // dropped). With wallclock, audio is paced by arrival and the master - // -af aresample=async=1 resamples it onto the video CFR timeline so - // A/V length stays locked. The residual ~1% drift that wallclock used - // to cause was actually the all-intra HEVC dropping frames (video - // short); that's fixed by long-GOP HEVC for non-growing records, so - // wallclock is safe again and necessary. - // The FIFO carries the full 16ch the bridge publishes; channel - // SELECTION (keep first N) is applied as an output filter so the - // discrete broadcast channels are preserved, not downmixed. + // Audio FIFO → ffmpeg input 1. The deltacast bridge writes a 2ch s16le + // 48kHz stream paced by the SDI slot clock (same clock as the video), + // so wallclock timestamps + master aresample=async=1 keep A/V locked. '-use_wallclock_as_timestamps', '1', '-thread_queue_size', '512', '-f', 's16le', '-ar', '48000', - '-ac', String(FIFO_CHANNELS), + '-ac', '2', '-i', audioFifoPath, ], isNetwork: false, @@ -762,11 +735,6 @@ class CaptureManager { audioFifo: null, interlaced: fcInterlaced, audioInputIndex: 1, /* audio FIFO is ffmpeg input 1 */ - // Number of source channels available on the FIFO, and how many the - // recorder wants kept (first N). The encode builder turns wantCh into a - // channelmap so the master holds exactly those discrete channels. - sourceAudioChannels: FIFO_CHANNELS, - wantAudioChannels: wantCh, _fcPipeProcess: fcPipeProcess, /* stored for clean stop */ }; } @@ -1058,25 +1026,10 @@ exit "$BMXRC" this._sessionIdForBridge = sessionId; const { inputArgs, isNetwork, bridgeProcess = null, audioFifo = null, interlaced = false, audioInputIndex = 0, - sourceAudioChannels = null, wantAudioChannels = null } = await this._buildInputArgs({ + } = await this._buildInputArgs({ sourceType, sourceBackend, device, port, board, sourceUrl, listen, listenPort, streamKey, }); - // Channel selection for the master: when the source FIFO carries more - // discrete channels than the recorder wants (e.g. 16ch SDI → 2ch master), - // keep the FIRST N channels as discrete streams (no downmix) via a `pan` - // filter `c0=c0|c1=c1|…`. effAudioChannels is what the master container - // actually holds and what `-ac` must declare. - const effAudioChannels = (sourceAudioChannels && wantAudioChannels) - ? Math.min(wantAudioChannels, sourceAudioChannels) - : audioChannels; - const needChannelSelect = !!(sourceAudioChannels && wantAudioChannels && wantAudioChannels < sourceAudioChannels); - const channelSelectFilter = needChannelSelect - ? `pan=${effAudioChannels}c|` + Array.from({ length: effAudioChannels }, (_, i) => `c${i}=c${i}`).join('|') - : null; - // Override the codec channel count so -ac matches the selected layout. - if (sourceAudioChannels && wantAudioChannels) audioChannels = effAudioChannels; - // ── Pre-roll: discard initial unstable frames ──────────────────────────── if (bridgeProcess && (sourceType === 'deltacast' || sourceType === 'blackmagic' || sourceType === 'sdi')) { console.log(`[capture] pre-rolling: discarding ${PRE_ROLL_SECONDS}s of frames`); @@ -1194,18 +1147,11 @@ exit "$BMXRC" // ffmpeg doesn't fail trying to map a nonexistent audio stream. const hasAudio = audioInputIndex >= 0 && !isNetFcPipe; const masterAudioMap = hasAudio ? ['-map', audioMap] : []; - // Master audio: optional first-N channel select (discrete, no downmix), - // then async resample to lock A/V sync. Chain both into one -af. - const masterFilterChain = [ - ...(channelSelectFilter ? [channelSelectFilter] : []), - 'aresample=async=1:min_hard_comp=0.100000:first_pts=0', - ].join(','); - const masterAudioFilter = hasAudio ? ['-af', masterFilterChain] : []; + const masterAudioFilter = hasAudio + ? ['-af', 'aresample=async=1:min_hard_comp=0.100000:first_pts=0'] : []; const hlsAudioMap = hasAudio ? ['-map', audioMap] : []; - // HLS preview is always stereo for browser playback — downmix the first - // pair regardless of how many channels the master keeps. const hlsAudioCodec = hasAudio - ? ['-af', 'pan=stereo|c0=c0|c1=c1', '-c:a', 'aac', '-b:a', '128k', '-ar', '44100', '-ac', '2'] : []; + ? ['-c:a', 'aac', '-b:a', '128k', '-ar', '44100'] : []; hiresArgs = [ ...inputArgs, '-filter_complex', filterStr,