diff --git a/services/capture/deltacast-bridge/main.c b/services/capture/deltacast-bridge/main.c index 00a5472..977a0ee 100644 --- a/services/capture/deltacast-bridge/main.c +++ b/services/capture/deltacast-bridge/main.c @@ -252,15 +252,32 @@ static void *audio_thread(void *arg) { #endif fcntl(fd, F_SETPIPE_SZ, 1024 * 1024); - /* Reset wall-clock baseline after potentially blocking on open(). */ + /* Reset wall-clock baseline after potentially blocking on open(). + * Only used for the SILENCE fallback path (no hardware audio). */ struct timespec next; clock_gettime(CLOCK_MONOTONIC, &next); + /* Audio-rate telemetry: count samples written and log the average + * rate about every 5 s ("[audio:N] rate=X samples/s") so drift is + * visible in the log. At 48 kHz it must average 48000. */ + unsigned long dbg_samples = 0; + struct timespec dbg_t0; clock_gettime(CLOCK_MONOTONIC, &dbg_t0); + /* Inner loop: feed audio into the open FIFO until reader exits (EPIPE). */ while (!atomic_load(&g_stop) && !atomic_load(&g_port_stop[ps->port])) { size_t out_bytes = 0; if (have_vhd_audio) { + /* HARDWARE-PACED PATH (the normal case). + * VHD_LockSlotHandle blocks until the board has the next audio + * slot ready — this slot is generated from the SAME SDI signal + * as the video, so blocking here paces audio in lockstep with + * video at the TRUE hardware rate. We write ONLY the real + * samples the board gives us (no silence padding, no wall-clock + * sleep) so the audio timeline length exactly tracks video. + * This is the fix for progressive A/V drift: mixing wall-clock + * paced silence with variable-length real reads made the audio + * stream length diverge from the video stream length. 
*/ r = VHD_LockSlotHandle(stream, &slot); if (r == VHDERR_NOERROR) { ai.pAudioGroups[0].pAudioChannels[0].DataSize = (ULONG)buf_sz; @@ -269,26 +286,46 @@ static void *audio_thread(void *arg) { if (sz > 0 && (size_t)sz <= buf_sz) out_bytes = (size_t)sz; } VHD_UnlockSlotHandle(slot); - } else if (r != VHDERR_TIMEOUT) { + + if (out_bytes > 0) { + if (write_all(fd, buf, out_bytes) < 0) { + fprintf(stderr, "[audio:%u] EPIPE — waiting for next reader\n", ps->port); + break; + } + dbg_samples += out_bytes / FRAME_BYTES; + struct timespec dnow; clock_gettime(CLOCK_MONOTONIC, &dnow); + double el = (dnow.tv_sec - dbg_t0.tv_sec) + (dnow.tv_nsec - dbg_t0.tv_nsec)/1e9; + if (el >= 5.0) { + fprintf(stderr, "[audio:%u] rate=%.1f samples/s (target 48000)\n", + ps->port, dbg_samples / el); + dbg_samples = 0; dbg_t0 = dnow; + } + } + /* No wall-clock sleep — the board's slot cadence is the clock. */ + continue; + } else if (r == VHDERR_TIMEOUT) { + /* No slot yet — loop and try again (do NOT inject silence, + * that would add extra samples and cause drift). */ + continue; + } else { fprintf(stderr, "[audio:%u] lock error %lu — degrading to silence\n", ps->port, r); VHD_StopStream(stream); VHD_CloseStreamHandle(stream); stream = NULL; have_vhd_audio = 0; + clock_gettime(CLOCK_MONOTONIC, &next); /* rebase silence clock */ } } - if (out_bytes == 0) { - memset(buf, 0, tick_bytes); - out_bytes = tick_bytes; - } + /* SILENCE FALLBACK PATH (no hardware audio available). + * Wall-clock paced one-frame-of-silence per video-frame interval so + * ffmpeg's input 1 never starves and audio length still tracks + * real time. */ + memset(buf, 0, tick_bytes); + out_bytes = tick_bytes; if (write_all(fd, buf, out_bytes) < 0) { - /* EPIPE: ffmpeg reader on this port died (session stop/restart). - * Close and break to the outer loop which will reopen and block - * until the next ffmpeg reader connects. - * Do NOT set g_stop — other ports must keep running. 
*/ fprintf(stderr, "[audio:%u] EPIPE — waiting for next reader\n", ps->port); break; } diff --git a/services/capture/src/capture-manager.js b/services/capture/src/capture-manager.js index 71a7a8d..0dc17cc 100644 --- a/services/capture/src/capture-manager.js +++ b/services/capture/src/capture-manager.js @@ -1008,6 +1008,11 @@ exit "$BMXRC" '-filter_complex', filterStr, // Output 0 — ProRes/MOV master (local temp, uploaded to S3 on stop) '-map', '[vhi]', '-map', audioMap, + // Keep raw audio aligned to the video clock so A/V does not drift over + // a long take. The two raw FIFOs carry no timestamps. NOTE(review): per + // the ffmpeg resampler docs, async=1 enables fill/trim once audio is off + // by min_hard_comp (0.1 s); stretch/squeeze needs async > 1 — confirm. Applies to this output's mapped audio stream. + '-af', 'aresample=async=1:min_hard_comp=0.100000:first_pts=0', ...hiresCodecArgs, hiresOutput, // Output 1 — low-latency H.264 HLS preview for the UI monitor.