fix(audio): hardware-paced audio (no wall-clock silence mixing) + aresample=async to lock A/V sync

This commit is contained in:
Zac Gaetano 2026-06-02 21:50:11 +00:00
parent 3eacb35c1e
commit 20d913fbad
2 changed files with 52 additions and 10 deletions

View file

@ -252,15 +252,32 @@ static void *audio_thread(void *arg) {
#endif
fcntl(fd, F_SETPIPE_SZ, 1024 * 1024);
/* Reset wall-clock baseline after potentially blocking on open(). */
/* Reset wall-clock baseline after potentially blocking on open().
* Only used for the SILENCE fallback path (no hardware audio). */
struct timespec next;
clock_gettime(CLOCK_MONOTONIC, &next);
/* Audio-rate telemetry: count samples written per second so drift is
* visible in the log ([audio:N] rate=<samples/s>). At 48 kHz it must
* average 48000. */
unsigned long dbg_samples = 0;
struct timespec dbg_t0; clock_gettime(CLOCK_MONOTONIC, &dbg_t0);
/* Inner loop: feed audio into the open FIFO until reader exits (EPIPE). */
while (!atomic_load(&g_stop) && !atomic_load(&g_port_stop[ps->port])) {
size_t out_bytes = 0;
if (have_vhd_audio) {
/* HARDWARE-PACED PATH (the normal case).
* VHD_LockSlotHandle blocks until the board has the next audio
* slot ready — this slot is generated from the SAME SDI signal
* as the video, so blocking here paces audio in lockstep with
* video at the TRUE hardware rate. We write ONLY the real
* samples the board gives us (no silence padding, no wall-clock
* sleep) so the audio timeline length exactly tracks video.
* This is the fix for progressive A/V drift: mixing wall-clock
* paced silence with variable-length real reads made the audio
* stream length diverge from the video stream length. */
r = VHD_LockSlotHandle(stream, &slot);
if (r == VHDERR_NOERROR) {
ai.pAudioGroups[0].pAudioChannels[0].DataSize = (ULONG)buf_sz;
@ -269,26 +286,46 @@ static void *audio_thread(void *arg) {
if (sz > 0 && (size_t)sz <= buf_sz) out_bytes = (size_t)sz;
}
VHD_UnlockSlotHandle(slot);
} else if (r != VHDERR_TIMEOUT) {
if (out_bytes > 0) {
if (write_all(fd, buf, out_bytes) < 0) {
fprintf(stderr, "[audio:%u] EPIPE — waiting for next reader\n", ps->port);
break;
}
dbg_samples += out_bytes / FRAME_BYTES;
struct timespec dnow; clock_gettime(CLOCK_MONOTONIC, &dnow);
double el = (dnow.tv_sec - dbg_t0.tv_sec) + (dnow.tv_nsec - dbg_t0.tv_nsec)/1e9;
if (el >= 5.0) {
fprintf(stderr, "[audio:%u] rate=%.1f samples/s (target 48000)\n",
ps->port, dbg_samples / el);
dbg_samples = 0; dbg_t0 = dnow;
}
}
/* No wall-clock sleep — the board's slot cadence is the clock. */
continue;
} else if (r == VHDERR_TIMEOUT) {
/* No slot yet — loop and try again (do NOT inject silence,
* that would add extra samples and cause drift). */
continue;
} else {
fprintf(stderr, "[audio:%u] lock error %lu — degrading to silence\n",
ps->port, r);
VHD_StopStream(stream);
VHD_CloseStreamHandle(stream);
stream = NULL;
have_vhd_audio = 0;
clock_gettime(CLOCK_MONOTONIC, &next); /* rebase silence clock */
}
}
if (out_bytes == 0) {
memset(buf, 0, tick_bytes);
out_bytes = tick_bytes;
}
/* SILENCE FALLBACK PATH (no hardware audio available).
* Wall-clock paced: one frame of silence per video-frame interval, so
* ffmpeg's input 1 never starves and audio length still tracks
* real time. */
memset(buf, 0, tick_bytes);
out_bytes = tick_bytes;
if (write_all(fd, buf, out_bytes) < 0) {
/* EPIPE: ffmpeg reader on this port died (session stop/restart).
* Close and break to the outer loop which will reopen and block
* until the next ffmpeg reader connects.
* Do NOT set g_stop — other ports must keep running. */
fprintf(stderr, "[audio:%u] EPIPE — waiting for next reader\n", ps->port);
break;
}

View file

@ -1008,6 +1008,11 @@ exit "$BMXRC"
'-filter_complex', filterStr,
// Output 0 — ProRes/MOV master (local temp, uploaded to S3 on stop)
'-map', '[vhi]', '-map', audioMap,
// Keep raw audio aligned to the video clock. The two raw FIFOs carry
// no timestamps; -af aresample=async lets ffmpeg stretch/squeeze audio
// to correct any tiny rate mismatch so A/V never drifts over a long
// take. Applies to this output's mapped audio stream.
'-af', 'aresample=async=1:min_hard_comp=0.100000:first_pts=0',
...hiresCodecArgs,
hiresOutput,
// Output 1 — low-latency H.264 HLS preview for the UI monitor.