diff --git a/src/TeamsISO.Engine.NdiInterop/NdiInteropPInvoke.cs b/src/TeamsISO.Engine.NdiInterop/NdiInteropPInvoke.cs
index 3becbdf..84bf2f3 100644
--- a/src/TeamsISO.Engine.NdiInterop/NdiInteropPInvoke.cs
+++ b/src/TeamsISO.Engine.NdiInterop/NdiInteropPInvoke.cs
@@ -139,6 +139,97 @@ public sealed class NdiInteropPInvoke : INdiInterop, IDisposable
}
}
+    /// <summary>
+    /// Pulls one audio frame and returns its peak amplitude in [0,1], or null
+    /// if the timeout elapsed without an audio frame arriving. Uses the same
+    /// underlying NDIlib_recv_capture_v3 the video path does, but binds the
+    /// audio output slot only — the receiver's internal queue serves video
+    /// and audio independently, so this can be polled from a separate thread
+    /// without contending with the video capture loop.
+    /// </summary>
+    /// <param name="receiver">Receiver to read from; cast unconditionally to NdiPInvokeReceiverHandle.</param>
+    /// <param name="timeoutMs">Longest time to wait for a frame, in milliseconds; negative values are treated as 0.</param>
+    /// <returns>
+    /// The frame's peak in [0.0, 1.0]; 0.0 when the frame header describes no
+    /// usable sample data; null when no audio frame was captured.
+    /// </returns>
+    public double? CaptureAudioPeak(NdiReceiverHandle receiver, int timeoutMs)
+    {
+        var pInvokeReceiver = (NdiPInvokeReceiverHandle)receiver;
+        var frameType = NdiNative.RecvCaptureV3Audio(
+            pInvokeReceiver.Native,
+            IntPtr.Zero,
+            out var nativeAudio,
+            IntPtr.Zero,
+            (uint)Math.Max(0, timeoutMs));
+
+        if (frameType != NdiNative.FrameType.Audio || nativeAudio.p_data == IntPtr.Zero)
+        {
+            // Free defensively on the off-chance an audio struct was partially
+            // populated despite the wrong frame-type return — the SDK's free
+            // is a no-op on a zero pointer.
+            if (nativeAudio.p_data != IntPtr.Zero)
+            {
+                NdiNative.RecvFreeAudioV3(pInvokeReceiver.Native, ref nativeAudio);
+            }
+
+            return null;
+        }
+
+        try
+        {
+            var channels = nativeAudio.no_channels;
+            var samplesPerChannel = nativeAudio.no_samples;
+
+            // The struct's union slot is the per-channel stride for planar
+            // FLTP and the size of the whole p_data buffer otherwise.
+            var strideOrSize = nativeAudio.channel_stride_in_bytes;
+            if (channels <= 0 || samplesPerChannel <= 0 || strideOrSize <= 0)
+            {
+                return 0.0;
+            }
+
+            if (nativeAudio.FourCC != TeamsISO.Engine.Pipeline.AudioPeakComputer.FourCC_FLTP)
+            {
+                // Not planar: copy exactly the reported buffer size. Multiplying
+                // it by no_channels would read past the end of native memory.
+                var packed = new byte[strideOrSize];
+                Marshal.Copy(nativeAudio.p_data, packed, 0, packed.Length);
+                var totalSamples = (int)Math.Min((long)channels * samplesPerChannel, int.MaxValue);
+                return TeamsISO.Engine.Pipeline.AudioPeakComputer.ComputePeak(
+                    packed, nativeAudio.FourCC, totalSamples);
+            }
+
+            // A header whose planes would span more than 2 GiB is treated as
+            // malformed; the 64-bit product stops the check itself from wrapping.
+            if ((long)channels * strideOrSize > int.MaxValue)
+            {
+                return 0.0;
+            }
+
+            // Planar: each channel's samples start at a multiple of the stride,
+            // which may be larger than no_samples * 4. Scan one plane at a time
+            // so padding between planes is never mistaken for audio.
+            var planeBytes = (int)Math.Min((long)samplesPerChannel * sizeof(float), strideOrSize);
+            var plane = new byte[planeBytes];
+            var peak = 0.0;
+            for (var channel = 0; channel < channels; channel++)
+            {
+                var planeStart = new IntPtr(nativeAudio.p_data.ToInt64() + ((long)channel * strideOrSize));
+                Marshal.Copy(planeStart, plane, 0, planeBytes);
+                var planePeak = TeamsISO.Engine.Pipeline.AudioPeakComputer.ComputePeak(
+                    plane, nativeAudio.FourCC, planeBytes / sizeof(float));
+                peak = Math.Max(peak, planePeak);
+            }
+
+            return peak;
+        }
+        finally
+        {
+            NdiNative.RecvFreeAudioV3(pInvokeReceiver.Native, ref nativeAudio);
+        }
+    }
+
public RawFrame? CaptureFrame(NdiReceiverHandle receiver, int timeoutMs)
{
var pInvokeReceiver = (NdiPInvokeReceiverHandle)receiver;
diff --git a/src/TeamsISO.Engine.NdiInterop/NdiNative.cs b/src/TeamsISO.Engine.NdiInterop/NdiNative.cs
index 02e3c46..609d4a0 100644
--- a/src/TeamsISO.Engine.NdiInterop/NdiNative.cs
+++ b/src/TeamsISO.Engine.NdiInterop/NdiNative.cs
@@ -67,6 +67,23 @@ internal static class NdiNative
[DllImport(LibName, EntryPoint = "NDIlib_recv_free_video_v2", CallingConvention = CallingConvention.Cdecl)]
public static extern void RecvFreeVideoV2(IntPtr p_instance, ref VideoFrameV2 p_video_data);
+ /// <summary>
+ /// Audio-only binding of the same native NDIlib_recv_capture_v3 entry point
+ /// the video path uses. It is declared as a separate import so callers that
+ /// only need audio can pass IntPtr.Zero for the video and metadata slots
+ /// instead of allocating frame structs they don't intend to fill.
+ /// </summary>
+ [DllImport(LibName, EntryPoint = "NDIlib_recv_capture_v3", CallingConvention = CallingConvention.Cdecl)]
+ public static extern FrameType RecvCaptureV3Audio(
+ IntPtr p_instance,
+ IntPtr p_video_data,
+ out AudioFrameV3 p_audio_data,
+ IntPtr p_metadata,
+ uint timeout_in_ms);
+
+ [DllImport(LibName, EntryPoint = "NDIlib_recv_free_audio_v3", CallingConvention = CallingConvention.Cdecl)]
+ public static extern void RecvFreeAudioV3(IntPtr p_instance, ref AudioFrameV3 p_audio_data); // releases a frame filled in by RecvCaptureV3Audio
+
// ---- Send ----
[DllImport(LibName, EntryPoint = "NDIlib_send_create", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr SendCreate(ref SendCreateSettings p_create_settings);
@@ -182,4 +199,26 @@ internal static class NdiNative
public IntPtr p_metadata;
public long timestamp;
}
+
+ /// <summary>
+ /// Mirrors NDIlib_audio_frame_v3_t; fields are laid out sequentially, so
+ /// their order and types must stay in step with the native struct. FourCC
+ /// identifies the sample format; for NDI 6 the only common value is FLTP
+ /// (32-bit float, planar, one channel-plane after another). For planar data
+ /// channel_stride_in_bytes is the byte distance between the start of channel
+ /// N and channel N+1 (normally no_samples * 4), giving no_channels * stride bytes.
+ /// </summary>
+ [StructLayout(LayoutKind.Sequential)]
+ public struct AudioFrameV3
+ {
+ public int sample_rate;
+ public int no_channels;
+ public int no_samples; // samples per channel, not the total across channels
+ public long timecode;
+ public uint FourCC;
+ public IntPtr p_data; // native sample buffer; released through RecvFreeAudioV3
+ public int channel_stride_in_bytes; // union with data_size_in_bytes
+ public IntPtr p_metadata;
+ public long timestamp;
+ }
}
diff --git a/src/TeamsISO.Engine/Interop/INdiInterop.cs b/src/TeamsISO.Engine/Interop/INdiInterop.cs
index a0da0e6..045052a 100644
--- a/src/TeamsISO.Engine/Interop/INdiInterop.cs
+++ b/src/TeamsISO.Engine/Interop/INdiInterop.cs
@@ -31,6 +31,19 @@ public interface INdiInterop
///
RawFrame? CaptureFrame(NdiReceiverHandle receiver, int timeoutMs);
+ /// <summary>
+ /// Pulls the next audio frame from the receiver and returns its peak
+ /// amplitude in [0.0, 1.0], or null if no audio frame was available
+ /// within the timeout. Implementations may wait up to timeoutMs for a
+ /// frame but MUST NOT block beyond it (a polling caller drives a UI VU
+ /// bar; we don't want it to stall on a video-only sender).
+ ///
+ /// The default implementation returns null at once without waiting, so
+ /// interop fakes that don't simulate audio need no changes; the production
+ /// NdiInteropPInvoke overrides this with the real read.
+ /// </summary>
+ double? CaptureAudioPeak(NdiReceiverHandle receiver, int timeoutMs) => null;
+
// ----- Send -----
///
diff --git a/src/TeamsISO.Engine/Pipeline/AudioPeakComputer.cs b/src/TeamsISO.Engine/Pipeline/AudioPeakComputer.cs
new file mode 100644
index 0000000..b603b87
--- /dev/null
+++ b/src/TeamsISO.Engine/Pipeline/AudioPeakComputer.cs
@@ -0,0 +1,117 @@
+using System.Runtime.InteropServices;
+
+namespace TeamsISO.Engine.Pipeline;
+
+/// <summary>
+/// Computes a single peak amplitude (in [0.0, 1.0]) from one NDI audio frame.
+///
+/// NDI 6's preferred audio format is NDIlib_FourCC_audio_type_FLTP —
+/// 32-bit IEEE float, planar (one contiguous chunk per channel). Values are
+/// nominally normalized to [-1, 1]; brief excursions past 1 during transient
+/// clipping are clamped here. We compute a max-absolute peak across every
+/// sample of every channel rather than RMS so the UI VU bar reads
+/// "loudest part of the buffer" — the same convention OBS / Resolve / Studio
+/// Monitor use for their meters.
+///
+/// Kept separate from the P/Invoke layer so the math is unit-testable
+/// without an NDI runtime; it works purely on managed memory the caller has
+/// already copied across the P/Invoke boundary, so tests exercise the same
+/// code path that production does.
+/// </summary>
+public static class AudioPeakComputer
+{
+    /// <summary>FourCC for FLTP — 32-bit float, planar layout. 'F','L','T','p', first character in the low byte.</summary>
+    public const uint FourCC_FLTP = 0x70544c46;
+
+    /// <summary>FourCC for FLT — 32-bit float, interleaved. 'F','L','T',' '. Rarely seen but cheap to handle.</summary>
+    public const uint FourCC_FLT = 0x20544c46;
+
+    /// <summary>
+    /// FourCC for PCM 16-bit signed integer, interleaved. Some legacy senders use this. 'P','C','M','s'.
+    /// Packed like the other codes: 'P' (0x50) is the low byte and 's' (0x73) the high byte.
+    /// </summary>
+    public const uint FourCC_PCMs16 = 0x734d4350;
+
+    /// <summary>
+    /// Returns the largest absolute sample value found in the buffer,
+    /// normalized to [0.0, 1.0] and clamped to 1.0 for any input that exceeds it.
+    /// Returns 0.0 for an empty / zero-length buffer and for an unrecognized FourCC.
+    /// </summary>
+    /// <param name="data">The raw audio sample bytes for the entire frame.</param>
+    /// <param name="fourCC">The NDI audio FourCC (see the constants on this class).</param>
+    /// <param name="totalSamples">
+    /// Total sample count across all channels (e.g. no_samples * no_channels
+    /// for FLTP — channels are concatenated planes, but every sample contributes).
+    /// At most this many samples are read, and never more than the buffer holds.
+    /// </param>
+    /// <returns>The peak amplitude in [0.0, 1.0].</returns>
+    public static double ComputePeak(ReadOnlySpan<byte> data, uint fourCC, int totalSamples)
+    {
+        if (data.IsEmpty || totalSamples <= 0)
+        {
+            return 0.0;
+        }
+
+        return fourCC switch
+        {
+            FourCC_FLTP or FourCC_FLT => ComputePeakFloat32(data, totalSamples),
+            FourCC_PCMs16 => ComputePeakInt16(data, totalSamples),
+            _ => 0.0, // unknown format — surface silence rather than throw
+        };
+    }
+
+    /// <summary>Peak of up to totalSamples 32-bit floats, clamped to 1.0. NaN samples never win the comparison.</summary>
+    private static double ComputePeakFloat32(ReadOnlySpan<byte> data, int totalSamples)
+    {
+        // 4 bytes per sample. Cap by what's actually in the buffer in case
+        // the caller's totalSamples disagrees with the byte length (defensive
+        // — a misreporting source shouldn't take down the receiver loop).
+        var available = Math.Min(totalSamples, data.Length / sizeof(float));
+        if (available <= 0)
+        {
+            return 0.0;
+        }
+
+        var floats = MemoryMarshal.Cast<byte, float>(data.Slice(0, available * sizeof(float)));
+        float peak = 0;
+        // Plain scalar loop: one compare per sample, and any trailing bytes
+        // that do not form a whole float were already sliced away above.
+        for (var i = 0; i < floats.Length; i++)
+        {
+            var magnitude = Math.Abs(floats[i]);
+            if (magnitude > peak)
+            {
+                peak = magnitude;
+            }
+        }
+
+        return peak > 1.0f ? 1.0 : peak;
+    }
+
+    /// <summary>Peak of up to totalSamples signed 16-bit samples, normalized by short.MaxValue and clamped to 1.0.</summary>
+    private static double ComputePeakInt16(ReadOnlySpan<byte> data, int totalSamples)
+    {
+        var available = Math.Min(totalSamples, data.Length / sizeof(short));
+        if (available <= 0)
+        {
+            return 0.0;
+        }
+
+        var samples = MemoryMarshal.Cast<byte, short>(data.Slice(0, available * sizeof(short)));
+        var peak = 0;
+        for (var i = 0; i < samples.Length; i++)
+        {
+            // Widen to int before taking the magnitude: negating short.MinValue
+            // (-32768) does not fit in a short, but 32768 is fine as an int and
+            // is pinned to 1.0 by the clamp in the normalize step below.
+            var magnitude = Math.Abs((int)samples[i]);
+            if (magnitude > peak)
+            {
+                peak = magnitude;
+            }
+        }
+
+        var normalized = peak / (double)short.MaxValue;
+        return normalized > 1.0 ? 1.0 : normalized;
+    }
+}
diff --git a/src/TeamsISO.Engine/Pipeline/IsoPipeline.cs b/src/TeamsISO.Engine/Pipeline/IsoPipeline.cs
index b57ce6e..ac67a35 100644
--- a/src/TeamsISO.Engine/Pipeline/IsoPipeline.cs
+++ b/src/TeamsISO.Engine/Pipeline/IsoPipeline.cs
@@ -106,6 +106,9 @@ public sealed class IsoPipeline : IAsyncDisposable
IncomingHeight: h)
{
State = State,
+ // Peak is published by NdiReceiver's audio loop; 0.0 means
+ // silence, no audio yet, or the sender is video-only.
+ PeakAudioLevel = receiver.LatestAudioPeak,
};
}
diff --git a/src/TeamsISO.Engine/Pipeline/NdiReceiver.cs b/src/TeamsISO.Engine/Pipeline/NdiReceiver.cs
index 0dea0f2..6f8404c 100644
--- a/src/TeamsISO.Engine/Pipeline/NdiReceiver.cs
+++ b/src/TeamsISO.Engine/Pipeline/NdiReceiver.cs
@@ -18,6 +18,19 @@ public sealed class NdiReceiver : IDisposable
private readonly NdiReceiverHandle _handle;
private long _framesCaptured;
+ // Peak of the most recently captured audio frame, in [0, 1]. Written by
+ // CaptureAudioOnce (audio loop) and read via LatestAudioPeak, which
+ // IsoPipeline copies into its stats. Stored as the IEEE 754 bit pattern of
+ // the double in a long and accessed only through Volatile.Read/Write: a
+ // plain 64-bit load/store may tear on 32-bit platforms (long as well as
+ // double), whereas the Volatile 64-bit overloads are atomic everywhere.
+ //
+ // No decay or peak-hold happens here: each captured frame overwrites the
+ // value, so a slow poller sees only the latest frame (roughly 21 ms of
+ // audio for a 1024-sample block at 48 kHz), not the maximum since its
+ // previous read. Visual decay is left to the UI — see ParticipantViewModel.
+ private long _lastAudioPeakBits;
+
public NdiReceiver(
INdiInterop interop,
string sourceName,
@@ -33,6 +46,20 @@ public sealed class NdiReceiver : IDisposable
public long FramesCaptured => Interlocked.Read(ref _framesCaptured);
+    /// <summary>
+    /// Peak amplitude of the most recently captured audio frame, in [0.0, 1.0].
+    /// </summary>
+    /// <value>
+    /// 0 until the first audio frame has been captured (silent source,
+    /// video-only sender, or the audio loop has not started yet).
+    /// </value>
+    /// <remarks>
+    /// Safe to call from any thread: the value is kept as raw IEEE 754 bits in
+    /// a long and read with one volatile load before being converted back.
+    /// </remarks>
+    public double LatestAudioPeak =>
+        BitConverter.Int64BitsToDouble(Volatile.Read(ref _lastAudioPeakBits));
+
///
/// Captures one frame (or returns on timeout). Test seam.
///
@@ -45,10 +72,24 @@ public sealed class NdiReceiver : IDisposable
}
///
- /// Long-running capture loop. Run on a dedicated thread (TaskCreationOptions.LongRunning).
+ /// Captures one audio frame (or returns on timeout) and, when one arrived,
+ /// publishes its peak through <see cref="LatestAudioPeak"/>. Test seam for the audio loop.
///
- public Task RunAsync(CancellationToken cancellationToken) =>
- Task.Factory.StartNew(() =>
+ public void CaptureAudioOnce(int timeoutMs)
+ {
+ var peak = _interop.CaptureAudioPeak(_handle, timeoutMs);
+ if (peak is null) return; // no frame within the timeout: keep the previous peak
+ Volatile.Write(ref _lastAudioPeakBits, BitConverter.DoubleToInt64Bits(peak.Value));
+ }
+
+ /// <summary>
+ /// Starts the video capture loop as a long-running task (TaskCreationOptions.LongRunning)
+ /// and, alongside it, an audio loop driven by the same cancellation token, so
+ /// the UI VU bar can read peaks without polluting the video read path.
+ /// </summary>
+ public Task RunAsync(CancellationToken cancellationToken)
+ {
+ var videoTask = Task.Factory.StartNew(() =>
{
try
{
@@ -60,10 +101,96 @@ public sealed class NdiReceiver : IDisposable
catch (OperationCanceledException) { }
catch (Exception ex)
{
- _logger.LogError(ex, "NdiReceiver loop crashed for source {Source}.", _sourceName);
+ _logger.LogError(ex, "NdiReceiver video loop crashed for source {Source}.", _sourceName);
throw;
}
}, cancellationToken, TaskCreationOptions.LongRunning, TaskScheduler.Default);
+
+        // The audio loop runs under its own linked token so it can be stopped as
+        // soon as the video loop ends for ANY reason. Without that, a crashed
+        // video loop would stay invisible to whoever awaits RunAsync until the
+        // caller's token is cancelled, because the audio loop would still be running.
+        var audioStop = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
+        var audioToken = audioStop.Token;
+        var audioTask = Task.Factory.StartNew(() =>
+        {
+            // 1024-sample blocks at 48kHz arrive roughly every 21ms, so a 50ms
+            // capture timeout normally returns a frame well before it expires
+            // and only runs to completion when the source sends no audio.
+            const int captureTimeoutMs = 50;
+
+            // Lower bound per iteration: keeps the loop from spinning when the
+            // interop returns without waiting (the INdiInterop default does).
+            const int minIterationMs = 5;
+
+            // Pause after a failed capture, and how many consecutive failures
+            // pass between log entries (120 * 500ms = about one per minute).
+            const int failureBackoffMs = 500;
+            const int failuresPerLogEntry = 120;
+
+            var iterationClock = new System.Diagnostics.Stopwatch();
+            var consecutiveFailures = 0;
+            while (!audioToken.IsCancellationRequested)
+            {
+                iterationClock.Restart();
+                try
+                {
+                    CaptureAudioOnce(timeoutMs: captureTimeoutMs);
+                    consecutiveFailures = 0;
+                }
+                catch (Exception ex)
+                {
+                    // Audio loop failures are logged but never re-thrown — a
+                    // misbehaving audio path must NEVER tear down the live
+                    // video pipeline. The UI VU bar will simply freeze at its
+                    // last value.
+                    if (consecutiveFailures % failuresPerLogEntry == 0)
+                    {
+                        _logger.LogWarning(ex, "NdiReceiver audio loop hiccup for source {Source}.", _sourceName);
+                    }
+
+                    consecutiveFailures++;
+                }
+
+                var floorMs = consecutiveFailures > 0 ? failureBackoffMs : minIterationMs;
+                var remainingMs = floorMs - iterationClock.ElapsedMilliseconds;
+                if (remainingMs > 0)
+                {
+                    // Waiting on the token's handle ends the pause at once on cancellation.
+                    audioToken.WaitHandle.WaitOne((int)remainingMs);
+                }
+            }
+        }, audioToken, TaskCreationOptions.LongRunning, TaskScheduler.Default);
+
+        return SuperviseAsync();
+
+        // Completes once both loops have ended and carries the video loop's
+        // outcome (success, cancellation or fault) to the awaiter.
+        async Task SuperviseAsync()
+        {
+            try
+            {
+                await videoTask.ConfigureAwait(false);
+            }
+            finally
+            {
+                audioStop.Cancel();
+                try
+                {
+                    await audioTask.ConfigureAwait(false);
+                }
+                catch (OperationCanceledException)
+                {
+                    // The audio task was cancelled before its loop ever started.
+                }
+                finally
+                {
+                    audioStop.Dispose();
+                }
+            }
+        }
+    }
+
public void Dispose() => _handle.Dispose();
}
diff --git a/src/tests/TeamsISO.Engine.Tests/Pipeline/AudioPeakComputerTests.cs b/src/tests/TeamsISO.Engine.Tests/Pipeline/AudioPeakComputerTests.cs
new file mode 100644
index 0000000..e7e4cfb
--- /dev/null
+++ b/src/tests/TeamsISO.Engine.Tests/Pipeline/AudioPeakComputerTests.cs
@@ -0,0 +1,190 @@
+using System.Runtime.InteropServices;
+using TeamsISO.Engine.Pipeline;
+using Xunit;
+
+namespace TeamsISO.Engine.Tests.Pipeline;
+
+/// <summary>
+/// Unit tests for <see cref="AudioPeakComputer"/>. Every buffer is built in
+/// managed memory, so no NDI runtime is involved.
+/// </summary>
+public class AudioPeakComputerTests
+{
+    [Fact]
+    public void EmptyBuffer_ReturnsZero()
+    {
+        var peak = AudioPeakComputer.ComputePeak(ReadOnlySpan<byte>.Empty, AudioPeakComputer.FourCC_FLTP, 0);
+
+        Assert.Equal(0.0, peak);
+    }
+
+    [Fact]
+    public void ZeroSamples_ReturnsZero()
+    {
+        var peak = AudioPeakComputer.ComputePeak(new byte[16], AudioPeakComputer.FourCC_FLTP, 0);
+
+        Assert.Equal(0.0, peak);
+    }
+
+    [Fact]
+    public void UnknownFourCC_ReturnsZero_RatherThanThrow()
+    {
+        // Receiver loop must never crash on an unrecognized format — better to
+        // show silence on the meter than to take down the pipeline.
+        var peak = PeakOf(new[] { 0.5f, -0.5f }, fourCC: 0xDEADBEEF);
+
+        Assert.Equal(0.0, peak);
+    }
+
+    [Fact]
+    public void FltpAllZeros_PeakIsZero()
+    {
+        var peak = PeakOf(new float[256], AudioPeakComputer.FourCC_FLTP);
+
+        Assert.Equal(0.0, peak);
+    }
+
+    [Fact]
+    public void Fltp_PicksLargestAbsoluteSample()
+    {
+        // Mix of small + one large negative spike. Peak = abs(largest) = 0.85.
+        var peak = PeakOf(new[] { 0.1f, -0.2f, 0.3f, -0.85f, 0.4f, -0.05f }, AudioPeakComputer.FourCC_FLTP);
+
+        Assert.Equal(0.85, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Fltp_FullScalePositive_ReturnsOne()
+    {
+        var peak = PeakOf(new[] { 0.0f, 1.0f, 0.5f }, AudioPeakComputer.FourCC_FLTP);
+
+        Assert.Equal(1.0, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Fltp_FullScaleNegative_ReturnsOne()
+    {
+        var peak = PeakOf(new[] { 0.0f, -1.0f, 0.5f }, AudioPeakComputer.FourCC_FLTP);
+
+        Assert.Equal(1.0, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Fltp_OverflowsClampToOne()
+    {
+        // Real audio sometimes briefly exceeds [-1,1] during a clip. Engine
+        // clamps so the meter never reads >1.0 (UI assumes [0,1] for the bar width).
+        var peak = PeakOf(new[] { 0.0f, 1.5f, -2.3f }, AudioPeakComputer.FourCC_FLTP);
+
+        Assert.Equal(1.0, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Fltp_NaNSample_IsIgnored()
+    {
+        // NaN never compares greater than the running peak, so a corrupt
+        // sample must neither win nor poison the result.
+        var peak = PeakOf(new[] { 0.25f, float.NaN, -0.5f }, AudioPeakComputer.FourCC_FLTP);
+
+        Assert.Equal(0.5, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Flt_Interleaved_UsesFloatPath()
+    {
+        var peak = PeakOf(new[] { 0.25f, -0.75f }, AudioPeakComputer.FourCC_FLT);
+
+        Assert.Equal(0.75, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Fltp_TotalSamplesSmallerThanBuffer_OnlyConsumesReportedRange()
+    {
+        // The reported range covers only the first 3 floats. The 4th
+        // (largest) is past `totalSamples` and must be ignored — otherwise we'd
+        // be reading beyond what the source said it wrote.
+        var peak = PeakOf(new[] { 0.1f, -0.2f, 0.3f, 0.99f }, AudioPeakComputer.FourCC_FLTP, totalSamples: 3);
+
+        Assert.Equal(0.3, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Fltp_TotalSamplesLargerThanBuffer_CapsAtBuffer()
+    {
+        // Defensive: a misreporting source claims more samples than the buffer
+        // actually holds. We must not read past the end of `data`.
+        var peak = PeakOf(new[] { 0.5f, -0.7f }, AudioPeakComputer.FourCC_FLTP, totalSamples: 999);
+
+        Assert.Equal(0.7, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Fltp_TrailingPartialSample_IsIgnored()
+    {
+        // One whole float followed by two stray bytes: the stray bytes do not
+        // form a sample and must not be read as one.
+        var bytes = new byte[6];
+        AsBytes(new[] { 0.5f }).CopyTo(bytes, 0);
+        bytes[4] = 0xFF;
+        bytes[5] = 0xFF;
+
+        var peak = AudioPeakComputer.ComputePeak(bytes, AudioPeakComputer.FourCC_FLTP, totalSamples: 2);
+
+        Assert.Equal(0.5, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Pcms16_ZeroBuffer_ReturnsZero()
+    {
+        var peak = PeakOf(new short[64], AudioPeakComputer.FourCC_PCMs16);
+
+        Assert.Equal(0.0, peak);
+    }
+
+    [Fact]
+    public void Pcms16_FullScalePositive_ReturnsOne()
+    {
+        var peak = PeakOf(new[] { (short)0, short.MaxValue, (short)100 }, AudioPeakComputer.FourCC_PCMs16);
+
+        Assert.Equal(1.0, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Pcms16_MinValue_ClampsToOne()
+    {
+        // short.MinValue == -32768; abs() overflows back to short.MinValue in
+        // two's complement, so we need explicit handling. Verify it pins to 1.0.
+        var peak = PeakOf(new[] { (short)0, short.MinValue, (short)100 }, AudioPeakComputer.FourCC_PCMs16);
+
+        Assert.Equal(1.0, peak, precision: 5);
+    }
+
+    [Fact]
+    public void Pcms16_HalfScale_ReturnsHalf()
+    {
+        var peak = PeakOf(new[] { (short)0, (short)16384, (short)-16383 }, AudioPeakComputer.FourCC_PCMs16);
+
+        // 16384 / 32767 ≈ 0.500015; tolerate small precision drift.
+        Assert.InRange(peak, 0.49, 0.51);
+    }
+
+    [Fact]
+    public void Pcms16_TotalSamplesLargerThanBuffer_CapsAtBuffer()
+    {
+        var peak = PeakOf(new[] { (short)8192, (short)-16384 }, AudioPeakComputer.FourCC_PCMs16, totalSamples: 999);
+
+        Assert.Equal(16384 / 32767.0, peak, precision: 10);
+    }
+
+    /// <summary>Runs ComputePeak over the raw bytes of samples; totalSamples defaults to the array length.</summary>
+    private static double PeakOf<T>(T[] samples, uint fourCC, int? totalSamples = null) where T : struct
+    {
+        return AudioPeakComputer.ComputePeak(AsBytes(samples), fourCC, totalSamples ?? samples.Length);
+    }
+
+    /// <summary>Copies the in-memory representation of arr into a new byte array.</summary>
+    private static byte[] AsBytes<T>(T[] arr) where T : struct
+    {
+        return MemoryMarshal.Cast<T, byte>(arr.AsSpan()).ToArray();
+    }
+}