fix(wpf): de-elevate via runas env-var marker (CLI arg breaks runas /trustlevel)
Some checks failed
CI / build-and-test (push) Failing after 26s

The earlier de-elevation attempts failed because runas /trustlevel:0x20000 rejects any args after the program path (it silently returns exit code 1). Switch the relaunch loop-guard from the --relaunched CLI arg to the TEAMSISO_RELAUNCHED env var, which runas inherits and propagates cleanly; for the same reason, operator CLI args are no longer forwarded to the relaunched child. Also: always demote when elevated, regardless of parent (the parent==explorer heuristic was too narrow; the runas demotion is cheap enough to do unconditionally), and add a StartupTrace fallback log at %LOCALAPPDATA%\TeamsISO\startup-trace.log that captures every checkpoint in OnStartup so future launch failures can be diagnosed without Serilog being up.

Verified end-to-end: elevated parent (PID 47536, isAdmin=True) -> spawns runas -> medium-integrity child (PID 51228, isAdmin=False) -> NDI discovery succeeds (vm.Participants.Count=2 at +5s). TryDeElevateAndExit now returns bool so that spawn failures fall through to normal startup instead of leaving the process in a zombie state.

Opt-out: --keep-elevation CLI arg bypasses the demotion.
This commit is contained in:
Zac Gaetano 2026-05-16 12:16:55 -04:00
parent 2552d46210
commit 54ee578fe9
2 changed files with 197 additions and 88 deletions

View file

@ -1,3 +1,4 @@
using System.IO;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using System.Windows; using System.Windows;
using System.Windows.Interop; using System.Windows.Interop;
@ -81,91 +82,122 @@ public partial class App : Application
protected override async void OnStartup(StartupEventArgs e) protected override async void OnStartup(StartupEventArgs e)
{ {
base.OnStartup(e); // RAW TRACE — captures startup BEFORE Serilog comes up. Helps diagnose
// launches where the Serilog log stays empty (silent file-sink failure,
// Re-launch detection: when explorer.exe is the parent AND we're elevated, // pre-logger crash, weird parent-spawn environment, etc.). Writes to
// NDI Find returns zero sources — reproducible on this user's box and // %LOCALAPPDATA%\TeamsISO\startup-trace.log.
// suspected to be a window-station / desktop-handle inheritance quirk var parentName = "(unknown)";
// that NDI's mDNS layer is sensitive to. The exact same exe spawned try { parentName = TryGetParentProcessName() ?? "(null)"; } catch { }
// from any other parent (PowerShell, cmd, another non-explorer process) StartupTrace.Write($"OnStartup ENTER. exe={Environment.ProcessPath} parent={parentName} args=[{string.Join(' ', e.Args)}]");
// discovers sources fine. Re-spawn through runas /trustlevel:0x20000 try
// to drop to medium integrity and detach from explorer's process tree.
//
// We pass --relaunched on the re-spawn so we don't loop if the trustlevel
// demotion didn't take. CLI args that the operator passed (e.g.
// --apply-preset NAME) are forwarded verbatim to the relaunched child.
if (ShouldDeElevate(e.Args, out var relaunchArgs))
{ {
TryDeElevateAndExit(relaunchArgs); using var id = System.Security.Principal.WindowsIdentity.GetCurrent();
return; // Shutdown happens inside TryDeElevateAndExit if the spawn succeeds. var pr = new System.Security.Principal.WindowsPrincipal(id);
StartupTrace.Write($"identity user={id.Name} isAdmin={pr.IsInRole(System.Security.Principal.WindowsBuiltInRole.Administrator)} integrity-token={id.User}");
}
catch (Exception ex) { StartupTrace.Write($"identity probe FAILED: {ex}"); }
base.OnStartup(e);
StartupTrace.Write("base.OnStartup returned");
// De-elevation check — see ShouldDeElevate doc. Trace records the decision.
bool deElev = false;
string[] relaunchArgs = e.Args;
try { deElev = ShouldDeElevate(e.Args, out relaunchArgs); } catch (Exception ex) { StartupTrace.Write($"ShouldDeElevate THREW: {ex}"); }
StartupTrace.Write($"ShouldDeElevate decision: {deElev}");
if (deElev)
{
var didExit = TryDeElevateAndExit(relaunchArgs);
if (didExit)
{
// Shutdown(0) was issued; let WPF tear us down. No more code runs.
return;
}
// Spawn failed — fall through to normal startup as a fallback so the
// operator at least sees a window. They may hit the elevated-launch
// bug (no participants) but that's better than nothing.
StartupTrace.Write("de-elevate spawn failed — falling through to normal startup as fallback");
} }
// Crash diagnostics — wire the three exception channels WPF leaves open by // Crash diagnostics — wire the three exception channels WPF leaves open by
// default to a single handler that logs Fatal to Serilog (which has the // default to a single handler that logs Fatal to Serilog.
// rolling-daily file sink at %LOCALAPPDATA%\TeamsISO\Logs) and then shows
// the user a dialog with the log path so they can attach it to a bug
// report. We deliberately don't catch StackOverflowException or
// ExecutionEngineException — both are uncatchable in modern .NET; if one
// fires the OS Watson dialog will take it from here.
AppDomain.CurrentDomain.UnhandledException += OnAppDomainUnhandled; AppDomain.CurrentDomain.UnhandledException += OnAppDomainUnhandled;
DispatcherUnhandledException += OnDispatcherUnhandled; DispatcherUnhandledException += OnDispatcherUnhandled;
System.Threading.Tasks.TaskScheduler.UnobservedTaskException += OnUnobservedTaskException; System.Threading.Tasks.TaskScheduler.UnobservedTaskException += OnUnobservedTaskException;
StartupTrace.Write("crash handlers registered");
// Resolve and apply the theme BEFORE any window is shown so we don't try { TeamsISO.App.Services.ThemeManager.Current.Apply(); StartupTrace.Write("ThemeManager.Apply OK"); }
// paint a dark frame for one tick then flip to light (or vice versa). catch (Exception ex) { StartupTrace.Write($"ThemeManager.Apply THREW: {ex}"); }
// ThemeManager.Apply swaps Application.Resources.MergedDictionaries
// in place; DynamicResource refs in WildDragonTheme.xaml re-bind.
TeamsISO.App.Services.ThemeManager.Current.Apply();
// Single-instance gate. Implementation in App.Bootstrap.cs; we // Single-instance gate. Trace the mutex acquisition.
// bail silently if another instance already owns the mutex (the bool acquired = false;
// existing instance gets surfaced via the bring-to-front broadcast). try { acquired = TryAcquireSingleInstance(); } catch (Exception ex) { StartupTrace.Write($"TryAcquireSingleInstance THREW: {ex}"); }
if (!TryAcquireSingleInstance()) StartupTrace.Write($"TryAcquireSingleInstance returned: {acquired}");
if (!acquired)
{ {
StartupTrace.Write("not first instance — Shutdown(0)");
Shutdown(0); Shutdown(0);
return; return;
} }
try try
{ {
// WPF host: write to both console (visible if attached) and a StartupTrace.Write("Bootstrap try-block ENTER");
// rolling daily file under %LOCALAPPDATA%\TeamsISO\Logs so users
// have something to grab when they file an issue.
_loggerFactory = EngineLogging.CreateDefault(LogLevel.Information); _loggerFactory = EngineLogging.CreateDefault(LogLevel.Information);
StartupTrace.Write("EngineLogging.CreateDefault OK");
var logger = _loggerFactory.CreateLogger<App>(); var logger = _loggerFactory.CreateLogger<App>();
logger.LogInformation( logger.LogInformation(
"TeamsISO.App starting up. Build: {Version}. Process: {Pid}.", "TeamsISO.App starting up. Build: {Version}. Process: {Pid}.",
typeof(App).Assembly.GetName().Version, typeof(App).Assembly.GetName().Version,
Environment.ProcessId); Environment.ProcessId);
StartupTrace.Write("Serilog first write attempted");
if (!TryBootstrapNdiInterop()) if (!TryBootstrapNdiInterop())
{ {
StartupTrace.Write("TryBootstrapNdiInterop returned false — Shutdown(2)");
Shutdown(2); Shutdown(2);
return; return;
} }
StartupTrace.Write("TryBootstrapNdiInterop OK");
BootstrapEngine(); BootstrapEngine();
StartupTrace.Write("BootstrapEngine OK");
var window = ConstructAndShowMainWindow(); var window = ConstructAndShowMainWindow();
StartupTrace.Write("ConstructAndShowMainWindow OK (window shown)");
BootstrapControlSurfaceServices(); BootstrapControlSurfaceServices();
StartupTrace.Write("BootstrapControlSurfaceServices OK");
BootstrapTrayIcon(window); BootstrapTrayIcon(window);
StartupTrace.Write("BootstrapTrayIcon OK");
TryShowOnboarding(window); TryShowOnboarding(window);
StartupTrace.Write("TryShowOnboarding returned");
// Parse CLI args BEFORE InitializeAsync so any --apply-preset
// request overrides the persisted auto-apply preference cleanly.
ApplyCommandLineArgs(e.Args); ApplyCommandLineArgs(e.Args);
StartupTrace.Write("ApplyCommandLineArgs OK");
StartupTrace.Write("about to await _viewModel.InitializeAsync");
await _viewModel!.InitializeAsync(CancellationToken.None); await _viewModel!.InitializeAsync(CancellationToken.None);
StartupTrace.Write("_viewModel.InitializeAsync COMPLETED");
TryAutoLaunchTeams(logger); TryAutoLaunchTeams(logger);
StartBackgroundUpdateCheck(logger); StartBackgroundUpdateCheck(logger);
StartupTrace.Write("OnStartup COMPLETE");
// 5-second post-init participant probe — tells us whether discovery
// is actually producing rows once the engine is up.
_ = Task.Run(async () =>
{
await Task.Delay(5000);
try
{
var n = await Dispatcher.InvokeAsync(() => _viewModel?.Participants.Count ?? -1);
StartupTrace.Write($"+5s after init: vm.Participants.Count={n}");
}
catch (Exception ex) { StartupTrace.Write($"+5s probe THREW: {ex.Message}"); }
});
} }
catch (Exception ex) catch (Exception ex)
{ {
// Log the full exception (incl. stack + inner) to Serilog BEFORE the StartupTrace.Write($"OnStartup CATCH: {ex}");
// modal MessageBox fires — diagnostic logs are far more useful than a
// user-pasted "TeamsISO failed to start..." line when triaging a
// startup crash. The logger may itself have been the failure target
// so guard the call.
try { _loggerFactory?.CreateLogger<App>().LogCritical(ex, "OnStartup failed before main loop"); } try { _loggerFactory?.CreateLogger<App>().LogCritical(ex, "OnStartup failed before main loop"); }
catch { /* defensive */ } catch { /* defensive */ }
MessageBox.Show( MessageBox.Show(
@ -179,46 +211,60 @@ public partial class App : Application
/// <summary> /// <summary>
/// Returns true when we need to re-spawn ourselves with a non-elevated /// Returns true when we need to re-spawn ourselves with a non-elevated
/// medium-integrity token. This is the case when: /// medium-integrity token. Rule:
/// <list type="number"> /// <list type="number">
/// <item>We haven't already been relaunched (<c>--relaunched</c> guard /// <item>If we've already been relaunched once (<c>TEAMSISO_RELAUNCHED=1</c>
/// prevents infinite loops if the demotion didn't take).</item> /// is set in the environment), DO NOT demote again. Clear the variable
/// <item>The current process token has the Administrators group /// so a future legitimately-elevated launch isn't suppressed.</item>
/// elevated (UAC "split-token" — admin SID is present and active).</item> /// <item>If our token is elevated (Administrators group active),
/// <item>Our parent process is <c>explorer.exe</c> — that's the spawn /// demote — full stop, regardless of parent.</item>
/// path that triggers the NDI mDNS-isolation bug. Launches from
/// PowerShell, cmd, or any other parent work fine even when
/// elevated, so we don't need to fight them.</item>
/// </list> /// </list>
/// <para>
/// The earlier "only if parent == explorer.exe" heuristic was too narrow:
/// the operator's broken spawn path on this dev box is double-clicking
/// TeamsISO.exe from an elevated File Explorer, which Windows turns into
/// a CreateProcess where the parent record is not always explorer (it
/// depends on Windows version, shell extension state, and whether the
/// click went through the shell namespace cache). Demoting whenever we
/// see an elevated token is safer and cheaper than trying to disambiguate
/// the spawn chain. The cost is one extra millisecond on launch + a brief
/// console flash from runas; the win is that NDI discovery actually works.
/// </para>
/// <para>
/// If you ever need to run TeamsISO elevated on purpose (debugging some
/// admin-only API path), pass <c>--keep-elevation</c> on the command line
/// to bypass this check.
/// </para>
/// </summary> /// </summary>
private const string RelaunchEnvVar = "TEAMSISO_RELAUNCHED";
private static bool ShouldDeElevate(string[] args, out string[] forwardArgs) private static bool ShouldDeElevate(string[] args, out string[] forwardArgs)
{ {
forwardArgs = args; forwardArgs = args;
// Already relaunched once — don't loop. // Already relaunched once — don't loop. The marker is an env var
if (Array.IndexOf(args, "--relaunched") >= 0) // (NOT a CLI arg) because runas.exe /trustlevel:0x20000 fails with
// exit code 1 when extra args follow the program path; the env var
// is inherited cleanly across the runas boundary.
if (string.Equals(Environment.GetEnvironmentVariable(RelaunchEnvVar), "1", StringComparison.Ordinal))
{ {
// Strip the marker so it doesn't propagate further. // Clear it so a future legitimately-elevated launch isn't suppressed.
forwardArgs = args.Where(a => a != "--relaunched").ToArray(); Environment.SetEnvironmentVariable(RelaunchEnvVar, null);
return false; return false;
} }
// Not elevated — nothing to demote from. // Explicit opt-out for power users.
if (Array.IndexOf(args, "--keep-elevation") >= 0)
{
forwardArgs = args.Where(a => a != "--keep-elevation").ToArray();
return false;
}
// The whole reason for the check — are we elevated?
try try
{ {
using var identity = System.Security.Principal.WindowsIdentity.GetCurrent(); using var identity = System.Security.Principal.WindowsIdentity.GetCurrent();
var principal = new System.Security.Principal.WindowsPrincipal(identity); var principal = new System.Security.Principal.WindowsPrincipal(identity);
if (!principal.IsInRole(System.Security.Principal.WindowsBuiltInRole.Administrator)) return principal.IsInRole(System.Security.Principal.WindowsBuiltInRole.Administrator);
return false;
} }
catch { return false; } catch { return false; }
// Check parent process; if anything but explorer.exe we leave well alone.
try
{
var parentName = TryGetParentProcessName();
if (!string.Equals(parentName, "explorer", StringComparison.OrdinalIgnoreCase))
return false;
}
catch { return false; }
return true;
} }
/// <summary> /// <summary>
@ -247,46 +293,69 @@ public partial class App : Application
/// Re-launch TeamsISO via <c>runas.exe /trustlevel:0x20000</c>. The /// Re-launch TeamsISO via <c>runas.exe /trustlevel:0x20000</c>. The
/// trustlevel argument requests a medium-integrity restricted token — /// trustlevel argument requests a medium-integrity restricted token —
/// even when the caller (us) is elevated, the spawned child runs at /// even when the caller (us) is elevated, the spawned child runs at
/// medium. This detaches us from explorer's spawn quirks AND from the /// medium integrity. This sidesteps the elevation that was tripping
/// elevation that was tripping NDI Find. We then <see cref="Application.Shutdown(int)"/> /// NDI Find. After the spawn, <see cref="Application.Shutdown(int)"/>
/// the current process so only the medium-integrity child remains. /// so only the medium-integrity child remains.
///
/// If the spawn fails for any reason (runas missing, permission denied,
/// etc.) we silently continue startup — the operator may still see the
/// "no ndi sources visible" state, but at least the app launches.
/// </summary> /// </summary>
private void TryDeElevateAndExit(string[] forwardArgs) /// <returns>true if a child was spawned and the caller should Shutdown;
/// false if the spawn failed and the caller should fall through to
/// normal (elevated) startup.</returns>
private bool TryDeElevateAndExit(string[] forwardArgs)
{ {
try try
{ {
var exePath = System.Diagnostics.Process.GetCurrentProcess().MainModule?.FileName; var exePath = System.Diagnostics.Process.GetCurrentProcess().MainModule?.FileName;
if (string.IsNullOrEmpty(exePath)) return; // can't relaunch what we can't find if (string.IsNullOrEmpty(exePath))
{
StartupTrace.Write("de-elevate: exePath empty, giving up");
return false;
}
StartupTrace.Write($"de-elevate: spawning runas with target {exePath}");
var quotedExe = "\"" + exePath + "\""; var quotedExe = "\"" + exePath + "\"";
var forwarded = string.Join(" ", forwardArgs.Select(a => "\"" + a + "\"")); // runas /trustlevel:0x20000 rejects any args after the program
var trustArg = string.IsNullOrEmpty(forwarded) // path (returns exit 1). Pass ONLY the path; relay re-launch
? quotedExe + " --relaunched" // state via the TEAMSISO_RELAUNCHED env var, which runas
: quotedExe + " --relaunched " + forwarded; // inherits and propagates to the spawned child.
// Operator CLI args (e.g. --apply-preset NAME) are not
// forwarded across de-elevation for the same reason; this is
// an acceptable tradeoff because the elevated launch was
// probably an Explorer double-click with no args anyway.
// Find runas.exe explicitly under System32 (the native 64-bit path).
var systemRunas = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.System), "runas.exe");
var runasPath = File.Exists(systemRunas) ? systemRunas : "runas.exe";
var psi = new System.Diagnostics.ProcessStartInfo var psi = new System.Diagnostics.ProcessStartInfo
{ {
FileName = "runas.exe", FileName = runasPath,
Arguments = "/trustlevel:0x20000 " + trustArg, Arguments = "/trustlevel:0x20000 " + quotedExe,
UseShellExecute = false, UseShellExecute = false,
CreateNoWindow = true, CreateNoWindow = true,
WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden, WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden,
}; };
System.Diagnostics.Process.Start(psi); // Mark the env so the demoted child knows it's the relaunch and
// won't loop. runas + CreateProcess passes the parent env block
// to the new child by default.
psi.EnvironmentVariables[RelaunchEnvVar] = "1";
using var spawned = System.Diagnostics.Process.Start(psi);
if (spawned is null)
{
StartupTrace.Write("de-elevate: Process.Start returned null");
return false;
}
StartupTrace.Write($"de-elevate: runas spawned as PID {spawned.Id}");
} }
catch catch (Exception ex)
{ {
// Relaunch failed — let normal startup proceed. Worst case the operator StartupTrace.Write($"de-elevate: spawn THREW: {ex.GetType().Name}: {ex.Message}");
// sees the empty-state and has to launch differently. return false;
return;
} }
// Shutdown WITHOUT a value so OnExit handlers don't run a teardown for an // Spawn succeeded — shut ourselves down so only the medium child remains.
// engine that was never wired up. // Use Shutdown(0) to signal a clean exit (NOT a startup error).
StartupTrace.Write("de-elevate: calling Shutdown(0) to let runas child take over");
Shutdown(0); Shutdown(0);
return true;
} }
/// <summary> /// <summary>

View file

@ -0,0 +1,40 @@
using System.IO;
namespace TeamsISO.App;
/// <summary>
/// Bare-metal startup tracer: every <see cref="Write"/> call opens the trace
/// file, appends a single line, and closes the file again. It exists to
/// record what happens BEFORE Serilog comes up (and failures that would keep
/// Serilog from coming up at all). Any failure while tracing is swallowed —
/// diagnostics must never crash the very thing they are diagnosing.
///
/// The file lives at <c>%LOCALAPPDATA%\TeamsISO\startup-trace.log</c>. It
/// grows without rotation; it is expected to stay tiny since each launch
/// writes ~20 lines. Acceptable cost for catching launch-time regressions.
/// </summary>
internal static class StartupTrace
{
    /// <summary>
    /// Serialises appends issued by different threads of THIS process.
    /// NOTE(review): it does not coordinate with other processes; a write that
    /// collides with another process appending to the same file presumably
    /// fails and is dropped by the catch-all in <see cref="Write"/> — confirm
    /// if losing such lines ever matters.
    /// </summary>
    private static readonly object _gate = new();

    /// <summary>
    /// Appends one line of the form
    /// <c>[yyyy-MM-dd HH:mm:ss.fff] [PID n] message</c> to the trace file,
    /// creating the <c>TeamsISO</c> directory first if it is missing.
    /// Never throws: every exception is caught and discarded.
    /// </summary>
    /// <param name="message">Free-form text recorded after the timestamp/PID prefix.</param>
    public static void Write(string message)
    {
        try
        {
            var dir = Path.Combine(
                Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
                "TeamsISO");
            Directory.CreateDirectory(dir);
            var path = Path.Combine(dir, "startup-trace.log");

            // Format the local time with the invariant culture. Under the
            // current culture the ':' specifier becomes that culture's time
            // separator and the year follows its calendar, so the same log
            // would look different (and sort differently) from one machine
            // to the next.
            var stamp = DateTimeOffset.Now.ToString(
                "yyyy-MM-dd HH:mm:ss.fff",
                System.Globalization.CultureInfo.InvariantCulture);
            var line = $"[{stamp}] [PID {Environment.ProcessId}] {message}{Environment.NewLine}";

            lock (_gate)
            {
                File.AppendAllText(path, line);
            }
        }
        catch
        {
            // Diagnostics must NEVER crash startup.
        }
    }
}