fix(wpf): de-elevate via runas env-var marker (CLI arg breaks runas /trustlevel)
Some checks failed
CI / build-and-test (push) Failing after 26s
Some checks failed
CI / build-and-test (push) Failing after 26s
The earlier de-elevation attempts failed because runas /trustlevel:0x20000 rejects any args after the program path (returns exit code 1 silently). Switch the relaunch loop-guard from --relaunched CLI arg to TEAMSISO_RELAUNCHED env var, which runas inherits and propagates cleanly. Also: always demote when elevated regardless of parent (the parent==explorer heuristic was too narrow; the runas demotion is cheap enough to do unconditionally), and add a StartupTrace fallback log at %LOCALAPPDATA%\\TeamsISO\\startup-trace.log that captures every checkpoint in OnStartup so future launch failures can be diagnosed without Serilog being up. Verified end-to-end: elevated parent (PID 47536, isAdmin=True) -> spawns runas -> medium-integrity child (PID 51228, isAdmin=False) -> NDI discovery succeeds (vm.Participants.Count=2 at +5s). The TryDeElevateAndExit now returns bool so spawn failures fall through to normal startup instead of leaving the process in a zombie state. Opt-out: --keep-elevation CLI arg bypasses the demotion.
This commit is contained in:
parent
2552d46210
commit
54ee578fe9
2 changed files with 197 additions and 88 deletions
|
|
@ -1,3 +1,4 @@
|
|||
using System.IO;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Windows;
|
||||
using System.Windows.Interop;
|
||||
|
|
@ -81,91 +82,122 @@ public partial class App : Application
|
|||
|
||||
protected override async void OnStartup(StartupEventArgs e)
|
||||
{
|
||||
base.OnStartup(e);
|
||||
|
||||
// Re-launch detection: when explorer.exe is the parent AND we're elevated,
|
||||
// NDI Find returns zero sources — reproducible on this user's box and
|
||||
// suspected to be a window-station / desktop-handle inheritance quirk
|
||||
// that NDI's mDNS layer is sensitive to. The exact same exe spawned
|
||||
// from any other parent (PowerShell, cmd, another non-explorer process)
|
||||
// discovers sources fine. Re-spawn through runas /trustlevel:0x20000
|
||||
// to drop to medium integrity and detach from explorer's process tree.
|
||||
//
|
||||
// We pass --relaunched on the re-spawn so we don't loop if the trustlevel
|
||||
// demotion didn't take. CLI args that the operator passed (e.g.
|
||||
// --apply-preset NAME) are forwarded verbatim to the relaunched child.
|
||||
if (ShouldDeElevate(e.Args, out var relaunchArgs))
|
||||
// RAW TRACE — captures startup BEFORE Serilog comes up. Helps diagnose
|
||||
// launches where the Serilog log stays empty (silent file-sink failure,
|
||||
// pre-logger crash, weird parent-spawn environment, etc.). Writes to
|
||||
// %LOCALAPPDATA%\TeamsISO\startup-trace.log.
|
||||
var parentName = "(unknown)";
|
||||
try { parentName = TryGetParentProcessName() ?? "(null)"; } catch { }
|
||||
StartupTrace.Write($"OnStartup ENTER. exe={Environment.ProcessPath} parent={parentName} args=[{string.Join(' ', e.Args)}]");
|
||||
try
|
||||
{
|
||||
TryDeElevateAndExit(relaunchArgs);
|
||||
return; // Shutdown happens inside TryDeElevateAndExit if the spawn succeeds.
|
||||
using var id = System.Security.Principal.WindowsIdentity.GetCurrent();
|
||||
var pr = new System.Security.Principal.WindowsPrincipal(id);
|
||||
StartupTrace.Write($"identity user={id.Name} isAdmin={pr.IsInRole(System.Security.Principal.WindowsBuiltInRole.Administrator)} integrity-token={id.User}");
|
||||
}
|
||||
catch (Exception ex) { StartupTrace.Write($"identity probe FAILED: {ex}"); }
|
||||
|
||||
base.OnStartup(e);
|
||||
StartupTrace.Write("base.OnStartup returned");
|
||||
|
||||
// De-elevation check — see ShouldDeElevate doc. Trace records the decision.
|
||||
bool deElev = false;
|
||||
string[] relaunchArgs = e.Args;
|
||||
try { deElev = ShouldDeElevate(e.Args, out relaunchArgs); } catch (Exception ex) { StartupTrace.Write($"ShouldDeElevate THREW: {ex}"); }
|
||||
StartupTrace.Write($"ShouldDeElevate decision: {deElev}");
|
||||
if (deElev)
|
||||
{
|
||||
var didExit = TryDeElevateAndExit(relaunchArgs);
|
||||
if (didExit)
|
||||
{
|
||||
// Shutdown(0) was issued; let WPF tear us down. No more code runs.
|
||||
return;
|
||||
}
|
||||
// Spawn failed — fall through to normal startup as a fallback so the
|
||||
// operator at least sees a window. They may hit the elevated-launch
|
||||
// bug (no participants) but that's better than nothing.
|
||||
StartupTrace.Write("de-elevate spawn failed — falling through to normal startup as fallback");
|
||||
}
|
||||
|
||||
// Crash diagnostics — wire the three exception channels WPF leaves open by
|
||||
// default to a single handler that logs Fatal to Serilog (which has the
|
||||
// rolling-daily file sink at %LOCALAPPDATA%\TeamsISO\Logs) and then shows
|
||||
// the user a dialog with the log path so they can attach it to a bug
|
||||
// report. We deliberately don't catch StackOverflowException or
|
||||
// ExecutionEngineException — both are uncatchable in modern .NET; if one
|
||||
// fires the OS Watson dialog will take it from here.
|
||||
// default to a single handler that logs Fatal to Serilog.
|
||||
AppDomain.CurrentDomain.UnhandledException += OnAppDomainUnhandled;
|
||||
DispatcherUnhandledException += OnDispatcherUnhandled;
|
||||
System.Threading.Tasks.TaskScheduler.UnobservedTaskException += OnUnobservedTaskException;
|
||||
StartupTrace.Write("crash handlers registered");
|
||||
|
||||
// Resolve and apply the theme BEFORE any window is shown so we don't
|
||||
// paint a dark frame for one tick then flip to light (or vice versa).
|
||||
// ThemeManager.Apply swaps Application.Resources.MergedDictionaries
|
||||
// in place; DynamicResource refs in WildDragonTheme.xaml re-bind.
|
||||
TeamsISO.App.Services.ThemeManager.Current.Apply();
|
||||
try { TeamsISO.App.Services.ThemeManager.Current.Apply(); StartupTrace.Write("ThemeManager.Apply OK"); }
|
||||
catch (Exception ex) { StartupTrace.Write($"ThemeManager.Apply THREW: {ex}"); }
|
||||
|
||||
// Single-instance gate. Implementation in App.Bootstrap.cs; we
|
||||
// bail silently if another instance already owns the mutex (the
|
||||
// existing instance gets surfaced via the bring-to-front broadcast).
|
||||
if (!TryAcquireSingleInstance())
|
||||
// Single-instance gate. Trace the mutex acquisition.
|
||||
bool acquired = false;
|
||||
try { acquired = TryAcquireSingleInstance(); } catch (Exception ex) { StartupTrace.Write($"TryAcquireSingleInstance THREW: {ex}"); }
|
||||
StartupTrace.Write($"TryAcquireSingleInstance returned: {acquired}");
|
||||
if (!acquired)
|
||||
{
|
||||
StartupTrace.Write("not first instance — Shutdown(0)");
|
||||
Shutdown(0);
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
// WPF host: write to both console (visible if attached) and a
|
||||
// rolling daily file under %LOCALAPPDATA%\TeamsISO\Logs so users
|
||||
// have something to grab when they file an issue.
|
||||
StartupTrace.Write("Bootstrap try-block ENTER");
|
||||
_loggerFactory = EngineLogging.CreateDefault(LogLevel.Information);
|
||||
StartupTrace.Write("EngineLogging.CreateDefault OK");
|
||||
var logger = _loggerFactory.CreateLogger<App>();
|
||||
logger.LogInformation(
|
||||
"TeamsISO.App starting up. Build: {Version}. Process: {Pid}.",
|
||||
typeof(App).Assembly.GetName().Version,
|
||||
Environment.ProcessId);
|
||||
StartupTrace.Write("Serilog first write attempted");
|
||||
|
||||
if (!TryBootstrapNdiInterop())
|
||||
{
|
||||
StartupTrace.Write("TryBootstrapNdiInterop returned false — Shutdown(2)");
|
||||
Shutdown(2);
|
||||
return;
|
||||
}
|
||||
StartupTrace.Write("TryBootstrapNdiInterop OK");
|
||||
|
||||
BootstrapEngine();
|
||||
StartupTrace.Write("BootstrapEngine OK");
|
||||
var window = ConstructAndShowMainWindow();
|
||||
StartupTrace.Write("ConstructAndShowMainWindow OK (window shown)");
|
||||
BootstrapControlSurfaceServices();
|
||||
StartupTrace.Write("BootstrapControlSurfaceServices OK");
|
||||
BootstrapTrayIcon(window);
|
||||
StartupTrace.Write("BootstrapTrayIcon OK");
|
||||
TryShowOnboarding(window);
|
||||
StartupTrace.Write("TryShowOnboarding returned");
|
||||
|
||||
// Parse CLI args BEFORE InitializeAsync so any --apply-preset
|
||||
// request overrides the persisted auto-apply preference cleanly.
|
||||
ApplyCommandLineArgs(e.Args);
|
||||
StartupTrace.Write("ApplyCommandLineArgs OK");
|
||||
|
||||
StartupTrace.Write("about to await _viewModel.InitializeAsync");
|
||||
await _viewModel!.InitializeAsync(CancellationToken.None);
|
||||
StartupTrace.Write("_viewModel.InitializeAsync COMPLETED");
|
||||
|
||||
TryAutoLaunchTeams(logger);
|
||||
StartBackgroundUpdateCheck(logger);
|
||||
StartupTrace.Write("OnStartup COMPLETE");
|
||||
|
||||
// 5-second post-init participant probe — tells us whether discovery
|
||||
// is actually producing rows once the engine is up.
|
||||
_ = Task.Run(async () =>
|
||||
{
|
||||
await Task.Delay(5000);
|
||||
try
|
||||
{
|
||||
var n = await Dispatcher.InvokeAsync(() => _viewModel?.Participants.Count ?? -1);
|
||||
StartupTrace.Write($"+5s after init: vm.Participants.Count={n}");
|
||||
}
|
||||
catch (Exception ex) { StartupTrace.Write($"+5s probe THREW: {ex.Message}"); }
|
||||
});
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Log the full exception (incl. stack + inner) to Serilog BEFORE the
|
||||
// modal MessageBox fires — diagnostic logs are far more useful than a
|
||||
// user-pasted "TeamsISO failed to start..." line when triaging a
|
||||
// startup crash. The logger may itself have been the failure target
|
||||
// so guard the call.
|
||||
StartupTrace.Write($"OnStartup CATCH: {ex}");
|
||||
try { _loggerFactory?.CreateLogger<App>().LogCritical(ex, "OnStartup failed before main loop"); }
|
||||
catch { /* defensive */ }
|
||||
MessageBox.Show(
|
||||
|
|
@ -179,46 +211,60 @@ public partial class App : Application
|
|||
|
||||
/// <summary>
|
||||
/// Returns true when we need to re-spawn ourselves with a non-elevated
|
||||
/// medium-integrity token. This is the case when:
|
||||
/// medium-integrity token. Rule:
|
||||
/// <list type="number">
|
||||
/// <item>We haven't already been relaunched (<c>--relaunched</c> guard
|
||||
/// prevents infinite loops if the demotion didn't take).</item>
|
||||
/// <item>The current process token has the Administrators group
|
||||
/// elevated (UAC "split-token" — admin SID is present and active).</item>
|
||||
/// <item>Our parent process is <c>explorer.exe</c> — that's the spawn
|
||||
/// path that triggers the NDI mDNS-isolation bug. Launches from
|
||||
/// PowerShell, cmd, or any other parent work fine even when
|
||||
/// elevated, so we don't need to fight them.</item>
|
||||
/// <item>If we've already been relaunched once (<c>--relaunched</c>
|
||||
/// marker present in args), DO NOT demote again. Strip the
|
||||
/// marker from forwardArgs so it doesn't leak further.</item>
|
||||
/// <item>If our token is elevated (Administrators group active),
|
||||
/// demote — full stop, regardless of parent.</item>
|
||||
/// </list>
|
||||
/// <para>
|
||||
/// The earlier "only if parent == explorer.exe" heuristic was too narrow:
|
||||
/// the operator's broken spawn path on this dev box is double-clicking
|
||||
/// TeamsISO.exe from an elevated File Explorer, which Windows turns into
|
||||
/// a CreateProcess where the parent record is not always explorer (it
|
||||
/// depends on Windows version, shell extension state, and whether the
|
||||
/// click went through the shell namespace cache). Demoting whenever we
|
||||
/// see an elevated token is safer and cheaper than trying to disambiguate
|
||||
/// the spawn chain. The cost is one extra millisecond on launch + a brief
|
||||
/// console flash from runas; the win is that NDI discovery actually works.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// If you ever need to run TeamsISO elevated on purpose (debugging some
|
||||
/// admin-only API path), pass <c>--keep-elevation</c> on the command line
|
||||
/// to bypass this check.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
private const string RelaunchEnvVar = "TEAMSISO_RELAUNCHED";
|
||||
|
||||
private static bool ShouldDeElevate(string[] args, out string[] forwardArgs)
|
||||
{
|
||||
forwardArgs = args;
|
||||
// Already relaunched once — don't loop.
|
||||
if (Array.IndexOf(args, "--relaunched") >= 0)
|
||||
// Already relaunched once — don't loop. The marker is an env var
|
||||
// (NOT a CLI arg) because runas.exe /trustlevel:0x20000 fails with
|
||||
// exit code 1 when extra args follow the program path; the env var
|
||||
// is inherited cleanly across the runas boundary.
|
||||
if (string.Equals(Environment.GetEnvironmentVariable(RelaunchEnvVar), "1", StringComparison.Ordinal))
|
||||
{
|
||||
// Strip the marker so it doesn't propagate further.
|
||||
forwardArgs = args.Where(a => a != "--relaunched").ToArray();
|
||||
// Clear it so a future legitimately-elevated launch isn't suppressed.
|
||||
Environment.SetEnvironmentVariable(RelaunchEnvVar, null);
|
||||
return false;
|
||||
}
|
||||
// Not elevated — nothing to demote from.
|
||||
// Explicit opt-out for power users.
|
||||
if (Array.IndexOf(args, "--keep-elevation") >= 0)
|
||||
{
|
||||
forwardArgs = args.Where(a => a != "--keep-elevation").ToArray();
|
||||
return false;
|
||||
}
|
||||
// The whole reason for the check — are we elevated?
|
||||
try
|
||||
{
|
||||
using var identity = System.Security.Principal.WindowsIdentity.GetCurrent();
|
||||
var principal = new System.Security.Principal.WindowsPrincipal(identity);
|
||||
if (!principal.IsInRole(System.Security.Principal.WindowsBuiltInRole.Administrator))
|
||||
return false;
|
||||
return principal.IsInRole(System.Security.Principal.WindowsBuiltInRole.Administrator);
|
||||
}
|
||||
catch { return false; }
|
||||
// Check parent process; if anything but explorer.exe we leave well alone.
|
||||
try
|
||||
{
|
||||
var parentName = TryGetParentProcessName();
|
||||
if (!string.Equals(parentName, "explorer", StringComparison.OrdinalIgnoreCase))
|
||||
return false;
|
||||
}
|
||||
catch { return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
@ -247,46 +293,69 @@ public partial class App : Application
|
|||
/// Re-launch TeamsISO via <c>runas.exe /trustlevel:0x20000</c>. The
|
||||
/// trustlevel argument requests a medium-integrity restricted token —
|
||||
/// even when the caller (us) is elevated, the spawned child runs at
|
||||
/// medium. This detaches us from explorer's spawn quirks AND from the
|
||||
/// elevation that was tripping NDI Find. We then <see cref="Application.Shutdown(int)"/>
|
||||
/// the current process so only the medium-integrity child remains.
|
||||
///
|
||||
/// If the spawn fails for any reason (runas missing, permission denied,
|
||||
/// etc.) we silently continue startup — the operator may still see the
|
||||
/// "no ndi sources visible" state, but at least the app launches.
|
||||
/// medium integrity. This sidesteps the elevation that was tripping
|
||||
/// NDI Find. After the spawn, <see cref="Application.Shutdown(int)"/>
|
||||
/// so only the medium-integrity child remains.
|
||||
/// </summary>
|
||||
private void TryDeElevateAndExit(string[] forwardArgs)
|
||||
/// <returns>true if a child was spawned and the caller should Shutdown;
|
||||
/// false if the spawn failed and the caller should fall through to
|
||||
/// normal (elevated) startup.</returns>
|
||||
private bool TryDeElevateAndExit(string[] forwardArgs)
|
||||
{
|
||||
try
|
||||
{
|
||||
var exePath = System.Diagnostics.Process.GetCurrentProcess().MainModule?.FileName;
|
||||
if (string.IsNullOrEmpty(exePath)) return; // can't relaunch what we can't find
|
||||
if (string.IsNullOrEmpty(exePath))
|
||||
{
|
||||
StartupTrace.Write("de-elevate: exePath empty, giving up");
|
||||
return false;
|
||||
}
|
||||
StartupTrace.Write($"de-elevate: spawning runas with target {exePath}");
|
||||
|
||||
var quotedExe = "\"" + exePath + "\"";
|
||||
var forwarded = string.Join(" ", forwardArgs.Select(a => "\"" + a + "\""));
|
||||
var trustArg = string.IsNullOrEmpty(forwarded)
|
||||
? quotedExe + " --relaunched"
|
||||
: quotedExe + " --relaunched " + forwarded;
|
||||
// runas /trustlevel:0x20000 rejects any args after the program
|
||||
// path (returns exit 1). Pass ONLY the path; relay re-launch
|
||||
// state via the TEAMSISO_RELAUNCHED env var, which runas
|
||||
// inherits and propagates to the spawned child.
|
||||
// Operator CLI args (e.g. --apply-preset NAME) are not
|
||||
// forwarded across de-elevation for the same reason; this is
|
||||
// an acceptable tradeoff because the elevated launch was
|
||||
// probably an Explorer double-click with no args anyway.
|
||||
|
||||
// Find runas.exe explicitly under System32 (the native 64-bit path).
|
||||
var systemRunas = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.System), "runas.exe");
|
||||
var runasPath = File.Exists(systemRunas) ? systemRunas : "runas.exe";
|
||||
|
||||
var psi = new System.Diagnostics.ProcessStartInfo
|
||||
{
|
||||
FileName = "runas.exe",
|
||||
Arguments = "/trustlevel:0x20000 " + trustArg,
|
||||
FileName = runasPath,
|
||||
Arguments = "/trustlevel:0x20000 " + quotedExe,
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true,
|
||||
WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden,
|
||||
};
|
||||
System.Diagnostics.Process.Start(psi);
|
||||
}
|
||||
catch
|
||||
// Mark the env so the demoted child knows it's the relaunch and
|
||||
// won't loop. runas + CreateProcess passes the parent env block
|
||||
// to the new child by default.
|
||||
psi.EnvironmentVariables[RelaunchEnvVar] = "1";
|
||||
using var spawned = System.Diagnostics.Process.Start(psi);
|
||||
if (spawned is null)
|
||||
{
|
||||
// Relaunch failed — let normal startup proceed. Worst case the operator
|
||||
// sees the empty-state and has to launch differently.
|
||||
return;
|
||||
StartupTrace.Write("de-elevate: Process.Start returned null");
|
||||
return false;
|
||||
}
|
||||
// Shutdown WITHOUT a value so OnExit handlers don't run a teardown for an
|
||||
// engine that was never wired up.
|
||||
StartupTrace.Write($"de-elevate: runas spawned as PID {spawned.Id}");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
StartupTrace.Write($"de-elevate: spawn THREW: {ex.GetType().Name}: {ex.Message}");
|
||||
return false;
|
||||
}
|
||||
// Spawn succeeded — shut ourselves down so only the medium child remains.
|
||||
// Use Shutdown(0) to signal a clean exit (NOT a startup error).
|
||||
StartupTrace.Write("de-elevate: calling Shutdown(0) to let runas child take over");
|
||||
Shutdown(0);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
|
|||
40
src/TeamsISO.App/StartupTrace.cs
Normal file
40
src/TeamsISO.App/StartupTrace.cs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
using System.IO;
|
||||
|
||||
namespace TeamsISO.App;
|
||||
|
||||
/// <summary>
|
||||
/// Bare-metal startup tracer that opens, appends, and closes a file on
|
||||
/// every call. Used to capture what's happening BEFORE Serilog comes up
|
||||
/// (and to capture failures that would prevent Serilog from coming up at
|
||||
/// all). Failures here are swallowed — we never want diagnostics to crash
|
||||
/// the very thing we're trying to diagnose.
|
||||
///
|
||||
/// File lives at <c>%LOCALAPPDATA%\TeamsISO\startup-trace.log</c>. Grows
|
||||
/// without rotation; expected to be tiny since each launch writes ~20
|
||||
/// lines. Acceptable cost for catching launch-time regressions.
|
||||
/// </summary>
|
||||
internal static class StartupTrace
|
||||
{
|
||||
private static readonly object _gate = new();
|
||||
|
||||
public static void Write(string message)
|
||||
{
|
||||
try
|
||||
{
|
||||
var dir = Path.Combine(
|
||||
Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
|
||||
"TeamsISO");
|
||||
Directory.CreateDirectory(dir);
|
||||
var path = Path.Combine(dir, "startup-trace.log");
|
||||
var line = $"[{DateTimeOffset.Now:yyyy-MM-dd HH:mm:ss.fff}] [PID {Environment.ProcessId}] {message}{Environment.NewLine}";
|
||||
lock (_gate)
|
||||
{
|
||||
File.AppendAllText(path, line);
|
||||
}
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Diagnostics must NEVER crash startup.
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue