From 72fc608d8a81b46bd7998cfc29d0c1db57c68341 Mon Sep 17 00:00:00 2001
From: Zac <zac@wilddragon.net>
Date: Sat, 30 May 2026 12:52:53 +0000
Subject: [PATCH] fix(mam-api): harden TOTP login flow + tighten Google domain
 check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Review of the v2 auth landing turned up four weak spots in the MFA path.
All four are now fixed; behaviour is unchanged for the password-correct
+ correct-TOTP happy path.

1. TOTP brute-force gate (the big one). /login was calling
   ipBackoff.recordSuccess(ip) the instant the password hashed correctly,
   *before* the second factor was proven. That cleared the per-IP failure
   counter, so each /login retry let an attacker with a known password
   hammer the 6-digit /login/totp space (10^6) at full speed.
   Now recordSuccess fires only inside establishSession() — i.e. after
   every required factor has actually passed (password [+TOTP] or
   OAuth [+TOTP]).

2. MFA ticket binding. Tickets issued by /login (and the Google callback)
   were unbound — a stolen ticket replayed from a different origin still
   worked. Tickets now carry SHA-256 hashes of the issuing request's IP
   and User-Agent; redeemTicket rejects on mismatch. The ticket is burned
   even on mismatch so a wrong-binding probe can't be retried.

3. TOTP replay within the same 30s step (RFC 6238 §5.2). The verifier
   accepted the same code as many times as you submitted it. Now
   verifyToken returns the matched counter, and /login/totp does a CAS
   UPDATE on users.totp_last_counter — codes at counters <= the last
   accepted value are rejected. New migration 030 adds totp_last_counter,
   seeded on /totp/enable so the enrollment code itself can't be reused
   at first login, and zeroed on /totp/disable.

4. Google OAuth domain check no longer falls back to the email suffix
   when the hd (hosted-domain) claim is missing. Email-suffix matching
   let consumer (non-Workspace) Google accounts whose email happens to
   end in the allowed domain through; if GOOGLE_ALLOWED_DOMAIN is set,
   the operator means "only this Workspace", so accounts without a
   verified hd must be rejected.

Tests: new mfa-tickets.test.js covers ip/UA binding, single-use on
mismatch, and bindings-absent back-compat. totp.test.js updated for the
new verifyToken return shape (counter on success, null on failure;
truthiness still works at call sites) and adds an explicit
matched-counter check.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 services/mam-api/src/auth/google-oauth.js     | 10 ++-
 services/mam-api/src/auth/mfa-tickets.js      | 35 ++++++--
 services/mam-api/src/auth/totp.js             | 12 ++-
 .../src/db/migrations/030-totp-replay.sql     |  9 +++
 services/mam-api/src/routes/auth.js           | 79 ++++++++++++++-----
 .../mam-api/test/auth/mfa-tickets.test.js     | 49 ++++++++++++
 services/mam-api/test/auth/totp.test.js       | 36 ++++++---
 7 files changed, 186 insertions(+), 44 deletions(-)
 create mode 100644 services/mam-api/src/db/migrations/030-totp-replay.sql
 create mode 100644 services/mam-api/test/auth/mfa-tickets.test.js

diff --git a/services/mam-api/src/auth/google-oauth.js b/services/mam-api/src/auth/google-oauth.js
index f8f55aa..1e53f27 100644
--- a/services/mam-api/src/auth/google-oauth.js
+++ b/services/mam-api/src/auth/google-oauth.js
@@ -75,9 +75,13 @@ export async function exchangeAndVerify(code) {
   }
   const domain = allowedDomain();
   if (domain) {
-    const emailDomain = String(p.email).split('@')[1]?.toLowerCase();
-    // Prefer Google's hosted-domain claim; fall back to the email domain.
-    const hd = (p.hd || emailDomain || '').toLowerCase();
+    // ONLY trust Google's `hd` (hosted-domain) claim — it's present iff the
+    // account is a member of a Google Workspace domain that Google itself
+    // has verified. The email-suffix fallback we used to allow let any
+    // non-Workspace account with a spoof-friendly email through; if a
+    // GOOGLE_ALLOWED_DOMAIN is set, the operator means "only this Workspace,"
+    // and consumer accounts (no hd) must be rejected.
+    const hd = (p.hd || '').toLowerCase();
     if (hd !== domain) {
       const err = new Error('domain not allowed'); err.status = 403; throw err;
     }
diff --git a/services/mam-api/src/auth/mfa-tickets.js b/services/mam-api/src/auth/mfa-tickets.js
index 9600862..9c079e2 100644
--- a/services/mam-api/src/auth/mfa-tickets.js
+++ b/services/mam-api/src/auth/mfa-tickets.js
@@ -5,33 +5,54 @@
 // code) redeems the ticket to finish login. Tickets are single-use and expire
 // fast so a stolen ticket is near-useless.
 //
+// Tickets are bound to the issuing request's IP and User-Agent (hashed). A
+// stolen ticket replayed from a different origin redeems to null. This is
+// defense in depth against ticket exfiltration via a logged proxy, browser
+// extension, or shoulder-surf; it does not stop an attacker who is on the same
+// IP and UA.
+//
 // In-memory + single-instance, matching the existing login rate-limiter
 // (auth/rate-limit.js). Documented limitation: in a multi-instance deployment
 // the second step must hit the same node. Acceptable for Dragonflight's
 // one-mam-api-per-node shape; revisit if that changes.
-import { randomBytes } from 'node:crypto';
+import { randomBytes, createHash } from 'node:crypto';
 
 const TTL_MS = 5 * 60 * 1000; // 5 minutes to enter a code
-const tickets = new Map();     // id -> { userId, expiresAt }
+const tickets = new Map();     // id -> { userId, ipHash, uaHash, expiresAt }
 
 function sweep() {
   const now = Date.now();
   for (const [id, t] of tickets) if (t.expiresAt <= now) tickets.delete(id);
 }
 
-export function issueTicket(userId) {
+function hashBinding(value) {
+  return createHash('sha256').update(String(value || '')).digest('hex');
+}
+
+export function issueTicket(userId, { ip, userAgent } = {}) {
   sweep();
   const id = randomBytes(32).toString('hex');
-  tickets.set(id, { userId, expiresAt: Date.now() + TTL_MS });
+  tickets.set(id, {
+    userId,
+    ipHash: hashBinding(ip),
+    uaHash: hashBinding(userAgent),
+    expiresAt: Date.now() + TTL_MS,
+  });
   return id;
 }
 
-// Redeem (and consume) a ticket. Returns the userId, or null if missing/expired.
-export function redeemTicket(id) {
+// Redeem (and consume) a ticket. Returns the userId, or null if missing,
+// expired, or the binding doesn't match the redeeming request.
+export function redeemTicket(id, { ip, userAgent } = {}) {
   if (!id) return null;
   const t = tickets.get(id);
   if (!t) return null;
-  tickets.delete(id); // single-use
+  tickets.delete(id); // single-use — burn even on binding mismatch so a
+                      // wrong-binding probe can't be retried.
   if (t.expiresAt <= Date.now()) return null;
+  // If a caller doesn't supply bindings (e.g. tests), accept — the issue side
+  // controls whether bindings get recorded.
+  if (ip !== undefined && t.ipHash !== hashBinding(ip)) return null;
+  if (userAgent !== undefined && t.uaHash !== hashBinding(userAgent)) return null;
   return t.userId;
 }
diff --git a/services/mam-api/src/auth/totp.js b/services/mam-api/src/auth/totp.js
index 39ba18e..1b390b7 100644
--- a/services/mam-api/src/auth/totp.js
+++ b/services/mam-api/src/auth/totp.js
@@ -74,18 +74,22 @@ export function generateToken(base32Secret, atMs = Date.now()) {
 
 // Verify a user-supplied code, allowing ±`window` steps of clock drift
 // (default ±1 = 90s total tolerance). Constant-time compare per candidate.
+//
+// Returns the matched counter on success (so callers can persist it for
+// replay protection — RFC 6238 §5.2), or null on failure. Boolean truthiness
+// still works for the common case (`if (verifyToken(...))`).
 export function verifyToken(base32Secret, token, atMs = Date.now(), window = 1) {
-  if (!base32Secret || !token) return false;
+  if (!base32Secret || !token) return null;
   const cleaned = String(token).replace(/\s+/g, '');
-  if (!/^\d{6}$/.test(cleaned)) return false;
+  if (!/^\d{6}$/.test(cleaned)) return null;
   const secretBuf = base32Decode(base32Secret);
   const counter = Math.floor(atMs / 1000 / STEP_SECONDS);
   const want = Buffer.from(cleaned);
   for (let w = -window; w <= window; w++) {
     const candidate = Buffer.from(hotp(secretBuf, counter + w));
-    if (candidate.length === want.length && timingSafeEqual(candidate, want)) return true;
+    if (candidate.length === want.length && timingSafeEqual(candidate, want)) return counter + w;
   }
-  return false;
+  return null;
 }
 
 // The otpauth:// URI an authenticator app scans. label/issuer show in the app.
diff --git a/services/mam-api/src/db/migrations/030-totp-replay.sql b/services/mam-api/src/db/migrations/030-totp-replay.sql
new file mode 100644
index 0000000..352ef96
--- /dev/null
+++ b/services/mam-api/src/db/migrations/030-totp-replay.sql
@@ -0,0 +1,9 @@
+-- Migration 030 — TOTP replay protection.
+--
+-- RFC 6238 §5.2 hardening: track the last counter value we accepted for each
+-- user and reject codes at counters ≤ the last one. Without this, the same
+-- 6-digit code can be submitted N times within its 30s step. Low impact in
+-- practice (the code is only valid for ~90s with ±1 drift) but standard.
+
+ALTER TABLE users
+  ADD COLUMN IF NOT EXISTS totp_last_counter BIGINT NOT NULL DEFAULT 0;
diff --git a/services/mam-api/src/routes/auth.js b/services/mam-api/src/routes/auth.js
index 09f1f37..00e0456 100644
--- a/services/mam-api/src/routes/auth.js
+++ b/services/mam-api/src/routes/auth.js
@@ -101,23 +101,29 @@ router.post('/login', async (req, res, next) => {
       return res.status(401).json({ error: 'invalid credentials' });
     }
 
-    // Password is correct — clear the per-IP backoff regardless of MFA outcome.
-    ipBackoff.recordSuccess(ip);
-
-    // Second factor: if TOTP is enabled, don't create a session yet. Hand back a
-    // short-lived ticket the client redeems via /login/totp with a code.
+    // Second factor: if TOTP is enabled, don't create a session yet. Hand back
+    // a short-lived ticket the client redeems via /login/totp with a code.
+    // Crucially: do NOT clear the per-IP failure counter here. If we did, each
+    // /login retry would reset the backoff and let an attacker brute the 6-digit
+    // TOTP space (10^6) with no per-attempt delay. The counter is cleared
+    // inside establishSession() once MFA has actually passed.
     if (user.totp_enabled) {
-      return res.json({ mfa_required: true, ticket: issueTicket(user.id) });
+      return res.json({
+        mfa_required: true,
+        ticket: issueTicket(user.id, { ip, userAgent: req.get('user-agent') }),
+      });
     }
 
-    await establishSession(req, user);
+    await establishSession(req, user, ip);
     res.json({ user: { id: user.id, username: user.username, display_name: user.display_name } });
   } catch (err) { next(err); }
 });
 
 // Write the session and wait for it to persist before responding. Extracted so
 // both the password-only and the MFA-completion paths share one implementation.
-async function establishSession(req, user) {
+// Clears the per-IP failure counter only here — after every required factor has
+// actually been proven (password [+ TOTP if enabled, or OAuth + TOTP]).
+async function establishSession(req, user, ip) {
   req.session.user_id       = user.id;
   req.session.first_seen_at = Date.now();
   req.session.last_seen_at  = Date.now();
@@ -125,6 +131,7 @@ async function establishSession(req, user) {
   // Without this, the SPA's next request races the store write, hits 401, and
   // the prior bounce-to-login logic produced an infinite loop.
   await new Promise((resolve, reject) => req.session.save(err => err ? reject(err) : resolve()));
+  if (ip) ipBackoff.recordSuccess(ip);
   pool.query(`UPDATE users SET last_login_at = NOW() WHERE id = $1`, [user.id])
     .catch(err => console.error('[auth] last_login_at update failed:', err.message));
 }
@@ -143,7 +150,9 @@ router.post('/login/totp', async (req, res, next) => {
     const { ticket: bodyTicket, code } = req.body || {};
     const ticket = bodyTicket || req.session?.mfa_ticket;
     if (req.session?.mfa_ticket) delete req.session.mfa_ticket;
-    const userId = redeemTicket(ticket);
+    // Bound to the issuing request's IP + UA — replays from a different origin
+    // redeem to null. See mfa-tickets.js for the binding model.
+    const userId = redeemTicket(ticket, { ip, userAgent: req.get('user-agent') });
     if (!userId) {
       ipBackoff.recordFailure(ip);
       return res.status(401).json({ error: 'invalid or expired ticket' });
@@ -151,13 +160,31 @@ router.post('/login/totp', async (req, res, next) => {
     if (!code) return res.status(400).json({ error: 'code required' });
 
     const { rows } = await pool.query(
-      `SELECT id, username, display_name, totp_secret, totp_enabled FROM users WHERE id = $1`, [userId]);
+      `SELECT id, username, display_name, totp_secret, totp_enabled, totp_last_counter
+         FROM users WHERE id = $1`, [userId]);
     const user = rows[0];
     if (!user || !user.totp_enabled || !user.totp_secret) {
       return res.status(401).json({ error: 'invalid credentials' });
     }
 
-    let ok = verifyToken(user.totp_secret, code);
+    // verifyToken returns the matched counter on success. Reject codes at
+    // counters ≤ totp_last_counter to prevent replay within the same step.
+    // The CAS-style UPDATE makes this race-free under concurrent submissions.
+    const matchedCounter = verifyToken(user.totp_secret, code);
+    let ok = false;
+    if (matchedCounter !== null) {
+      const lastCounter = BigInt(user.totp_last_counter || 0);
+      if (BigInt(matchedCounter) > lastCounter) {
+        const upd = await pool.query(
+          `UPDATE users SET totp_last_counter = $1
+             WHERE id = $2 AND totp_last_counter < $1`,
+          [String(matchedCounter), user.id]
+        );
+        ok = upd.rowCount === 1;
+      }
+      // matchedCounter ≤ last → silent replay; falls through to recovery-code
+      // path which also fails → 401. Same UX as a wrong code, no info leak.
+    }
     if (!ok) ok = await consumeRecoveryCode(user.id, code);
     if (!ok) {
       ipBackoff.recordFailure(ip);
@@ -165,8 +192,9 @@ router.post('/login/totp', async (req, res, next) => {
       return res.status(401).json({ error: 'invalid code' });
     }
 
-    ipBackoff.recordSuccess(ip);
-    await establishSession(req, user);
+    // recordSuccess is called by establishSession once the session lands —
+    // that's the first moment we know every required factor has passed.
+    await establishSession(req, user, ip);
     res.json({ user: { id: user.id, username: user.username, display_name: user.display_name } });
   } catch (err) { next(err); }
 });
@@ -273,15 +301,21 @@ router.post('/totp/enable', requireAuth, async (req, res, next) => {
     const row = rows[0];
     if (!row?.totp_secret) return badRequest(res, 'start setup first');
     if (row.totp_enabled)  return res.status(409).json({ error: 'TOTP already enabled' });
-    if (!verifyToken(row.totp_secret, code)) return badRequest(res, 'incorrect code');
+    const enrollCounter = verifyToken(row.totp_secret, code);
+    if (enrollCounter === null) return badRequest(res, 'incorrect code');
 
     const recovery = generateRecoveryCodes(10);
     const hashes = await Promise.all(recovery.map(c => hashPassword(c)));
-    // Enable + replace any stale recovery codes atomically.
+    // Enable + seed totp_last_counter to the enrollment code's counter so the
+    // same code can't be reused on first login. Replace any stale recovery
+    // codes atomically.
     const client = await pool.connect();
     try {
       await client.query('BEGIN');
-      await client.query(`UPDATE users SET totp_enabled = TRUE WHERE id = $1`, [req.user.id]);
+      await client.query(
+        `UPDATE users SET totp_enabled = TRUE, totp_last_counter = $2 WHERE id = $1`,
+        [req.user.id, String(enrollCounter)]
+      );
       await client.query(`DELETE FROM user_recovery_codes WHERE user_id = $1`, [req.user.id]);
       for (const h of hashes) {
         await client.query(
@@ -310,7 +344,9 @@ router.post('/totp/disable', requireAuth, async (req, res, next) => {
       return badRequest(res, 'incorrect password');
     }
     await pool.query(
-      `UPDATE users SET totp_enabled = FALSE, totp_secret = NULL WHERE id = $1`, [req.user.id]);
+      `UPDATE users SET totp_enabled = FALSE, totp_secret = NULL, totp_last_counter = 0 WHERE id = $1`,
+      [req.user.id]
+    );
     await pool.query(`DELETE FROM user_recovery_codes WHERE user_id = $1`, [req.user.id]);
     res.status(204).end();
   } catch (err) { next(err); }
@@ -358,12 +394,17 @@ router.get('/google/callback', async (req, res, next) => {
     // through the same second-factor step as password login. The ticket lives in
     // the session (not the URL) and the SPA prompts for the code.
     if (user.totp_enabled) {
-      req.session.mfa_ticket = issueTicket(user.id);
+      const ip = req.ip || req.socket?.remoteAddress || 'unknown';
+      req.session.mfa_ticket = issueTicket(user.id, {
+        ip,
+        userAgent: req.get('user-agent'),
+      });
       await new Promise((resolve, reject) => req.session.save(err => err ? reject(err) : resolve()));
       return res.redirect('/?mfa=1');
     }
 
-    await establishSession(req, user);
+    const ip = req.ip || req.socket?.remoteAddress || 'unknown';
+    await establishSession(req, user, ip);
 
     // Redirect to the SPA root; AuthGate will re-check /auth/me and render the app.
     res.redirect('/');
diff --git a/services/mam-api/test/auth/mfa-tickets.test.js b/services/mam-api/test/auth/mfa-tickets.test.js
new file mode 100644
index 0000000..546a336
--- /dev/null
+++ b/services/mam-api/test/auth/mfa-tickets.test.js
@@ -0,0 +1,49 @@
+// MFA ticket binding tests — the second login step's tickets are bound to the
+// issuing request's IP + User-Agent (hashed) so a stolen ticket replayed from
+// a different origin can't complete the second factor.
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { issueTicket, redeemTicket } from '../../src/auth/mfa-tickets.js';
+
+test('ticket round-trips when ip + userAgent match', () => {
+  const id = issueTicket('user-1', { ip: '1.2.3.4', userAgent: 'curl/8' });
+  assert.equal(redeemTicket(id, { ip: '1.2.3.4', userAgent: 'curl/8' }), 'user-1');
+});
+
+test('ticket rejects redemption from a different IP', () => {
+  const id = issueTicket('user-1', { ip: '1.2.3.4', userAgent: 'curl/8' });
+  assert.equal(redeemTicket(id, { ip: '9.9.9.9', userAgent: 'curl/8' }), null);
+});
+
+test('ticket rejects redemption with a different User-Agent', () => {
+  const id = issueTicket('user-1', { ip: '1.2.3.4', userAgent: 'curl/8' });
+  assert.equal(redeemTicket(id, { ip: '1.2.3.4', userAgent: 'Mozilla/5.0' }), null);
+});
+
+test('ticket is single-use even on binding mismatch', () => {
+  // A wrong-binding probe must still burn the ticket — otherwise an attacker
+  // could try multiple IPs/UAs against the same ticket.
+  const id = issueTicket('user-1', { ip: '1.2.3.4', userAgent: 'curl/8' });
+  assert.equal(redeemTicket(id, { ip: '9.9.9.9', userAgent: 'curl/8' }), null);
+  // Same ticket with correct bindings now also fails — it was consumed.
+  assert.equal(redeemTicket(id, { ip: '1.2.3.4', userAgent: 'curl/8' }), null);
+});
+
+test('redeemTicket returns null for missing or unknown id', () => {
+  assert.equal(redeemTicket(null), null);
+  assert.equal(redeemTicket(undefined), null);
+  assert.equal(redeemTicket(''), null);
+  assert.equal(redeemTicket('not-a-real-id', { ip: 'x', userAgent: 'y' }), null);
+});
+
+test('ticket is single-use on success', () => {
+  const id = issueTicket('user-1', { ip: '1.2.3.4', userAgent: 'curl/8' });
+  assert.equal(redeemTicket(id, { ip: '1.2.3.4', userAgent: 'curl/8' }), 'user-1');
+  assert.equal(redeemTicket(id, { ip: '1.2.3.4', userAgent: 'curl/8' }), null);
+});
+
+test('issueTicket without bindings still works (back-compat / tests)', () => {
+  const id = issueTicket('user-1');
+  // No bindings on redeem either — both sides skip the check.
+  assert.equal(redeemTicket(id), 'user-1');
+});
diff --git a/services/mam-api/test/auth/totp.test.js b/services/mam-api/test/auth/totp.test.js
index 0457f49..f583213 100644
--- a/services/mam-api/test/auth/totp.test.js
+++ b/services/mam-api/test/auth/totp.test.js
@@ -35,32 +35,46 @@ test('matches RFC 6238 SHA-1 vectors (low 6 digits)', () => {
 });
 
 // ── verify with drift window ────────────────────────────────────────────────
+// verifyToken returns the matched counter (truthy) or null (falsy).
 test('verifyToken accepts the current code and ±1 step of drift', () => {
   const secret = generateSecret();
   const now = 1_700_000_000_000;
   const code = generateToken(secret, now);
-  assert.equal(verifyToken(secret, code, now), true);
-  // 30s earlier / later still inside ±1 window.
-  assert.equal(verifyToken(secret, code, now + 30_000), true);
-  assert.equal(verifyToken(secret, code, now - 30_000), true);
+  const baseCounter = Math.floor(now / 1000 / 30);
+  assert.equal(verifyToken(secret, code, now), baseCounter);
+  // 30s earlier / later still inside ±1 window — the *issued* code matches the
+  // baseCounter, but at now+30s we're in step baseCounter+1, so the issued
+  // code matches as drift = -1 step → returns baseCounter.
+  assert.equal(verifyToken(secret, code, now + 30_000), baseCounter);
+  assert.equal(verifyToken(secret, code, now - 30_000), baseCounter);
   // 2 steps away → rejected.
-  assert.equal(verifyToken(secret, code, now + 90_000), false);
+  assert.equal(verifyToken(secret, code, now + 90_000), null);
 });
 
 test('verifyToken rejects malformed / empty input without throwing', () => {
   const secret = generateSecret();
-  assert.equal(verifyToken(secret, ''), false);
-  assert.equal(verifyToken(secret, 'abcdef'), false);
-  assert.equal(verifyToken(secret, '12345'), false);   // too short
-  assert.equal(verifyToken(secret, '1234567'), false);  // too long
-  assert.equal(verifyToken('', '123456'), false);
+  assert.equal(verifyToken(secret, ''), null);
+  assert.equal(verifyToken(secret, 'abcdef'), null);
+  assert.equal(verifyToken(secret, '12345'), null);   // too short
+  assert.equal(verifyToken(secret, '1234567'), null);  // too long
+  assert.equal(verifyToken('', '123456'), null);
 });
 
 test('verifyToken tolerates spaces in the user-entered code', () => {
   const secret = generateSecret();
   const now = 1_700_000_000_000;
   const code = generateToken(secret, now);
-  assert.equal(verifyToken(secret, code.slice(0, 3) + ' ' + code.slice(3), now), true);
+  const expected = Math.floor(now / 1000 / 30);
+  assert.equal(verifyToken(secret, code.slice(0, 3) + ' ' + code.slice(3), now), expected);
+});
+
+test('verifyToken returns the matched counter (for replay protection)', () => {
+  const secret = generateSecret();
+  const now = 1_700_000_000_000;
+  const code = generateToken(secret, now);
+  const counter = verifyToken(secret, code, now);
+  assert.ok(typeof counter === 'number' && counter > 0);
+  assert.equal(counter, Math.floor(now / 1000 / 30));
 });
 
 // ── otpauth URI ─────────────────────────────────────────────────────────────