diff --git a/services/mam-api/src/routes/playout.js b/services/mam-api/src/routes/playout.js index 4b68e0c..491dd87 100644 --- a/services/mam-api/src/routes/playout.js +++ b/services/mam-api/src/routes/playout.js @@ -338,9 +338,14 @@ async function spawnChannelSidecar(channel) { } } + // Set last_heartbeat_at = NOW() so the scheduler health tick treats this + // channel as freshly alive. Without this, last_heartbeat_at starts as NULL + // (epoch=0), and the very first tick sees ageMs >> TIMEOUT_MS and triggers + // failover immediately — before the sidecar has had a chance to respond. const { rows } = await pool.query( `UPDATE playout_channels - SET status = 'running', container_id = $1, container_meta = $2, updated_at = NOW() + SET status = 'running', container_id = $1, container_meta = $2, + last_heartbeat_at = NOW(), updated_at = NOW() WHERE id = $3 RETURNING *`, [containerId, JSON.stringify(containerMeta), channel.id] ); @@ -448,7 +453,14 @@ router.post('/channels/:id/play', requireChannelEdit, async (req, res, next) => asset_duration_ms: i.asset_duration_ms != null ? Number(i.asset_duration_ms) : null, })), }; - const out = await callSidecar(req.channel, '/playlist/load', 'POST', payload); + // callSidecar throws on network/timeout errors. Return 502 (not 409) so + // the UI and operators know it's a gateway problem, not a state conflict. 
+ let out; + try { + out = await callSidecar(req.channel, '/playlist/load', 'POST', payload); + } catch (err) { + return res.status(502).json({ error: 'Sidecar unreachable: ' + err.message }); + } res.json(out); } catch (err) { next(err); } }); diff --git a/services/mam-api/src/scheduler.js b/services/mam-api/src/scheduler.js index 027fa5e..51436b4 100644 --- a/services/mam-api/src/scheduler.js +++ b/services/mam-api/src/scheduler.js @@ -222,7 +222,7 @@ async function playoutHealthTick(client) { let channels; try { ({ rows: channels } = await client.query( - `SELECT id, output_type, container_meta, node_id, last_heartbeat_at, restart_count + `SELECT id, output_type, container_meta, node_id, last_heartbeat_at, updated_at, restart_count FROM playout_channels WHERE status = 'running'` )); } catch (err) { @@ -244,7 +244,12 @@ async function playoutHealthTick(client) { 'UPDATE playout_channels SET last_heartbeat_at = NOW() WHERE id = $1', [ch.id] ); } catch (err) { - const lastSeen = ch.last_heartbeat_at ? new Date(ch.last_heartbeat_at).getTime() : 0; + // last_heartbeat_at can still be NULL here (e.g. a row marked running before + // spawnChannelSidecar started stamping it), so fall back to updated_at, and + // to Date.now() if that is missing too. Treating NULL as epoch 0 would make + // ageMs exceed TIMEOUT_MS the first time this catch runs and fail over at once. + const baseline = ch.last_heartbeat_at || ch.updated_at; + const lastSeen = baseline ? new Date(baseline).getTime() : Date.now(); const ageMs = Date.now() - lastSeen; if (ageMs < TIMEOUT_MS) continue; // not yet 3 misses