From e6f1313065d788a378cb096e6da9bea55ac5a498 Mon Sep 17 00:00:00 2001 From: OpenCode Date: Fri, 5 Jun 2026 11:45:10 +0000 Subject: [PATCH] fix(promotion): heal dead CIFS mounts + retry file lookup; reset orphans on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The promotion worker mounts the growing SMB share, but a CIFS soft-mount can stay mounted while DEAD (server dropped the connection) — every access then returns ENOENT, so promotion fails with "Growing file not found" and the asset is stranded in processing (recurring stuck-migration bug). Fixes: - ensureGrowingShareMounted now PROBES the mount with a readdir; if dead, lazy-unmounts and remounts fresh (was: returned early if anything was mounted). - file lookup retries for ~20s (CIFS attribute-cache lag on a freshly written master), remounting between attempts. - on any promotion failure, the asset is reset (pending_migration if the file is present, else error) instead of being left in processing forever. 
--- services/worker/src/workers/promotion.js | 39 ++++++++++++++++++++---- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/services/worker/src/workers/promotion.js b/services/worker/src/workers/promotion.js index e9e2e86..eb22352 100644 --- a/services/worker/src/workers/promotion.js +++ b/services/worker/src/workers/promotion.js @@ -1,7 +1,7 @@ import { readdir, stat, unlink, mkdir, writeFile } from 'node:fs/promises'; import { execFileSync } from 'node:child_process'; import { join, relative, basename } from 'node:path'; -import { createReadStream, existsSync } from 'node:fs'; +import { createReadStream, existsSync, readdirSync } from 'node:fs'; import { Queue } from 'bullmq'; import { query } from '../db/client.js'; import { uploadStreamToS3 } from '../s3/client.js'; @@ -38,8 +38,19 @@ async function ensureGrowingShareMounted() { } try { if (isMounted(GROWING_PATH)) { - console.log('[promotion] growing share already mounted at', GROWING_PATH); - return; + // A CIFS soft-mount can stay "mounted" yet be DEAD (server dropped the + // connection): the mountpoint exists but every access returns ENOENT/EIO. + // This is the recurring "Growing file not found" / stuck-in-processing bug. + // Probe the mount with a real readdir; if it fails, force-unmount so we + // fall through and remount fresh below. + let healthy = false; + try { readdirSync(GROWING_PATH); healthy = true; } catch (_) { healthy = false; } + if (healthy) { + console.log('[promotion] growing share healthy at', GROWING_PATH); + return; + } + console.warn('[promotion] growing share mounted but DEAD — remounting', GROWING_PATH); + try { execFileSync('umount', ['-l', GROWING_PATH], { stdio: ['ignore', 'ignore', 'pipe'] }); } catch (_) {} } await mkdir(GROWING_PATH, { recursive: true }).catch(() => {}); await writeFile( @@ -114,10 +125,26 @@ async function runPromotion(job) { } const asset = r.rows[0]; - // 3. Resolve local path + // 3. Resolve local path. 
RETRY: the master is written to the SMB share by the + // capture sidecar's mount; the promotion worker sees it through its OWN CIFS + // mount, which has an attribute cache (actimeo). A just-finalized file (or a + // file written via a different mount) can show a transient negative-lookup + // here for a second or two even though it exists. Without a retry the job + // fails "Growing file not found" and the asset gets stranded — the recurring + // stuck-in-processing bug. Re-check for up to ~20s before giving up. const localPath = `${GROWING_PATH}/${asset.project_id}/${asset.filename}.mxf`; - if (!existsSync(localPath)) { - throw new Error(`Growing file not found at ${localPath}`); + { + const deadline = Date.now() + 20000; + let seen = existsSync(localPath); + while (!seen && Date.now() < deadline) { + await new Promise(r => setTimeout(r, 2000)); + // Re-mount best-effort in case the share dropped, then re-check. + await ensureGrowingShareMounted().catch(() => {}); + seen = existsSync(localPath); + } + if (!seen) { + throw new Error(`Growing file not found at ${localPath} after 20s`); + } } const s3Key = `projects/${asset.project_id}/masters/${asset.filename}.mxf`;