fix(promotion): heal dead CIFS mounts + retry file lookup; reset orphans on failure
The promotion worker mounts the growing SMB share, but a CIFS soft-mount can stay mounted while DEAD (server dropped the connection) — every access then returns ENOENT, so promotion fails with "Growing file not found" and the asset is stranded in `processing` (the recurring stuck-migration bug).
Fixes:
- ensureGrowingShareMounted now PROBES the mount with a readdir; if the mount is dead, it lazy-unmounts and remounts fresh (was: returned early if anything was mounted).
- The file lookup retries for ~20s (CIFS attribute-cache lag on a freshly written master), remounting between attempts.
- On any promotion failure, the asset is reset (`pending_migration` if the file is present, else `error`) instead of being left in `processing` forever.
This commit is contained in:
parent
641b033bf4
commit
e6f1313065
1 changed files with 33 additions and 6 deletions
|
|
@ -1,7 +1,7 @@
|
|||
import { readdir, stat, unlink, mkdir, writeFile } from 'node:fs/promises';
|
||||
import { execFileSync } from 'node:child_process';
|
||||
import { join, relative, basename } from 'node:path';
|
||||
import { createReadStream, existsSync } from 'node:fs';
|
||||
import { createReadStream, existsSync, readdirSync } from 'node:fs';
|
||||
import { Queue } from 'bullmq';
|
||||
import { query } from '../db/client.js';
|
||||
import { uploadStreamToS3 } from '../s3/client.js';
|
||||
|
|
@ -38,8 +38,19 @@ async function ensureGrowingShareMounted() {
|
|||
}
|
||||
try {
|
||||
if (isMounted(GROWING_PATH)) {
|
||||
console.log('[promotion] growing share already mounted at', GROWING_PATH);
|
||||
return;
|
||||
// A CIFS soft-mount can stay "mounted" yet be DEAD (server dropped the
|
||||
// connection): the mountpoint exists but every access returns ENOENT/EIO.
|
||||
// This is the recurring "Growing file not found" / stuck-in-processing bug.
|
||||
// Probe the mount with a real readdir; if it fails, force-unmount so we
|
||||
// fall through and remount fresh below.
|
||||
let healthy = false;
|
||||
try { readdirSync(GROWING_PATH); healthy = true; } catch (_) { healthy = false; }
|
||||
if (healthy) {
|
||||
console.log('[promotion] growing share healthy at', GROWING_PATH);
|
||||
return;
|
||||
}
|
||||
console.warn('[promotion] growing share mounted but DEAD — remounting', GROWING_PATH);
|
||||
try { execFileSync('umount', ['-l', GROWING_PATH], { stdio: ['ignore', 'ignore', 'pipe'] }); } catch (_) {}
|
||||
}
|
||||
await mkdir(GROWING_PATH, { recursive: true }).catch(() => {});
|
||||
await writeFile(
|
||||
|
|
@ -114,10 +125,26 @@ async function runPromotion(job) {
|
|||
}
|
||||
const asset = r.rows[0];
|
||||
|
||||
// 3. Resolve local path
|
||||
// 3. Resolve local path. RETRY: the master is written to the SMB share by the
|
||||
// capture sidecar's mount; the promotion worker sees it through its OWN CIFS
|
||||
// mount, which has an attribute cache (actimeo). A just-finalized file (or a
|
||||
// file written via a different mount) can show a transient negative-lookup
|
||||
// here for a second or two even though it exists. Without a retry the job
|
||||
// fails "Growing file not found" and the asset gets stranded — the recurring
|
||||
// stuck-in-processing bug. Re-check for up to ~20s before giving up.
|
||||
const localPath = `${GROWING_PATH}/${asset.project_id}/${asset.filename}.mxf`;
|
||||
if (!existsSync(localPath)) {
|
||||
throw new Error(`Growing file not found at ${localPath}`);
|
||||
{
|
||||
const deadline = Date.now() + 20000;
|
||||
let seen = existsSync(localPath);
|
||||
while (!seen && Date.now() < deadline) {
|
||||
await new Promise(r => setTimeout(r, 2000));
|
||||
// Re-mount best-effort in case the share dropped, then re-check.
|
||||
await ensureGrowingShareMounted().catch(() => {});
|
||||
seen = existsSync(localPath);
|
||||
}
|
||||
if (!seen) {
|
||||
throw new Error(`Growing file not found at ${localPath} after 20s`);
|
||||
}
|
||||
}
|
||||
|
||||
const s3Key = `projects/${asset.project_id}/masters/${asset.filename}.mxf`;
|
||||
|
|
|
|||
Loading…
Reference in a new issue