fix(s3): keep-alive agents + long timeouts to end socket starvation
Root cause of assets stuck in 'processing', failed deletes, and dead playback: the mam-api proxies media (/video and /hls pipe the full S3 body through Express), which holds long-lived streaming sockets open. With the SDK's default http agents (no keep-alive, unbounded but unpooled), those streams starved control-plane calls — DeleteObject and the proxy worker's master download — which then timed out (10s connectionTimeout) in bursts.

Fixes:
- mam-api S3 client: dedicated keep-alive http/https Agents (maxSockets 256), and requestTimeout raised from 30s to 300s so that large master GETs can finish.
- worker S3 client: previously had NO handler config at all (SDK defaults). Added keep-alive agents and a 600s requestTimeout so that proxy/conform master downloads (hundreds of MB) don't stall and leave assets stuck in 'processing'.
This commit is contained in:
parent
ac1d7e1e1f
commit
b27b9f6909
2 changed files with 40 additions and 3 deletions
|
|
@ -2,8 +2,20 @@ import { NodeHttpHandler } from '@smithy/node-http-handler';
|
||||||
import { S3Client, GetObjectCommand, DeleteObjectCommand, HeadBucketCommand, ListObjectsV2Command } from '@aws-sdk/client-s3';
|
import { S3Client, GetObjectCommand, DeleteObjectCommand, HeadBucketCommand, ListObjectsV2Command } from '@aws-sdk/client-s3';
|
||||||
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
|
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
|
||||||
import { Upload } from '@aws-sdk/lib-storage';
|
import { Upload } from '@aws-sdk/lib-storage';
|
||||||
|
import http from 'node:http';
|
||||||
|
import https from 'node:https';
|
||||||
import pool from '../db/pool.js';
|
import pool from '../db/pool.js';
|
||||||
|
|
||||||
|
// Dedicated keep-alive agents with a high socket ceiling. Without these the
|
||||||
|
// SDK uses Node's default agents (effectively short-lived, low reuse); when the
|
||||||
|
// API proxies media (/video, /hls pipe the full S3 body through Express) those
|
||||||
|
// long-lived streaming sockets starve control-plane calls (DeleteObject, the
|
||||||
|
// proxy worker's master download), which then time out → assets stuck in
|
||||||
|
// 'processing', "s3 delete failed", and dead browser playback. A large pool +
|
||||||
|
// keep-alive lets streams and control ops coexist.
|
||||||
|
const _s3HttpAgent = new http.Agent({ keepAlive: true, maxSockets: 256, maxFreeSockets: 32, timeout: 120_000 });
|
||||||
|
const _s3HttpsAgent = new https.Agent({ keepAlive: true, maxSockets: 256, maxFreeSockets: 32, timeout: 120_000 });
|
||||||
|
|
||||||
// ── Mutable config ────────────────────────────────────────────────────────────
|
// ── Mutable config ────────────────────────────────────────────────────────────
|
||||||
let _cfg = {
|
let _cfg = {
|
||||||
endpoint: process.env.S3_ENDPOINT || '',
|
endpoint: process.env.S3_ENDPOINT || '',
|
||||||
|
|
@ -23,9 +35,17 @@ function buildClient(cfg) {
|
||||||
secretAccessKey: cfg.secretKey,
|
secretAccessKey: cfg.secretKey,
|
||||||
},
|
},
|
||||||
forcePathStyle: true,
|
forcePathStyle: true,
|
||||||
// Hard request/connection timeouts so a stalled RustFS GET can't hang the
|
// Keep-alive agents (above) prevent socket starvation between media streams
|
||||||
// /video and /hls endpoints forever (the original browser-playback hang).
|
// and control-plane ops. requestTimeout is generous so the proxy worker's
|
||||||
requestHandler: new NodeHttpHandler({ requestTimeout: 30_000, connectionTimeout: 10_000 }),
|
// full-master download (hundreds of MB) doesn't abort mid-transfer and leave
|
||||||
|
// the asset stuck in 'processing'; connectionTimeout stays short so a dead
|
||||||
|
// endpoint fails fast rather than hanging /video.
|
||||||
|
requestHandler: new NodeHttpHandler({
|
||||||
|
httpAgent: _s3HttpAgent,
|
||||||
|
httpsAgent: _s3HttpsAgent,
|
||||||
|
requestTimeout: 300_000,
|
||||||
|
connectionTimeout: 10_000,
|
||||||
|
}),
|
||||||
requestChecksumCalculation: 'WHEN_REQUIRED',
|
requestChecksumCalculation: 'WHEN_REQUIRED',
|
||||||
responseChecksumValidation: 'WHEN_REQUIRED',
|
responseChecksumValidation: 'WHEN_REQUIRED',
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,11 @@
|
||||||
import { S3Client, GetObjectCommand, PutObjectCommand } from '@aws-sdk/client-s3';
|
import { S3Client, GetObjectCommand, PutObjectCommand } from '@aws-sdk/client-s3';
|
||||||
|
import { NodeHttpHandler } from '@smithy/node-http-handler';
|
||||||
import { createReadStream, createWriteStream } from 'fs';
|
import { createReadStream, createWriteStream } from 'fs';
|
||||||
import { readdir } from 'fs/promises';
|
import { readdir } from 'fs/promises';
|
||||||
import { join, extname } from 'path';
|
import { join, extname } from 'path';
|
||||||
import { pipeline } from 'stream/promises';
|
import { pipeline } from 'stream/promises';
|
||||||
|
import http from 'node:http';
|
||||||
|
import https from 'node:https';
|
||||||
|
|
||||||
const CONTENT_TYPES = {
|
const CONTENT_TYPES = {
|
||||||
'.m3u8': 'application/vnd.apple.mpegurl',
|
'.m3u8': 'application/vnd.apple.mpegurl',
|
||||||
|
|
@ -10,6 +13,14 @@ const CONTENT_TYPES = {
|
||||||
'.mp4': 'video/mp4',
|
'.mp4': 'video/mp4',
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Keep-alive agents + a long request timeout. The proxy/conform jobs download
|
||||||
|
// full master files (hundreds of MB) and upload HLS segments; the SDK defaults
|
||||||
|
// (no keep-alive, 0/short timeouts under contention) caused master downloads to
|
||||||
|
// stall and abort, leaving assets stuck in 'processing'. Generous timeout +
|
||||||
|
// pooled sockets make these large transfers reliable.
|
||||||
|
const _httpAgent = new http.Agent({ keepAlive: true, maxSockets: 128, timeout: 600_000 });
|
||||||
|
const _httpsAgent = new https.Agent({ keepAlive: true, maxSockets: 128, timeout: 600_000 });
|
||||||
|
|
||||||
const createS3Client = () => {
|
const createS3Client = () => {
|
||||||
return new S3Client({
|
return new S3Client({
|
||||||
region: process.env.S3_REGION || 'us-east-1',
|
region: process.env.S3_REGION || 'us-east-1',
|
||||||
|
|
@ -19,6 +30,12 @@ const createS3Client = () => {
|
||||||
secretAccessKey: process.env.S3_SECRET_KEY,
|
secretAccessKey: process.env.S3_SECRET_KEY,
|
||||||
},
|
},
|
||||||
forcePathStyle: true,
|
forcePathStyle: true,
|
||||||
|
requestHandler: new NodeHttpHandler({
|
||||||
|
httpAgent: _httpAgent,
|
||||||
|
httpsAgent: _httpsAgent,
|
||||||
|
requestTimeout: 600_000,
|
||||||
|
connectionTimeout: 15_000,
|
||||||
|
}),
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue