mam-api self-heartbeat now reads NODE_HOSTNAME so primary rows survive container restarts instead of resurrecting with the random container ID. test-cluster.sh rewritten to use jq (the python f-strings had a parse bug that silently passed the IP check) and limited the docker-bridge alarm to 172.17.x since the user LAN occupies 172.18.0.0/16.
185 lines
7.2 KiB
JavaScript
185 lines
7.2 KiB
JavaScript
import 'dotenv/config';
|
|
import express from 'express';
|
|
import cors from 'cors';
|
|
import session from 'express-session';
|
|
import ConnectPgSimple from 'connect-pg-simple';
|
|
import os from 'node:os';
|
|
import { exec } from 'node:child_process';
|
|
import pool from './db/pool.js';
|
|
import { errorHandler } from './middleware/errors.js';
|
|
import { loadS3ConfigFromDb } from './s3/client.js';
|
|
|
|
// Routes
|
|
import authRouter from './routes/auth.js';
|
|
import assetsRouter from './routes/assets.js';
|
|
import projectsRouter from './routes/projects.js';
|
|
import binsRouter from './routes/bins.js';
|
|
import jobsRouter from './routes/jobs.js';
|
|
import captureRouter from './routes/capture.js';
|
|
import uploadRouter from './routes/upload.js';
|
|
import recordersRouter from './routes/recorders.js';
|
|
import settingsRouter from './routes/settings.js';
|
|
import amppRouter from './routes/ampp.js';
|
|
import usersRouter from './routes/users.js';
|
|
import groupsRouter from './routes/groups.js';
|
|
import tokensRouter from './routes/tokens.js';
|
|
import sequencesRouter from './routes/sequences.js';
|
|
import systemRouter from './routes/system.js';
|
|
import clusterRouter from './routes/cluster.js';
|
|
|
|
const app = express();

// Port the HTTP server binds to; falls back to 3000 when PORT is unset or empty.
const PORT = process.env.PORT || 3000;

// ── Middleware ────────────────────────────────────────────────────────────────
// NOTE(review): `origin: true` combined with `credentials: true` appears to allow
// credentialed requests from any origin — confirm this is intended outside dev.
app.use(cors({ origin: true, credentials: true }));
app.use(express.json({ limit: '50mb' }));

const PgSession = ConnectPgSimple(session);

// Fallback secret used when SESSION_SECRET is not provided. It is a publicly
// known value, so make the fallback loud instead of silent.
const DEFAULT_SESSION_SECRET = 'change-me-in-production';
if (!process.env.SESSION_SECRET) {
  console.warn(
    '[session] SESSION_SECRET is not set; falling back to the built-in default secret. ' +
      'Set SESSION_SECRET before exposing this service.'
  );
}

app.use(
  session({
    // Sessions are stored through the shared pg pool in the `sessions` table.
    store: new PgSession({
      pool,
      tableName: 'sessions',
      pruneSessionInterval: 3600,
    }),
    secret: process.env.SESSION_SECRET || DEFAULT_SESSION_SECRET,
    resave: false,
    saveUninitialized: false,
    cookie: {
      // Secure cookies only when NODE_ENV is exactly 'production'.
      secure: process.env.NODE_ENV === 'production',
      httpOnly: true,
      // 24 hours, in milliseconds.
      maxAge: 1000 * 60 * 60 * 24,
    },
  })
);
|
|
|
|
// ── Health (no auth) ──────────────────────────────────────────────────────────
// Liveness probe: always answers { status: 'ok' }.
app.get('/health', (_req, res) => res.json({ status: 'ok' }));

// ── API Routes ────────────────────────────────────────────────────────────────
// Mount table: [path prefix, router]. Routers are registered in this order.
const apiRoutes = [
  ['/api/v1/auth', authRouter],
  ['/api/v1/assets', assetsRouter],
  ['/api/v1/projects', projectsRouter],
  ['/api/v1/bins', binsRouter],
  ['/api/v1/jobs', jobsRouter],
  ['/api/v1/capture', captureRouter],
  ['/api/v1/upload', uploadRouter],
  ['/api/v1/recorders', recordersRouter],
  ['/api/v1/settings', settingsRouter],
  ['/api/v1/ampp', amppRouter],
  ['/api/v1/users', usersRouter],
  ['/api/v1/groups', groupsRouter],
  ['/api/v1/tokens', tokensRouter],
  ['/api/v1/sequences', sequencesRouter],
  ['/api/v1/system', systemRouter],
  ['/api/v1/cluster', clusterRouter],
];

for (const [mountPath, router] of apiRoutes) {
  app.use(mountPath, router);
}

// ── Error handler ─────────────────────────────────────────────────────────────
// Registered after every route so it is the last middleware in the chain.
app.use(errorHandler);
|
|
|
|
// ── Start ────────────────────────────────────────────────────────────────────
|
|
import { readdirSync, readFileSync } from 'node:fs';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { dirname, join } from 'node:path';
|
|
|
|
// Directory containing this module; used to locate the bundled migrations.
const __dirnameMig = dirname(fileURLToPath(import.meta.url));

/**
 * Run every `.sql` file found in the migrations directory through `pool.query`,
 * in lexicographic filename order.
 *
 * A failing file is logged and the loop continues with the next one, so a single
 * bad migration does not prevent the server from starting.
 * NOTE(review): no record of applied files is kept here, so each file is
 * re-executed on every start — presumably the SQL is written to be idempotent; confirm.
 *
 * @param {string} [dir] - Directory to scan. Defaults to `db/migrations` next to this module.
 * @returns {Promise<void>} Resolves once every file has been attempted.
 */
async function runMigrations(dir = join(__dirnameMig, 'db', 'migrations')) {
  let files;
  try {
    files = readdirSync(dir)
      .filter((name) => name.endsWith('.sql'))
      .sort();
  } catch (err) {
    // A missing directory simply means there is nothing to migrate. Any other
    // failure (permissions, not a directory, ...) is reported rather than hidden.
    if (err.code !== 'ENOENT') {
      console.error(`[migration] cannot read ${dir}:`, err.message);
    }
    return;
  }

  for (const file of files) {
    try {
      // Reading happens inside the try so an unreadable file is reported like
      // any other failed migration instead of aborting startup.
      const sql = readFileSync(join(dir, file), 'utf8');
      await pool.query(sql);
      console.log(`[migration] applied ${file}`);
    } catch (err) {
      console.error(`[migration] failed ${file}`, err.message);
    }
  }
}
|
|
// Run the SQL migrations before the rest of startup continues.
await runMigrations();

// Settings saved via the Settings page are stored in the DB and override env
// vars, so pull the S3 config from the DB before the server starts listening.
await loadS3ConfigFromDb();
|
|
|
|
// ── Cluster self-heartbeat ────────────────────────────────────────────────────
|
|
/**
 * Pick the IPv4 address this node reports for itself.
 *
 * Resolution order:
 *   1. `NODE_IP` environment variable, when set to a non-empty value.
 *   2. The first non-internal IPv4 address returned by `os.networkInterfaces()`.
 *   3. `'127.0.0.1'` when nothing else is found.
 *
 * @returns {string} The selected IPv4 address.
 */
function getLocalIp() {
  // Prefer an explicit override — useful when running inside Docker where
  // os.networkInterfaces() returns container bridge IPs, not the host LAN IP.
  if (process.env.NODE_IP) return process.env.NODE_IP;

  for (const addresses of Object.values(os.networkInterfaces())) {
    for (const iface of addresses ?? []) {
      // Node 18.0–18.3 reported `family` as the number 4 instead of 'IPv4';
      // accept both so the lookup does not silently fall back to loopback there.
      const isIPv4 = iface.family === 'IPv4' || iface.family === 4;
      if (isIPv4 && !iface.internal) return iface.address;
    }
  }

  return '127.0.0.1';
}
|
|
|
|
/**
 * Detect NVIDIA GPUs available to this container via nvidia-smi.
 *
 * Runs `nvidia-smi` with a 5 second timeout and parses its CSV output
 * (one `index, name, memory.total` row per GPU). Rows whose first field is not
 * an integer are dropped; a missing name becomes 'Unknown GPU' and an
 * unparsable memory value becomes 0.
 *
 * @returns {Promise<Array<{ index: number, name: string, memory_mb: number }>>}
 *   e.g. [{ index: 0, name: 'Tesla P4', memory_mb: 7680 }, ...], or an empty
 *   array if nvidia-smi fails, is unavailable, or prints nothing.
 */
function detectGpus() {
  return new Promise((resolve) => {
    exec(
      'nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader,nounits',
      { timeout: 5000 },
      (err, stdout) => {
        // Any failure (binary missing, timeout, non-zero exit) means "no GPUs".
        if (err || !stdout.trim()) return resolve([]);

        const gpus = stdout
          .trim()
          .split('\n')
          .map((line) => {
            const [index, name, memory] = line.split(',').map((field) => field.trim());
            return {
              index: Number.parseInt(index, 10),
              name: name || 'Unknown GPU',
              memory_mb: Number.parseInt(memory, 10) || 0,
            };
          })
          .filter((gpu) => !Number.isNaN(gpu.index));

        resolve(gpus);
      }
    );
  });
}
|
|
|
|
/**
 * Upsert this node's row in `cluster_nodes` with fresh load, memory and GPU data.
 *
 * The row is keyed by hostname (`NODE_HOSTNAME` if set, otherwise `os.hostname()`)
 * and is inserted with role 'primary'. On conflict only the IP, CPU/memory
 * figures, capabilities and `last_seen` are refreshed.
 * NOTE(review): `version` and `api_url` are not part of the ON CONFLICT update,
 * so an existing row keeps its old values even though `ip_address` changes —
 * confirm whether that is intended.
 *
 * Never rejects: any failure is logged as '[cluster] heartbeat failed: …'.
 *
 * @returns {Promise<void>} Resolves after the write has completed or failed.
 */
async function selfHeartbeat() {
  try {
    const load = os.loadavg()[0]; // 1-minute load average
    const totalBytes = os.totalmem();
    const usedBytes = totalBytes - os.freemem();
    const gpus = await detectGpus();
    const capabilities = { gpus, blackmagic: [] };

    // Resolve the address once so ip_address and api_url always agree.
    const ip = getLocalIp();
    const bytesPerMb = 1024 * 1024;

    // Awaited (not fire-and-forget) so the returned promise reflects the write.
    await pool.query(
      `INSERT INTO cluster_nodes
         (hostname, ip_address, role, version, api_url,
          cpu_usage, mem_used_mb, mem_total_mb, capabilities, last_seen)
       VALUES ($1,$2,'primary',$3,$4,$5,$6,$7,$8,NOW())
       ON CONFLICT (hostname) DO UPDATE SET
         ip_address   = EXCLUDED.ip_address,
         cpu_usage    = EXCLUDED.cpu_usage,
         mem_used_mb  = EXCLUDED.mem_used_mb,
         mem_total_mb = EXCLUDED.mem_total_mb,
         capabilities = EXCLUDED.capabilities,
         last_seen    = NOW()`,
      [
        process.env.NODE_HOSTNAME || os.hostname(),
        ip,
        process.env.npm_package_version || null,
        `http://${ip}:${PORT}`,
        parseFloat(load.toFixed(2)),
        Math.round(usedBytes / bytesPerMb),
        Math.round(totalBytes / bytesPerMb),
        JSON.stringify(capabilities),
      ]
    );
  } catch (err) {
    console.error('[cluster] heartbeat failed:', err.message);
  }
}
|
|
|
|
// Report this node every 30 seconds, plus once right away so its row is
// written without waiting for the first interval tick.
const HEARTBEAT_INTERVAL_MS = 30_000;
setInterval(selfHeartbeat, HEARTBEAT_INTERVAL_MS);
selfHeartbeat();

// Start accepting HTTP connections and log the port and auth mode.
app.listen(PORT, () => {
  let authMode = 'DISABLED (set AUTH_ENABLED=true for production)';
  if (process.env.AUTH_ENABLED === 'true') {
    authMode = 'ENABLED';
  }

  console.log(`MAM API listening on port ${PORT}`);
  console.log(`Authentication: ${authMode}`);
});
|