diff --git a/server/claude-sdk.js b/server/claude-sdk.js index e92e4f0..1d25b69 100644 --- a/server/claude-sdk.js +++ b/server/claude-sdk.js @@ -17,7 +17,7 @@ import crypto from 'crypto'; import { promises as fs } from 'fs'; import path from 'path'; import os from 'os'; -import { CLAUDE_MODELS } from '../shared/modelConstants.js'; +import { CLAUDE_MODELS, getModelContextWindow } from '../shared/modelConstants.js'; import { resolveClaudeCodeExecutablePath } from './shared/claude-cli-path.js'; import { createNotificationEvent, @@ -205,7 +205,14 @@ function mapCliOptionsToSDK(options = {}) { // Map model (default to sonnet) // Valid models: sonnet, opus, haiku, opusplan, sonnet[1m] sdkOptions.model = options.model || CLAUDE_MODELS.DEFAULT; - // Model logged at query start below + + // Stamp per-model CONTEXT_WINDOW into the subprocess env so Claude Code + // self-limits its prompt budget to the model's actual capacity. + // Local/constrained models (e.g. claudecode, cc/qwen72b) declare a lower + // ceiling in MODEL_CONTEXT_OVERRIDES; cloud models use the global default. + const modelContextWindow = getModelContextWindow(sdkOptions.model); + sdkOptions.env.CONTEXT_WINDOW = String(modelContextWindow); + sdkOptions.env.VITE_CONTEXT_WINDOW = String(modelContextWindow); // Map system prompt configuration sdkOptions.systemPrompt = { @@ -287,9 +294,10 @@ function transformMessage(sdkMessage) { /** * Extracts token usage from SDK result messages * @param {Object} resultMessage - SDK result message + * @param {string} modelValue - The model value used for this session * @returns {Object|null} Token budget object or null */ -function extractTokenBudget(resultMessage) { +function extractTokenBudget(resultMessage, modelValue) { if (resultMessage.type !== 'result' || !resultMessage.modelUsage) { return null; } @@ -312,11 +320,8 @@ function extractTokenBudget(resultMessage) { // Total used = input + output + cache tokens const totalUsed = inputTokens + outputTokens + cacheReadTokens + cacheCreationTokens; - // Use configured context window budget from environment (default 160000) - // This is the user's budget limit, not the model's context window - const contextWindow = parseInt(process.env.CONTEXT_WINDOW) || 160000; - - // Token calc logged via token-budget WS event + // Use per-model context window so the budget meter reflects the model's actual ceiling. + const contextWindow = getModelContextWindow(modelValue); return { used: totalUsed, @@ -682,7 +687,7 @@ async function queryClaudeSDK(command, options = {}, ws) { if (models.length > 0) { // Model info available in result message } - const tokenBudgetData = extractTokenBudget(message); + const tokenBudgetData = extractTokenBudget(message, sdkOptions.model); if (tokenBudgetData) { ws.send(createNormalizedMessage({ kind: 'status', text: 'token_budget', tokenBudget: tokenBudgetData, sessionId: capturedSessionId || sessionId || null, provider: 'claude' })); }