feat: stamp per-model CONTEXT_WINDOW into subprocess env
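Uses getModelContextWindow() to override CONTEXT_WINDOW (and VITE_CONTEXT_WINDOW) in the env passed to the Claude Code subprocess. Local/constrained models (claudecode, cc/qwen72b, etc.) now self-limit their prompt budget to their actual VRAM context ceiling instead of using the global 160k default. extractTokenBudget() now takes the session's model and reports the token budget against that same per-model window instead of reading process.env.CONTEXT_WINDOW.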
This commit is contained in:
parent
69e478224c
commit
07b6f4eb87
1 changed file with 14 additions and 9 deletions
|
|
@@ -17,7 +17,7 @@ import crypto from 'crypto';
|
|||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import { CLAUDE_MODELS } from '../shared/modelConstants.js';
|
||||
import { CLAUDE_MODELS, getModelContextWindow } from '../shared/modelConstants.js';
|
||||
import { resolveClaudeCodeExecutablePath } from './shared/claude-cli-path.js';
|
||||
import {
|
||||
createNotificationEvent,
|
||||
|
|
@@ -205,7 +205,14 @@ function mapCliOptionsToSDK(options = {}) {
|
|||
// Map model (default to sonnet)
|
||||
// Valid models: sonnet, opus, haiku, opusplan, sonnet[1m]
|
||||
sdkOptions.model = options.model || CLAUDE_MODELS.DEFAULT;
|
||||
// Model logged at query start below
|
||||
|
||||
// Stamp per-model CONTEXT_WINDOW into the subprocess env so Claude Code
|
||||
// self-limits its prompt budget to the model's actual capacity.
|
||||
// Local/constrained models (e.g. claudecode, cc/qwen72b) declare a lower
|
||||
// ceiling in MODEL_CONTEXT_OVERRIDES; cloud models use the global default.
|
||||
const modelContextWindow = getModelContextWindow(sdkOptions.model);
|
||||
sdkOptions.env.CONTEXT_WINDOW = String(modelContextWindow);
|
||||
sdkOptions.env.VITE_CONTEXT_WINDOW = String(modelContextWindow);
|
||||
|
||||
// Map system prompt configuration
|
||||
sdkOptions.systemPrompt = {
|
||||
|
|
@@ -287,9 +294,10 @@ function transformMessage(sdkMessage) {
|
|||
/**
|
||||
* Extracts token usage from SDK result messages
|
||||
* @param {Object} resultMessage - SDK result message
|
||||
* @param {string} modelValue - The model value used for this session
|
||||
* @returns {Object|null} Token budget object or null
|
||||
*/
|
||||
function extractTokenBudget(resultMessage) {
|
||||
function extractTokenBudget(resultMessage, modelValue) {
|
||||
if (resultMessage.type !== 'result' || !resultMessage.modelUsage) {
|
||||
return null;
|
||||
}
|
||||
|
|
@@ -312,11 +320,8 @@ function extractTokenBudget(resultMessage) {
|
|||
// Total used = input + output + cache tokens
|
||||
const totalUsed = inputTokens + outputTokens + cacheReadTokens + cacheCreationTokens;
|
||||
|
||||
// Use configured context window budget from environment (default 160000)
|
||||
// This is the user's budget limit, not the model's context window
|
||||
const contextWindow = parseInt(process.env.CONTEXT_WINDOW) || 160000;
|
||||
|
||||
// Token calc logged via token-budget WS event
|
||||
// Use per-model context window so the budget meter reflects the model's actual ceiling.
|
||||
const contextWindow = getModelContextWindow(modelValue);
|
||||
|
||||
return {
|
||||
used: totalUsed,
|
||||
|
|
@@ -682,7 +687,7 @@ async function queryClaudeSDK(command, options = {}, ws) {
|
|||
if (models.length > 0) {
|
||||
// Model info available in result message
|
||||
}
|
||||
const tokenBudgetData = extractTokenBudget(message);
|
||||
const tokenBudgetData = extractTokenBudget(message, sdkOptions.model);
|
||||
if (tokenBudgetData) {
|
||||
ws.send(createNormalizedMessage({ kind: 'status', text: 'token_budget', tokenBudget: tokenBudgetData, sessionId: capturedSessionId || sessionId || null, provider: 'claude' }));
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue