Compare commits
3 commits
feat/sessi
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 07b6f4eb87 | |||
| 69e478224c | |||
| 25ca157f9d |
3 changed files with 1477 additions and 10 deletions
|
|
@ -17,7 +17,7 @@ import crypto from 'crypto';
|
||||||
import { promises as fs } from 'fs';
|
import { promises as fs } from 'fs';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
import os from 'os';
|
import os from 'os';
|
||||||
import { CLAUDE_MODELS } from '../shared/modelConstants.js';
|
import { CLAUDE_MODELS, getModelContextWindow } from '../shared/modelConstants.js';
|
||||||
import { resolveClaudeCodeExecutablePath } from './shared/claude-cli-path.js';
|
import { resolveClaudeCodeExecutablePath } from './shared/claude-cli-path.js';
|
||||||
import {
|
import {
|
||||||
createNotificationEvent,
|
createNotificationEvent,
|
||||||
|
|
@ -205,7 +205,14 @@ function mapCliOptionsToSDK(options = {}) {
|
||||||
// Map model (default to sonnet)
|
// Map model (default to sonnet)
|
||||||
// Valid models: sonnet, opus, haiku, opusplan, sonnet[1m]
|
// Valid models: sonnet, opus, haiku, opusplan, sonnet[1m]
|
||||||
sdkOptions.model = options.model || CLAUDE_MODELS.DEFAULT;
|
sdkOptions.model = options.model || CLAUDE_MODELS.DEFAULT;
|
||||||
// Model logged at query start below
|
|
||||||
|
// Stamp per-model CONTEXT_WINDOW into the subprocess env so Claude Code
|
||||||
|
// self-limits its prompt budget to the model's actual capacity.
|
||||||
|
// Local/constrained models (e.g. claudecode, cc/qwen72b) declare a lower
|
||||||
|
// ceiling in MODEL_CONTEXT_OVERRIDES; cloud models use the global default.
|
||||||
|
const modelContextWindow = getModelContextWindow(sdkOptions.model);
|
||||||
|
sdkOptions.env.CONTEXT_WINDOW = String(modelContextWindow);
|
||||||
|
sdkOptions.env.VITE_CONTEXT_WINDOW = String(modelContextWindow);
|
||||||
|
|
||||||
// Map system prompt configuration
|
// Map system prompt configuration
|
||||||
sdkOptions.systemPrompt = {
|
sdkOptions.systemPrompt = {
|
||||||
|
|
@ -287,9 +294,10 @@ function transformMessage(sdkMessage) {
|
||||||
/**
|
/**
|
||||||
* Extracts token usage from SDK result messages
|
* Extracts token usage from SDK result messages
|
||||||
* @param {Object} resultMessage - SDK result message
|
* @param {Object} resultMessage - SDK result message
|
||||||
|
* @param {string} modelValue - The model value used for this session
|
||||||
* @returns {Object|null} Token budget object or null
|
* @returns {Object|null} Token budget object or null
|
||||||
*/
|
*/
|
||||||
function extractTokenBudget(resultMessage) {
|
function extractTokenBudget(resultMessage, modelValue) {
|
||||||
if (resultMessage.type !== 'result' || !resultMessage.modelUsage) {
|
if (resultMessage.type !== 'result' || !resultMessage.modelUsage) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -312,11 +320,8 @@ function extractTokenBudget(resultMessage) {
|
||||||
// Total used = input + output + cache tokens
|
// Total used = input + output + cache tokens
|
||||||
const totalUsed = inputTokens + outputTokens + cacheReadTokens + cacheCreationTokens;
|
const totalUsed = inputTokens + outputTokens + cacheReadTokens + cacheCreationTokens;
|
||||||
|
|
||||||
// Use configured context window budget from environment (default 160000)
|
// Use per-model context window so the budget meter reflects the model's actual ceiling.
|
||||||
// This is the user's budget limit, not the model's context window
|
const contextWindow = getModelContextWindow(modelValue);
|
||||||
const contextWindow = parseInt(process.env.CONTEXT_WINDOW) || 160000;
|
|
||||||
|
|
||||||
// Token calc logged via token-budget WS event
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
used: totalUsed,
|
used: totalUsed,
|
||||||
|
|
@ -682,7 +687,7 @@ async function queryClaudeSDK(command, options = {}, ws) {
|
||||||
if (models.length > 0) {
|
if (models.length > 0) {
|
||||||
// Model info available in result message
|
// Model info available in result message
|
||||||
}
|
}
|
||||||
const tokenBudgetData = extractTokenBudget(message);
|
const tokenBudgetData = extractTokenBudget(message, sdkOptions.model);
|
||||||
if (tokenBudgetData) {
|
if (tokenBudgetData) {
|
||||||
ws.send(createNormalizedMessage({ kind: 'status', text: 'token_budget', tokenBudget: tokenBudgetData, sessionId: capturedSessionId || sessionId || null, provider: 'claude' }));
|
ws.send(createNormalizedMessage({ kind: 'status', text: 'token_budget', tokenBudget: tokenBudgetData, sessionId: capturedSessionId || sessionId || null, provider: 'claude' }));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
1429
server/index.js
1429
server/index.js
File diff suppressed because it is too large
Load diff
|
|
@ -48,6 +48,8 @@ export const CLAUDE_MODELS = {
|
||||||
{ value: "ollama/glm-4.7-flash", label: "GLM-4.7 Flash (ollama)" },
|
{ value: "ollama/glm-4.7-flash", label: "GLM-4.7 Flash (ollama)" },
|
||||||
{ value: "ollama/qwen3.5", label: "Qwen 3.5 (ollama)" },
|
{ value: "ollama/qwen3.5", label: "Qwen 3.5 (ollama)" },
|
||||||
{ value: "ollama-local/r1-14b-32k", label: "R1 14B 32K (local)" },
|
{ value: "ollama-local/r1-14b-32k", label: "R1 14B 32K (local)" },
|
||||||
|
{ value: "claudecode", label: "claudecode (local)" },
|
||||||
|
{ value: "cc/qwen72b", label: "qwen72b (local)" },
|
||||||
{ value: "gemini/gemini-3.1-pro-preview", label: "Gemini 3.1 Pro Preview" },
|
{ value: "gemini/gemini-3.1-pro-preview", label: "Gemini 3.1 Pro Preview" },
|
||||||
{ value: "gemini/gemini-3.1-flash-lite-preview", label: "Gemini 3.1 Flash Lite" },
|
{ value: "gemini/gemini-3.1-flash-lite-preview", label: "Gemini 3.1 Flash Lite" },
|
||||||
{ value: "gemini/gemini-3-flash-preview", label: "Gemini 3 Flash Preview" },
|
{ value: "gemini/gemini-3-flash-preview", label: "Gemini 3 Flash Preview" },
|
||||||
|
|
@ -104,6 +106,37 @@ export const CLAUDE_MODELS = {
|
||||||
DEFAULT: "cc/claude-sonnet-4-6",
|
DEFAULT: "cc/claude-sonnet-4-6",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Per-model context window overrides (in tokens).
 *
 * Models listed here will have CONTEXT_WINDOW stamped into the Claude Code
 * subprocess env so the agent self-limits its prompt budget accordingly.
 * Models NOT listed here fall back to the global CONTEXT_WINDOW env var (default 160000).
 *
 * Add an entry here whenever you add a local/constrained model whose VRAM or
 * architecture limits the usable context below the global default.
 */
export const MODEL_CONTEXT_OVERRIDES = {
  'claudecode': 60000,
  'cc/qwen72b': 60000,
  'ollama-local/r1-14b-32k': 32000,
};

/**
 * Returns the effective context window size for a given model value.
 * Checks MODEL_CONTEXT_OVERRIDES first, then falls back to the
 * global CONTEXT_WINDOW env var, then 160000.
 *
 * Only the table's own entries count as overrides, so model strings that
 * happen to match inherited object keys (e.g. "constructor", "toString")
 * are treated like any other unlisted model. The env var is used only when
 * it parses to a positive integer; anything else (unset, non-numeric, zero,
 * negative) yields 160000.
 *
 * @param {string} modelValue - The model value string (e.g. "claudecode", "cc/claude-sonnet-4-6")
 * @returns {number} Context window in tokens
 */
export function getModelContextWindow(modelValue) {
  // Own-property check: a bare `table[key] !== undefined` lookup would also
  // hit Object.prototype members and hand back a non-numeric value.
  if (Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_OVERRIDES, modelValue)) {
    return MODEL_CONTEXT_OVERRIDES[modelValue];
  }

  const envContextWindow = Number.parseInt(process.env.CONTEXT_WINDOW, 10);
  // Reject NaN, zero and negative values so the result is always a usable size.
  if (Number.isInteger(envContextWindow) && envContextWindow > 0) {
    return envContextWindow;
  }
  return 160000;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cursor Models
|
* Cursor Models
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue