feat: add per-model context window overrides
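Adds the MODEL_CONTEXT_OVERRIDES map and the getModelContextWindow() helper so that local/constrained models (claudecode, cc/qwen72b, ollama-local/r1-14b-32k) can declare their VRAM-limited context budget. Lookups are exact-match on the model value, so each constrained model needs its own entry. claude-sdk.js is expected to stamp the returned value into the subprocess env (that wiring is not part of this commit) so Claude Code self-limits instead of overflowing the model.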
This commit is contained in:
parent
25ca157f9d
commit
69e478224c
1 changed files with 33 additions and 0 deletions
|
|
@ -48,6 +48,8 @@ export const CLAUDE_MODELS = {
|
||||||
{ value: "ollama/glm-4.7-flash", label: "GLM-4.7 Flash (ollama)" },
|
{ value: "ollama/glm-4.7-flash", label: "GLM-4.7 Flash (ollama)" },
|
||||||
{ value: "ollama/qwen3.5", label: "Qwen 3.5 (ollama)" },
|
{ value: "ollama/qwen3.5", label: "Qwen 3.5 (ollama)" },
|
||||||
{ value: "ollama-local/r1-14b-32k", label: "R1 14B 32K (local)" },
|
{ value: "ollama-local/r1-14b-32k", label: "R1 14B 32K (local)" },
|
||||||
|
{ value: "claudecode", label: "claudecode (local)" },
|
||||||
|
{ value: "cc/qwen72b", label: "qwen72b (local)" },
|
||||||
{ value: "gemini/gemini-3.1-pro-preview", label: "Gemini 3.1 Pro Preview" },
|
{ value: "gemini/gemini-3.1-pro-preview", label: "Gemini 3.1 Pro Preview" },
|
||||||
{ value: "gemini/gemini-3.1-flash-lite-preview", label: "Gemini 3.1 Flash Lite" },
|
{ value: "gemini/gemini-3.1-flash-lite-preview", label: "Gemini 3.1 Flash Lite" },
|
||||||
{ value: "gemini/gemini-3-flash-preview", label: "Gemini 3 Flash Preview" },
|
{ value: "gemini/gemini-3-flash-preview", label: "Gemini 3 Flash Preview" },
|
||||||
|
|
@ -104,6 +106,37 @@ export const CLAUDE_MODELS = {
|
||||||
DEFAULT: "cc/claude-sonnet-4-6",
|
DEFAULT: "cc/claude-sonnet-4-6",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Context window budgets (in tokens) for specific models, keyed by the exact
 * model value string.
 *
 * A model that appears in this table is meant to have its budget stamped into
 * the Claude Code subprocess env as CONTEXT_WINDOW, so the agent limits its
 * own prompt size. A model that does not appear here uses the global
 * CONTEXT_WINDOW env var instead (default 160000).
 *
 * Whenever a local/constrained model is added whose VRAM or architecture
 * caps the usable context below the global default, register it here.
 */
export const MODEL_CONTEXT_OVERRIDES = {
  "claudecode": 60000,
  "cc/qwen72b": 60000,
  "ollama-local/r1-14b-32k": 32000,
};
|
||||||
|
|
||||||
|
/**
 * Returns the effective context window size for a given model value.
 *
 * Resolution order:
 *   1. An own entry in MODEL_CONTEXT_OVERRIDES for the exact model value.
 *   2. The global CONTEXT_WINDOW env var, when it parses to a positive integer.
 *   3. 160000.
 *
 * @param {string} modelValue - The model value string (e.g. "claudecode", "cc/claude-sonnet-4-6")
 * @returns {number} Context window in tokens
 */
export function getModelContextWindow(modelValue) {
  // Own-property check only: a plain `obj[key] !== undefined` test would also
  // match inherited members such as "constructor" or "toString" and hand back
  // a function instead of a token count.
  if (modelValue && Object.hasOwn(MODEL_CONTEXT_OVERRIDES, modelValue)) {
    return MODEL_CONTEXT_OVERRIDES[modelValue];
  }

  // Missing, non-numeric, zero, or negative values are not usable budgets,
  // so they all fall through to the built-in default.
  const envWindow = Number.parseInt(process.env.CONTEXT_WINDOW, 10);
  if (Number.isInteger(envWindow) && envWindow > 0) {
    return envWindow;
  }
  return 160000;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cursor Models
|
* Cursor Models
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue