Compare commits
7 commits
feat/sessi
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 7781719179 | |||
| df6345c30a | |||
| e7514c08cc | |||
| 7ed22b9077 | |||
| 07b6f4eb87 | |||
| 69e478224c | |||
| 25ca157f9d |
7 changed files with 1595 additions and 99 deletions
|
|
@ -17,7 +17,7 @@ import crypto from 'crypto';
|
|||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import { CLAUDE_MODELS } from '../shared/modelConstants.js';
|
||||
import { CLAUDE_MODELS, getModelContextWindow } from '../shared/modelConstants.js';
|
||||
import { resolveClaudeCodeExecutablePath } from './shared/claude-cli-path.js';
|
||||
import {
|
||||
createNotificationEvent,
|
||||
|
|
@ -205,7 +205,14 @@ function mapCliOptionsToSDK(options = {}) {
|
|||
// Map model (default to sonnet)
|
||||
// Valid models: sonnet, opus, haiku, opusplan, sonnet[1m]
|
||||
sdkOptions.model = options.model || CLAUDE_MODELS.DEFAULT;
|
||||
// Model logged at query start below
|
||||
|
||||
// Stamp per-model CONTEXT_WINDOW into the subprocess env so Claude Code
|
||||
// self-limits its prompt budget to the model's actual capacity.
|
||||
// Local/constrained models (e.g. claudecode, cc/qwen72b) declare a lower
|
||||
// ceiling in MODEL_CONTEXT_OVERRIDES; cloud models use the global default.
|
||||
const modelContextWindow = getModelContextWindow(sdkOptions.model);
|
||||
sdkOptions.env.CONTEXT_WINDOW = String(modelContextWindow);
|
||||
sdkOptions.env.VITE_CONTEXT_WINDOW = String(modelContextWindow);
|
||||
|
||||
// Map system prompt configuration
|
||||
sdkOptions.systemPrompt = {
|
||||
|
|
@ -287,9 +294,10 @@ function transformMessage(sdkMessage) {
|
|||
/**
|
||||
* Extracts token usage from SDK result messages
|
||||
* @param {Object} resultMessage - SDK result message
|
||||
* @param {string} modelValue - The model value used for this session
|
||||
* @returns {Object|null} Token budget object or null
|
||||
*/
|
||||
function extractTokenBudget(resultMessage) {
|
||||
function extractTokenBudget(resultMessage, modelValue) {
|
||||
if (resultMessage.type !== 'result' || !resultMessage.modelUsage) {
|
||||
return null;
|
||||
}
|
||||
|
|
@ -312,11 +320,8 @@ function extractTokenBudget(resultMessage) {
|
|||
// Total used = input + output + cache tokens
|
||||
const totalUsed = inputTokens + outputTokens + cacheReadTokens + cacheCreationTokens;
|
||||
|
||||
// Use configured context window budget from environment (default 160000)
|
||||
// This is the user's budget limit, not the model's context window
|
||||
const contextWindow = parseInt(process.env.CONTEXT_WINDOW) || 160000;
|
||||
|
||||
// Token calc logged via token-budget WS event
|
||||
// Use per-model context window so the budget meter reflects the model's actual ceiling.
|
||||
const contextWindow = getModelContextWindow(modelValue);
|
||||
|
||||
return {
|
||||
used: totalUsed,
|
||||
|
|
@ -682,7 +687,7 @@ async function queryClaudeSDK(command, options = {}, ws) {
|
|||
if (models.length > 0) {
|
||||
// Model info available in result message
|
||||
}
|
||||
const tokenBudgetData = extractTokenBudget(message);
|
||||
const tokenBudgetData = extractTokenBudget(message, sdkOptions.model);
|
||||
if (tokenBudgetData) {
|
||||
ws.send(createNormalizedMessage({ kind: 'status', text: 'token_budget', tokenBudget: tokenBudgetData, sessionId: capturedSessionId || sessionId || null, provider: 'claude' }));
|
||||
}
|
||||
|
|
|
|||
1429
server/index.js
1429
server/index.js
File diff suppressed because it is too large
Load diff
|
|
@ -17,8 +17,8 @@ let cacheExpiry = 0;
|
|||
|
||||
/**
|
||||
* Maps a raw /v1/models entry to the { value, label } shape used by the UI.
|
||||
* Filters to models that are relevant for Claude routing (id contains 'claude'
|
||||
* or starts with 'cc/').
|
||||
* Accepts all models returned by the endpoint — 9router surfaces only what it
|
||||
* actually routes, so no client-side filtering is needed.
|
||||
*
|
||||
* @param {Object} entry - Raw model object from /v1/models
|
||||
* @returns {{ value: string, label: string } | null}
|
||||
|
|
@ -27,14 +27,8 @@ function mapModelEntry(entry) {
|
|||
const id = typeof entry?.id === 'string' ? entry.id.trim() : null;
|
||||
if (!id) return null;
|
||||
|
||||
// Only surface Claude-family and cc/* (9router) models for the Claude provider.
|
||||
// Extend this filter if you want to surface all models.
|
||||
const isClaude = id.toLowerCase().includes('claude') || id.startsWith('cc/');
|
||||
if (!isClaude) return null;
|
||||
|
||||
// Build a human-readable label from the id.
|
||||
// e.g. "cc/claude-sonnet-4-6" → "claude-sonnet-4-6 (9router)"
|
||||
// "claude-3-5-sonnet-20241022" → "claude-3-5-sonnet-20241022"
|
||||
// Build a human-readable label: prefer server-supplied name, then id.
|
||||
// cc/* entries strip the prefix and add a "(9router)" suffix for clarity.
|
||||
let label = entry.name ?? id;
|
||||
if (id.startsWith('cc/')) {
|
||||
label = `${id.slice(3)} (9router)`;
|
||||
|
|
@ -80,7 +74,7 @@ async function fetchModelsFromApi() {
|
|||
const mapped = raw.map(mapModelEntry).filter(Boolean);
|
||||
if (mapped.length === 0) return null;
|
||||
|
||||
console.log(`[model-discovery] Loaded ${mapped.length} Claude models from ${url}`);
|
||||
console.log(`[model-discovery] Loaded ${mapped.length} models from ${url}`);
|
||||
return mapped;
|
||||
} catch (err) {
|
||||
console.warn(`[model-discovery] Failed to fetch ${url}:`, err?.message ?? err);
|
||||
|
|
|
|||
|
|
@ -48,6 +48,8 @@ export const CLAUDE_MODELS = {
|
|||
{ value: "ollama/glm-4.7-flash", label: "GLM-4.7 Flash (ollama)" },
|
||||
{ value: "ollama/qwen3.5", label: "Qwen 3.5 (ollama)" },
|
||||
{ value: "ollama-local/r1-14b-32k", label: "R1 14B 32K (local)" },
|
||||
{ value: "claudecode", label: "claudecode (local)" },
|
||||
{ value: "cc/qwen72b", label: "qwen72b (local)" },
|
||||
{ value: "gemini/gemini-3.1-pro-preview", label: "Gemini 3.1 Pro Preview" },
|
||||
{ value: "gemini/gemini-3.1-flash-lite-preview", label: "Gemini 3.1 Flash Lite" },
|
||||
{ value: "gemini/gemini-3-flash-preview", label: "Gemini 3 Flash Preview" },
|
||||
|
|
@ -104,6 +106,37 @@ export const CLAUDE_MODELS = {
|
|||
DEFAULT: "cc/claude-sonnet-4-6",
|
||||
};
|
||||
|
||||
/**
|
||||
* Per-model context window overrides (in tokens).
|
||||
*
|
||||
* Models listed here will have CONTEXT_WINDOW stamped into the Claude Code
|
||||
* subprocess env so the agent self-limits its prompt budget accordingly.
|
||||
* Models NOT listed here fall back to the global CONTEXT_WINDOW env var (default 160000).
|
||||
*
|
||||
* Add an entry here whenever you add a local/constrained model whose VRAM or
|
||||
* architecture limits the usable context below the global default.
|
||||
*/
|
||||
export const MODEL_CONTEXT_OVERRIDES = {
  'claudecode': 60000,
  'cc/qwen72b': 60000,
  'ollama-local/r1-14b-32k': 32000,
};

/**
 * Returns the effective context window size for a given model value.
 * Checks MODEL_CONTEXT_OVERRIDES first, then falls back to the
 * global CONTEXT_WINDOW env var, then 160000.
 *
 * Only the table's own keys count as overrides, so a model value that
 * collides with an inherited Object.prototype member (e.g. "toString",
 * "constructor") is treated as "not listed" instead of returning a function.
 * A CONTEXT_WINDOW env value that is missing, non-numeric, zero or negative
 * is ignored in favour of the 160000 default.
 *
 * @param {string} modelValue - The model value string (e.g. "claudecode", "cc/claude-sonnet-4-6")
 * @returns {number} Context window in tokens
 */
export function getModelContextWindow(modelValue) {
  const fallbackWindow = 160000;

  // Own-property check: a plain `obj[key] !== undefined` test would also match
  // inherited members such as `toString` and hand back a non-numeric value.
  if (modelValue && Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_OVERRIDES, modelValue)) {
    return MODEL_CONTEXT_OVERRIDES[modelValue];
  }

  const configuredWindow = Number.parseInt(process.env.CONTEXT_WINDOW, 10);

  // NaN (unset / garbage), 0 and negative values are not usable budgets.
  return Number.isFinite(configuredWindow) && configuredWindow > 0
    ? configuredWindow
    : fallbackWindow;
}
|
||||
|
||||
/**
|
||||
* Cursor Models
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -90,18 +90,14 @@ export function useChatProviderState({ selectedSession }: UseChatProviderStateAr
|
|||
load('gemini', setGeminiModelState, GEMINI_MODELS.DEFAULT);
|
||||
}, [selectedSession?.id]);
|
||||
|
||||
// Fetch live Claude model list and validate the current claude model
|
||||
useEffect(() => {
|
||||
authenticatedFetch('/api/models')
|
||||
.then((res) => {
|
||||
// Fetch and apply live Claude model list, validating the current selection.
|
||||
const fetchAndSetClaudeModels = useCallback(async () => {
|
||||
const res = await authenticatedFetch('/api/models');
|
||||
if (!res.ok) return;
|
||||
return res.json();
|
||||
})
|
||||
.then((data) => {
|
||||
const data = await res.json();
|
||||
if (!Array.isArray(data?.claude) || data.claude.length === 0) return;
|
||||
const options: ModelOption[] = data.claude;
|
||||
setClaudeModelOptions(options);
|
||||
|
||||
setClaudeModelState((current) => {
|
||||
const valid = options.some((o) => o.value === current);
|
||||
if (valid) return current;
|
||||
|
|
@ -112,8 +108,17 @@ export function useChatProviderState({ selectedSession }: UseChatProviderStateAr
|
|||
}
|
||||
return fallback;
|
||||
});
|
||||
})
|
||||
.catch(() => {
|
||||
}, [selectedSession?.id]);
|
||||
|
||||
// Bust the server-side cache then re-fetch the model list.
|
||||
const refreshClaudeModels = useCallback(async () => {
|
||||
await authenticatedFetch('/api/models/refresh', { method: 'POST' });
|
||||
await fetchAndSetClaudeModels();
|
||||
}, [fetchAndSetClaudeModels]);
|
||||
|
||||
// Fetch live Claude model list on mount
|
||||
useEffect(() => {
|
||||
fetchAndSetClaudeModels().catch(() => {
|
||||
// Static fallback already in place
|
||||
});
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
|
|
@ -209,5 +214,6 @@ export function useChatProviderState({ selectedSession }: UseChatProviderStateAr
|
|||
pendingPermissionRequests,
|
||||
setPendingPermissionRequests,
|
||||
cyclePermissionMode,
|
||||
refreshClaudeModels,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ function ChatInterface({
|
|||
pendingPermissionRequests,
|
||||
setPendingPermissionRequests,
|
||||
cyclePermissionMode,
|
||||
refreshClaudeModels,
|
||||
} = useChatProviderState({
|
||||
selectedSession,
|
||||
});
|
||||
|
|
@ -319,6 +320,7 @@ function ChatInterface({
|
|||
codexModelOptions={codexModelOptions}
|
||||
geminiModelOptions={geminiModelOptions}
|
||||
cursorModelOptions={cursorModelOptions}
|
||||
onRefreshModels={refreshClaudeModels}
|
||||
/>
|
||||
|
||||
<ChatMessagesPane
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { useState, useRef, useEffect } from 'react';
|
||||
import { ChevronDown, Globe } from 'lucide-react';
|
||||
import { ChevronDown, Globe, RefreshCw } from 'lucide-react';
|
||||
import type { LLMProvider } from '../../../../types/app';
|
||||
import SessionProviderLogo from '../../../llm-logo-provider/SessionProviderLogo';
|
||||
|
||||
|
|
@ -19,6 +19,7 @@ interface ModelSelectorBarProps {
|
|||
codexModelOptions: ModelOption[];
|
||||
geminiModelOptions: ModelOption[];
|
||||
cursorModelOptions: ModelOption[];
|
||||
onRefreshModels?: () => Promise<void>;
|
||||
}
|
||||
|
||||
function useCurrentModel(
|
||||
|
|
@ -57,9 +58,11 @@ export default function ModelSelectorBar({
|
|||
codexModelOptions,
|
||||
geminiModelOptions,
|
||||
cursorModelOptions,
|
||||
onRefreshModels,
|
||||
}: ModelSelectorBarProps) {
|
||||
const [open, setOpen] = useState(false);
|
||||
const [search, setSearch] = useState('');
|
||||
const [isRefreshing, setIsRefreshing] = useState(false);
|
||||
const dropdownRef = useRef<HTMLDivElement>(null);
|
||||
const searchRef = useRef<HTMLInputElement>(null);
|
||||
|
||||
|
|
@ -101,6 +104,16 @@ export default function ModelSelectorBar({
|
|||
setSearch('');
|
||||
};
|
||||
|
||||
// Click handler for the refresh button: runs the caller-supplied
// `onRefreshModels` callback while `isRefreshing` is set, which the button
// uses for its disabled state and spinner animation.
const handleRefresh = async () => {
  // Nothing to do without a callback; also ignore clicks while a refresh is in flight.
  if (!onRefreshModels || isRefreshing) return;
  setIsRefreshing(true);
  try {
    await onRefreshModels();
  } catch (err) {
    // This runs from an onClick handler, so nothing awaits the returned
    // promise; without a catch a failed refresh would surface only as an
    // unhandled rejection. Log it and keep the currently displayed list.
    console.error('Failed to refresh model list:', err);
  } finally {
    // Always re-enable the button, whether the refresh succeeded or failed.
    setIsRefreshing(false);
  }
};
|
||||
|
||||
return (
|
||||
<div className="relative z-40 flex-shrink-0 flex items-center justify-between px-4 py-1.5 border-b border-border/40 bg-muted/20 backdrop-blur-sm">
|
||||
{/* Left: provider label */}
|
||||
|
|
@ -109,7 +122,20 @@ export default function ModelSelectorBar({
|
|||
<span className="capitalize">{provider}</span>
|
||||
</div>
|
||||
|
||||
{/* Right: model picker */}
|
||||
{/* Right: model picker + refresh */}
|
||||
<div className="flex items-center gap-1.5">
|
||||
{provider === 'claude' && onRefreshModels && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleRefresh}
|
||||
disabled={isRefreshing}
|
||||
title="Refresh model list from 9router"
|
||||
className="rounded-md border border-border/50 bg-background/60 p-1 text-muted-foreground hover:bg-accent hover:text-foreground hover:border-border transition-all disabled:opacity-50"
|
||||
>
|
||||
<RefreshCw className={`h-3 w-3 ${isRefreshing ? 'animate-spin' : ''}`} />
|
||||
</button>
|
||||
)}
|
||||
|
||||
<div className="relative" ref={dropdownRef}>
|
||||
<button
|
||||
type="button"
|
||||
|
|
@ -164,5 +190,6 @@ export default function ModelSelectorBar({
|
|||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue