commit 23173fac3fc53cad8340d9d7ce04ae9bf89d16d7
Author: Zac Gaetano
Date:   Thu Apr 2 12:57:40 2026 -0400

    Initial commit: Ollama + Claude Code Docker setup with qwen2.5-coder:32b

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..1f78456
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,27 @@
+# Dockerfile: Ollama with NVIDIA GPU support
+# Base image with CUDA support for NVIDIA GPU acceleration
+FROM nvidia/cuda:12.3.2-base-ubuntu22.04
+
+# Avoid interactive prompts during package install
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install dependencies
+RUN apt-get update && apt-get install -y \
+    curl \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Ollama
+RUN curl -fsSL https://ollama.com/install.sh | sh
+
+# Expose Ollama API port
+EXPOSE 11434
+
+# Set Ollama host to listen on all interfaces
+ENV OLLAMA_HOST=0.0.0.0
+
+# Copy entrypoint script
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..645a3b7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,56 @@
+# Ollama + Claude Code — Docker Setup
+
+Run **qwen2.5-coder:32b** locally via Ollama, with Claude Code wired up to use it as its backend.
+
+## Prerequisites
+
+- Docker & Docker Compose v2
+- NVIDIA Container Toolkit installed on the host
+- NVIDIA GPU with 32 GB+ VRAM (or large system RAM for CPU offload)
+
+## Quick Start
+
+```bash
+# 1. Clone this repository
+git clone https://forge.wilddragon.net/zgaetano/ollama-claude-code.git
+cd ollama-claude-code
+
+# 2. Build and start both services
+docker compose up -d --build
+
+# 3. Watch Ollama pull the model on first run
+docker compose logs -f ollama
+
+# 4. Shell into the Claude Code container
+docker compose exec claude-code bash
+
+# 5. Start Claude Code
+claude
+```
+
+## Environment Variables
+
+### ollama service
+| Variable | Default | Description |
+|---|---|---|
+| OLLAMA_HOST | 0.0.0.0 | Interface Ollama listens on |
+| OLLAMA_KEEP_ALIVE | 24h | How long to keep the model loaded |
+| OLLAMA_MODEL | qwen2.5-coder:32b | Model to pull on startup |
+
+### claude-code service
+| Variable | Default | Description |
+|---|---|---|
+| ANTHROPIC_BASE_URL | http://ollama:11434/v1 | Points Claude Code at local Ollama |
+| ANTHROPIC_API_KEY | ollama | Dummy key (Ollama does not verify it) |
+| CLAUDE_CODE_DEFAULT_MODEL | qwen2.5-coder:32b | Model Claude Code will use |
+
+## Volumes
+- `ollama_data` - persists downloaded models across restarts
+- `workspace` - shared workspace inside the claude-code container
+
+## Stopping
+```bash
+docker compose down
+# To also remove model data:
+docker compose down -v
+```
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..ae930ea
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,49 @@
+version: "3.8"
+
+services:
+  ollama:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: ollama
+    restart: unless-stopped
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama_data:/root/.ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    environment:
+      - OLLAMA_HOST=0.0.0.0
+      - OLLAMA_KEEP_ALIVE=24h
+
+  claude-code:
+    image: node:20-slim
+    container_name: claude-code
+    restart: unless-stopped
+    depends_on:
+      - ollama
+    environment:
+      - ANTHROPIC_BASE_URL=http://ollama:11434/v1
+      - ANTHROPIC_API_KEY=ollama
+      - CLAUDE_CODE_DEFAULT_MODEL=qwen2.5-coder:32b
+    volumes:
+      - workspace:/workspace
+    working_dir: /workspace
+    command: >
+      sh -c "npm install -g @anthropic-ai/claude-code &&
+      echo 'Claude Code installed. Run: claude' &&
+      tail -f /dev/null"
+    stdin_open: true
+    tty: true
+
+volumes:
+  ollama_data:
+    driver: local
+  workspace:
+    driver: local
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100755
index 0000000..52d15d4
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+MODEL="${OLLAMA_MODEL:-qwen2.5-coder:32b}"
+
+echo "==> Starting Ollama server..."
+ollama serve &
+OLLAMA_PID=$!
+
+# Wait for Ollama to be ready
+echo "==> Waiting for Ollama to be ready..."
+until curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; do
+    sleep 1
+done
+echo "==> Ollama is ready."
+
+# Pull the model if not already present
+if ! ollama list | grep -q "${MODEL}"; then
+    echo "==> Pulling model: ${MODEL} (this may take a while for 32b)..."
+    ollama pull "${MODEL}"
+    echo "==> Model pulled successfully."
+else
+    echo "==> Model ${MODEL} already present, skipping pull."
+fi
+
+echo "==> Ollama is running with model: ${MODEL}"
+echo "==> API available at http://0.0.0.0:11434"
+
+# Keep the server process in the foreground
+wait $OLLAMA_PID