Initial commit: Ollama + Claude Code Docker setup with qwen2.5-coder:32b

2026-04-02 12:57:40 -04:00 · 2026-04-02 12:57:40 -04:00 · 23173fac3f
commit 23173fac3f
4 changed files with 162 additions and 0 deletions
--- a/27
+++ b/27
@ -0,0 +1,27 @@
 # Dockerfile: Ollama with NVIDIA GPU support
 #
 # Installs Ollama on top of NVIDIA's CUDA base image and hands control to
 # /entrypoint.sh when the container starts.
 # Base image with CUDA support for NVIDIA GPU acceleration
 FROM nvidia/cuda:12.3.2-base-ubuntu22.04
 # Avoid interactive prompts during package install
 ENV DEBIAN_FRONTEND=noninteractive
 # Install only what the Ollama installer needs; --no-install-recommends keeps
 # unrelated "recommended" packages out of the image.
 # zstd: newer Ollama install scripts unpack a .tar.zst release archive
 # (NOTE(review): confirm against the install.sh version being fetched).
 RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    ca-certificates \
    zstd \
    && rm -rf /var/lib/apt/lists/*
 # Install Ollama via the upstream install script
 RUN curl -fsSL https://ollama.com/install.sh | sh
 # Expose Ollama API port
 EXPOSE 11434
 # Set Ollama host to listen on all interfaces
 ENV OLLAMA_HOST=0.0.0.0
 # Copy entrypoint script and make it executable
 COPY entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh
 # Exec form: the script runs directly as the container's main process
 ENTRYPOINT ["/entrypoint.sh"]
--- a/README.md
+++ b/README.md
@ -0,0 +1,56 @@
 # Ollama + Claude Code — Docker Setup
 Run **qwen2.5-coder:32b** locally via Ollama, with Claude Code wired up to use it as its backend.
 ## Prerequisites
 - Docker & Docker Compose v2
 - NVIDIA Container Toolkit installed on the host
 - NVIDIA GPU with 32 GB+ VRAM (or large system RAM for CPU offload)
 ## Quick Start
 ```bash
 # 1. Clone this repository
 git clone https://forge.wilddragon.net/zgaetano/ollama-claude-code.git
 cd ollama-claude-code
 # 2. Build and start both services
 docker compose up -d --build
 # 3. Watch Ollama pull the model on first run
 docker compose logs -f ollama
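 # 4. Shell into the Claude Code container (the CLI is installed via npm when
 #    the container starts; wait until `docker compose logs claude-code`
 #    shows "Claude Code installed")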
 docker compose exec claude-code bash
 # 5. Start Claude Code
 claude
 ```
 ## Environment Variables
 ### ollama service
 | Variable | Default | Description |
 |---|---|---|
 | OLLAMA_HOST | 0.0.0.0 | Interface Ollama listens on |
 | OLLAMA_KEEP_ALIVE | 24h | How long to keep the model loaded |
 | OLLAMA_MODEL | qwen2.5-coder:32b | Model to pull on startup (default applied by `entrypoint.sh`) |
 ### claude-code service
 | Variable | Default | Description |
 |---|---|---|
 | ANTHROPIC_BASE_URL | http://ollama:11434/v1 | Points Claude Code at local Ollama |
 | ANTHROPIC_API_KEY | ollama | Dummy key (Ollama does not verify it) |
 | CLAUDE_CODE_DEFAULT_MODEL | qwen2.5-coder:32b | Model Claude Code will use |
 ## Volumes
 - `ollama_data` - persists downloaded models across restarts
 - `workspace` - named volume mounted at `/workspace`, the working directory of the claude-code container
 ## Stopping
 ```bash
 docker compose down
 # To also remove the named volumes (downloaded models AND the workspace contents):
 docker compose down -v
 ```
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,49 @@
 # Compose Specification file for Docker Compose v2. The top-level `version`
 # key is obsolete (Compose v2 ignores it and prints a warning), so it is
 # intentionally omitted.
 services:
  # Ollama server built from the local Dockerfile, with NVIDIA GPU access.
  ollama:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: ollama
    restart: unless-stopped
    ports:
      # Publishes the Ollama API port on the host.
      - "11434:11434"
    volumes:
      # Keeps downloaded models across container restarts.
      - ollama_data:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all NVIDIA GPUs on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      - OLLAMA_HOST=0.0.0.0
      - OLLAMA_KEEP_ALIVE=24h
      # Model that entrypoint.sh pulls on startup. Taken from the host
      # environment (or an .env file) when set; otherwise the same default
      # entrypoint.sh uses.
      - OLLAMA_MODEL=${OLLAMA_MODEL:-qwen2.5-coder:32b}
  # Claude Code CLI container: installs the CLI with npm when it starts, then
  # idles so you can `docker compose exec` into it and run `claude`.
  claude-code:
    image: node:20-slim
    container_name: claude-code
    restart: unless-stopped
    # Start-order dependency only (short form, no health condition).
    depends_on:
      - ollama
    environment:
      # NOTE(review): Claude Code is expected to append `/v1/messages` to this
      # base URL, which would make requests hit `/v1/v1/messages`. Ollama's
      # Claude Code instructions appear to use the bare host URL
      # (http://ollama:11434) — confirm before relying on the `/v1` suffix.
      - ANTHROPIC_BASE_URL=http://ollama:11434/v1
      # Placeholder credential; the value is not a real Anthropic key.
      - ANTHROPIC_API_KEY=ollama
      # NOTE(review): verify that Claude Code actually reads this variable;
      # its documented model override appears to be ANTHROPIC_MODEL — confirm.
      - CLAUDE_CODE_DEFAULT_MODEL=qwen2.5-coder:32b
    volumes:
      - workspace:/workspace
    working_dir: /workspace
    # Installs the CLI globally, prints a hint, then blocks on `tail` so the
    # container stays up. The install runs each time the container starts.
    command: >
      sh -c "npm install -g @anthropic-ai/claude-code &&
             echo 'Claude Code installed. Run: claude' &&
             tail -f /dev/null"
    # Keep stdin open and allocate a TTY for interactive use.
    stdin_open: true
    tty: true
 # Named volumes, both backed by the `local` driver.
 volumes:
  # Mounted at /workspace in the claude-code service.
  workspace:
    driver: local
  # Mounted at /root/.ollama in the ollama service (model storage).
  ollama_data:
    driver: local
--- a/entrypoint.sh
+++ b/entrypoint.sh
@ -0,0 +1,30 @@
 #!/bin/bash
 # Container entrypoint for the Ollama image.
 #
 # Starts `ollama serve` in the background, waits until its HTTP API answers,
 # pulls the configured model if it is not available locally, and then stays
 # attached to the server process until it exits or the container is stopped.
 #
 # Environment:
 #   OLLAMA_MODEL  Model to make available (default: qwen2.5-coder:32b).
 set -e
 MODEL="${OLLAMA_MODEL:-qwen2.5-coder:32b}"
 echo "==> Starting Ollama server..."
 ollama serve &
 OLLAMA_PID=$!
 # Forward termination signals to the server. When this script is the
 # container's main process, a shell without a handler does not act on SIGTERM,
 # so `docker stop` would otherwise wait for its kill timeout.
 stop_server() {
  echo "==> Stopping Ollama server..."
  kill -TERM "${OLLAMA_PID}" 2> /dev/null || true
  wait "${OLLAMA_PID}" 2> /dev/null || true
  exit 0
 }
 trap stop_server TERM INT
 # Wait for Ollama to be ready
 echo "==> Waiting for Ollama to be ready..."
 until curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; do
  # Bail out instead of polling forever if the server died during start-up.
  if ! kill -0 "${OLLAMA_PID}" 2> /dev/null; then
    echo "==> Ollama server exited before becoming ready." >&2
    exit 1
  fi
  sleep 1
 done
 echo "==> Ollama is ready."
 # Pull the model if not already present.
 # `ollama show` fails for a model that is not available locally. It replaces
 # `ollama list | grep`, which treated the name as a regular expression and
 # also matched longer names that merely contain it.
 if ! ollama show "${MODEL}" > /dev/null 2>&1; then
  echo "==> Pulling model: ${MODEL} (this may take a while for 32b)..."
  ollama pull "${MODEL}"
  echo "==> Model pulled successfully."
 else
  echo "==> Model ${MODEL} already present, skipping pull."
 fi
 echo "==> Ollama is running with model: ${MODEL}"
 echo "==> API available at http://0.0.0.0:11434"
 # Keep the server process in the foreground
 wait "${OLLAMA_PID}"