Initial commit: Ollama + Claude Code Docker setup with qwen2.5-coder:32b

2026-04-02 12:57:40 -04:00 · 2026-04-02 12:57:40 -04:00 · 23173fac3f
commit 23173fac3f
4 changed files with 162 additions and 0 deletions
--- a/27
+++ b/27
@ -0,0 +1,39 @@
+# Dockerfile: Ollama with NVIDIA GPU support
+#
+# Builds an image that installs Ollama on top of an NVIDIA CUDA base image and
+# starts it through /entrypoint.sh, with the API exposed on port 11434.
+FROM nvidia/cuda:12.3.2-base-ubuntu22.04
+
+# Fail a RUN step when any command in a pipeline fails. Without this, a failed
+# download in `curl ... | sh` still exits 0 (sh reads empty input) and the
+# build "succeeds" without Ollama installed.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Build-time only: suppress interactive apt prompts without leaking the
+# setting into the runtime environment of the container.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install only what the Ollama installer needs; skip recommended packages and
+# drop the apt lists to keep the layer small.
+# NOTE(review): zstd is included because recent Ollama installers appear to
+# ship a .tar.zst archive - confirm against the current install.sh.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    curl \
+    zstd \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Ollama using the official install script
+RUN curl -fsSL https://ollama.com/install.sh | sh
+
+# Ollama API port
+EXPOSE 11434
+
+# Listen on all interfaces so other containers can reach the API
+ENV OLLAMA_HOST=0.0.0.0
+
+# Install the startup script and make it executable
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
--- a/README.md
+++ b/README.md
@ -0,0 +1,56 @@
+# Ollama + Claude Code — Docker Setup
+
+Run **qwen2.5-coder:32b** locally via Ollama, with Claude Code wired up to use it as its backend.
+
+## Prerequisites
+
+- Docker & Docker Compose v2
+- NVIDIA Container Toolkit installed on the host
+- NVIDIA GPU with 32 GB+ VRAM (or large system RAM for CPU offload)
+
+## Quick Start
+
+```bash
+# 1. Clone this repository
+git clone https://forge.wilddragon.net/zgaetano/ollama-claude-code.git
+cd ollama-claude-code
+
+# 2. Build and start both services
+docker compose up -d --build
+
+# 3. Watch Ollama pull the model on first run
+docker compose logs -f ollama
+
+# 4. Once "docker compose logs claude-code" shows "Claude Code installed", shell into the container
+docker compose exec claude-code bash
+
+# 5. Start Claude Code
+claude
+```
+
+## Environment Variables
+
+### ollama service
+| Variable | Default | Description |
+|---|---|---|
+| OLLAMA_HOST | 0.0.0.0 | Interface Ollama listens on |
+| OLLAMA_KEEP_ALIVE | 24h | How long to keep the model loaded |
+| OLLAMA_MODEL | qwen2.5-coder:32b | Model to pull on startup |
+
+### claude-code service
+| Variable | Default | Description |
+|---|---|---|
+| ANTHROPIC_BASE_URL | http://ollama:11434/v1 | Points Claude Code at local Ollama |
+| ANTHROPIC_API_KEY | ollama | Dummy key (Ollama does not verify it) |
+| CLAUDE_CODE_DEFAULT_MODEL | qwen2.5-coder:32b | Model Claude Code will use |
+
+## Volumes
+- `ollama_data` - persists downloaded models across restarts
+- `workspace` - persistent volume mounted at `/workspace` in the claude-code container
+
+## Stopping
+```bash
+docker compose down
+# To also delete the named volumes (downloaded models and workspace files):
+docker compose down -v
+```
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,70 @@
+# Compose stack: an Ollama server (GPU-backed) plus a Node container that
+# installs Claude Code and is pointed at the Ollama API.
+#
+# The top-level `version` key is intentionally omitted: Compose v2 ignores it
+# and prints an "obsolete" warning when it is present.
+#
+# Set OLLAMA_MODEL in the shell or a .env file to change the model for both
+# services; it defaults to qwen2.5-coder:32b.
+
+services:
+  # Ollama API server, built from the local Dockerfile.
+  ollama:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: ollama
+    restart: unless-stopped
+    ports:
+      - "11434:11434"
+    volumes:
+      # Keep downloaded models across container restarts.
+      - ollama_data:/root/.ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            # Reserve all NVIDIA GPU devices for this container.
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    environment:
+      - OLLAMA_HOST=0.0.0.0
+      - OLLAMA_KEEP_ALIVE=24h
+      # Read by entrypoint.sh to decide which model to pull on startup.
+      - OLLAMA_MODEL=${OLLAMA_MODEL:-qwen2.5-coder:32b}
+
+  # Long-running Node container; Claude Code is installed when it starts and
+  # is used interactively via `docker compose exec claude-code bash`.
+  claude-code:
+    image: node:20-slim
+    container_name: claude-code
+    restart: unless-stopped
+    depends_on:
+      - ollama
+    environment:
+      # Host root only: the Anthropic client appends /v1/messages itself, so a
+      # trailing /v1 here would produce /v1/v1/messages.
+      - ANTHROPIC_BASE_URL=http://ollama:11434
+      - ANTHROPIC_API_KEY=ollama
+      # Model Claude Code requests from the backend.
+      - ANTHROPIC_MODEL=${OLLAMA_MODEL:-qwen2.5-coder:32b}
+      # NOTE(review): kept for backward compatibility; this does not appear to
+      # be a setting Claude Code reads - confirm before relying on it.
+      - CLAUDE_CODE_DEFAULT_MODEL=${OLLAMA_MODEL:-qwen2.5-coder:32b}
+    volumes:
+      - workspace:/workspace
+    working_dir: /workspace
+    # Install Claude Code, then idle so the container stays up for exec.
+    command: >
+      sh -c "npm install -g @anthropic-ai/claude-code &&
+             echo 'Claude Code installed. Run: claude' &&
+             tail -f /dev/null"
+    stdin_open: true
+    tty: true
+
+volumes:
+  ollama_data:
+    driver: local
+  workspace:
+    driver: local
--- a/entrypoint.sh
+++ b/entrypoint.sh
@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+MODEL="${OLLAMA_MODEL:-qwen2.5-coder:32b}"
+
+echo "==> Starting Ollama server..."
+ollama serve &
+OLLAMA_PID=$!
+
+# Wait for Ollama to be ready
+echo "==> Waiting for Ollama to be ready..."
+until curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; do
+  sleep 1
+done
+echo "==> Ollama is ready."
+
+# Pull the model if not already present
+if ! ollama list | grep -q "${MODEL}"; then
+  echo "==> Pulling model: ${MODEL} (this may take a while for 32b)..."
+  ollama pull "${MODEL}"
+  echo "==> Model pulled successfully."
+else
+  echo "==> Model ${MODEL} already present, skipping pull."
+fi
+
+echo "==> Ollama is running with model: ${MODEL}"
+echo "==> API available at http://0.0.0.0:11434"
+
+# Keep the server process in the foreground
+wait $OLLAMA_PID