#!/bin/bash
#
# Entrypoint: start the Ollama server, make sure the configured model is
# available locally, then stay attached to the server process.
#
# Environment:
#   OLLAMA_MODEL  Model to serve (default: qwen2.5-coder:32b).
#
# Exit status: the server's exit status, or non-zero if the server dies
# during startup or the model cannot be pulled.
set -euo pipefail

MODEL="${OLLAMA_MODEL:-qwen2.5-coder:32b}"
OLLAMA_PID=""

#######################################
# Stop the background server (if started), reap it, and exit.
# Globals:   OLLAMA_PID (read)
# Arguments: $1 - optional exit status; defaults to the server's status
#######################################
stop_and_exit() {
  local rc=0
  # Avoid re-entering the handler while we are already shutting down.
  trap - TERM INT
  if [[ -n "${OLLAMA_PID}" ]]; then
    kill -TERM "${OLLAMA_PID}" 2>/dev/null || true
    wait "${OLLAMA_PID}" 2>/dev/null || rc=$?
  fi
  exit "${1:-${rc}}"
}

# Forward termination requests to the server; without a handler the signal
# never reaches the child and the server is not shut down cleanly.
trap stop_and_exit TERM INT

echo "==> Starting Ollama server..."
ollama serve &
OLLAMA_PID=$!

# Wait for Ollama to be ready, but give up if the server process has died
# instead of polling forever.
echo "==> Waiting for Ollama to be ready..."
until curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; do
  if ! kill -0 "${OLLAMA_PID}" 2>/dev/null; then
    echo "==> ERROR: Ollama server exited before becoming ready." >&2
    startup_rc=0
    wait "${OLLAMA_PID}" || startup_rc=$?
    # Never report success when the server failed to come up.
    if (( startup_rc == 0 )); then
      startup_rc=1
    fi
    exit "${startup_rc}"
  fi
  sleep 1
done
echo "==> Ollama is ready."

# `ollama list` prints fully tagged names; an untagged request refers to
# the ":latest" tag. Only the last path component is inspected so that a
# registry host with a port (host:5000/ns/model) is not mistaken for a tag.
case "${MODEL##*/}" in
  *:*) wanted_name="${MODEL}" ;;
  *) wanted_name="${MODEL}:latest" ;;
esac

# If listing fails, fall through to the pull rather than aborting.
installed_models="$(ollama list)" || installed_models=""

# Pull the model if not already present. The name is compared exactly
# against the first column (header row skipped) so that regex
# metacharacters in the name and longer tags sharing a prefix cannot
# produce a false "already present".
if awk -v want="${wanted_name}" \
  'NR > 1 && $1 == want { found = 1 } END { exit !found }' \
  <<<"${installed_models}"; then
  echo "==> Model ${MODEL} already present, skipping pull."
else
  echo "==> Pulling model: ${MODEL} (this may take a while for 32b)..."
  if ! ollama pull "${MODEL}"; then
    echo "==> ERROR: failed to pull model: ${MODEL}" >&2
    stop_and_exit 1
  fi
  echo "==> Model pulled successfully."
fi

echo "==> Ollama is running with model: ${MODEL}"
echo "==> API available at http://0.0.0.0:11434"

# Stay attached to the server and propagate its exit status. A trapped
# TERM/INT interrupts this wait and hands control to stop_and_exit.
server_rc=0
wait "${OLLAMA_PID}" || server_rc=$?
exit "${server_rc}"