Replace the Ollama service with a custom ROCm image combining ghcr.io/ggml-org/llama.cpp:server-rocm and llama-swap v199.

Main motivations:

- Unblock qwen35 HF GGUFs (qwen35 architecture not supported in Ollama 0.20.4 for HF-imported models)
- Stay current with llama.cpp upstream without waiting for Ollama releases

Changes:

- ollama/Dockerfile: build llama-swap on top of llama.cpp:server-rocm (sketched below)
- ollama/llama-swap.yaml: define 4 models with full sampler config, GPU offload, and mmproj for the two multimodal HF fine-tunes (sketched below)
- ollama/docker-compose.yml: replace Ollama image with local build; fix broken volume mount (was /ubuntu/.ollama, now explicit /models) (sketched below)
- ollama/Caddyfile: update upstream port 11434→8080 (llama-swap default) (sketched below)
- ai/docker-compose.yml: switch Open WebUI from OLLAMA_BASE_URL to OPENAI_API_BASE_URL pointing at llama-swap /v1 endpoint (full file below)
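
Only ai/docker-compose.yml is shown in full below; the other touched files can be sketched. A minimal guess at ollama/Dockerfile, assuming llama-swap v199 ships a prebuilt linux/amd64 release tarball; the asset name, install path, binary location, and CLI flags here are assumptions, not the committed file:

# Sketch only: the base image and version pin come from the commit message;
# the rest is an assumed layout, not the actual Dockerfile.
FROM ghcr.io/ggml-org/llama.cpp:server-rocm

ARG LLAMA_SWAP_VERSION=v199

# Hypothetical release asset name; check the llama-swap releases page for the real one.
ADD https://github.com/mostlygeek/llama-swap/releases/download/${LLAMA_SWAP_VERSION}/llama-swap_linux_amd64.tar.gz /tmp/llama-swap.tar.gz
RUN tar -xzf /tmp/llama-swap.tar.gz -C /usr/local/bin llama-swap && \
    rm /tmp/llama-swap.tar.gz

# llama-swap listens on 8080 by default, which is what the Caddyfile
# upstream change (11434 -> 8080) relies on.
EXPOSE 8080
ENTRYPOINT ["/usr/local/bin/llama-swap", "--config", "/app/llama-swap.yaml"]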
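
ollama/llama-swap.yaml is not shown either; its shape would be roughly the following, with hypothetical model ids, GGUF paths, and sampler values (the committed file defines four such entries, two of them with mmproj for the multimodal fine-tunes). llama-swap substitutes ${PORT} into each cmd and proxies requests to the spawned server; ttl unloads an idle model after the given number of seconds:

# Single-model sketch; the model id, file paths, and sampler values are made up.
models:
  "qwen3-vl-finetune":
    cmd: |
      /app/llama-server
      --port ${PORT}
      -m /models/qwen3-vl-finetune.Q4_K_M.gguf
      --mmproj /models/qwen3-vl-finetune.mmproj.gguf
      -ngl 99
      --temp 0.7 --top-p 0.8 --top-k 20
    ttl: 300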
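
The matching ollama/docker-compose.yml service (local build, explicit /models mount) would then be along these lines; the service name, host-side paths, and ROCm device passthrough are guesses:

# Sketch of the replaced service in ollama/docker-compose.yml; only the
# /models container path comes from the commit message.
services:
  ollama:
    build: .
    restart: unless-stopped
    volumes:
      - ./llama-swap.yaml:/app/llama-swap.yaml:ro
      - /srv/docker/ollama/models:/models     # replaces the broken /ubuntu/.ollama mount
    devices:
      - /dev/kfd                              # ROCm compute interface
      - /dev/dri                              # GPU render nodes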
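
And the ollama/Caddyfile bullet amounts to a one-line upstream change, roughly as follows (whether the upstream is localhost or a container name depends on the network layout over there):

ollama.lan.poldebra.me {
	reverse_proxy localhost:8080   # was localhost:11434, Ollama's default port
}

The full ai/docker-compose.yml after the change:
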
services:
  webui:
    image: ghcr.io/open-webui/open-webui:main
    restart: unless-stopped
    hostname: openwebui
    container_name: openwebui
    volumes:
      - "/srv/docker/ai/data/data:/app/backend/data" # Double data is intentional
      - "/srv/docker/ai/data/.webui_secret_key:/app/backend/.webui_secret_key"
    environment:
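      # llama-swap exposes an OpenAI-compatible API; Open WebUI talks to its
      # /v1 endpoint and the native Ollama API integration is disabled below.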
      - OPENAI_API_BASE_URL=https://ollama.lan.poldebra.me/v1
      - OPENAI_API_KEY=sk-no-key-required
      - ENABLE_OLLAMA_API=false
    networks:
      internal:
        ipv4_address: 172.24.0.5
    logging:
      driver: "json-file"
      options:
        mode: "non-blocking"
        max-size: "10m"
        max-file: "3"

  tailscale:
    hostname: ai
    image: tailscale/tailscale:latest
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "tailscale status"]
      interval: 1s
      timeout: 5s
      retries: 60
    volumes:
      - /srv/docker/ai/tailscale:/var/lib
      - /lib/modules:/lib/modules:ro
    devices:
      - /dev/net/tun:/dev/net/tun
    cap_add:
      - net_admin
      - sys_module
      - net_raw
    command: tailscaled
    networks:
      - internal
    logging:
      driver: "json-file"
      options:
        mode: "non-blocking"
        max-size: "10m"
        max-file: "3"

  reverse_proxy:
    image: caddybuilds/caddy-namecheap:2-alpine
    restart: unless-stopped
    network_mode: service:tailscale
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - /srv/docker/ai/caddy/config/:/config/caddy:rw
      - /srv/docker/ai/caddy/data/:/data/caddy:rw
      - /srv/docker/ai/caddy/share/:/usr/share/caddy:rw
    env_file:
      - caddy.env
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "https://ai.lan.poldebra.me"]
      interval: 10s
      timeout: 30s
      retries: 5
      start_period: 90s
    depends_on:
      webui:
        condition: service_started
      tailscale:
        condition: service_healthy
    logging:
      driver: "json-file"
      options:
        mode: "non-blocking"
        max-size: "10m"
        max-file: "3"

networks:
  internal:
    ipam:
      config:
        - subnet: 172.24.0.0/24