# Docker Compose configuration for the llm-app service (GGUF model served via llama.cpp-style runtime).
# NOTE: the top-level `version` key is accepted but obsolete in Compose v2+; kept for compatibility.
version: '3.8'

services:
  llm-app:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      # host:container — quoted so YAML never misreads the mapping as a number
      - "7860:7860"
    environment:
      # Hugging Face repo and GGUF filename the app downloads/loads at startup
      - MODEL_REPO=lmstudio-community/gemma-3n-E4B-it-text-GGUF
      - MODEL_FILENAME=gemma-3n-E4B-it-Q8_0.gguf
      - N_CTX=4096            # context window size in tokens
      - N_GPU_LAYERS=0        # 0 = CPU-only inference; raise when GPU support is enabled below
      - N_THREADS=4           # CPU threads used for inference
      - MAX_NEW_TOKENS=256    # cap on generated tokens per request
      - TEMPERATURE=0.1       # low temperature → near-deterministic sampling
    volumes:
      # Optional: Mount models directory to persist downloaded models
      - ./models:/app/models
    restart: unless-stopped
    mem_limit: 8g
    # Uncomment below for GPU support
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]