# Docker Compose definition for the llama.cpp-based LLM web app.
# Serves a Gradio-style UI on port 7860 and runs a local GGUF model.

# NOTE: the top-level `version` key is obsolete in Compose V2 (it is parsed
# but ignored). Kept for compatibility with older `docker-compose` binaries.
version: '3.8'

services:
  llm-app:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      # Quoted to avoid YAML's sexagesimal-number trap on port mappings.
      - "7860:7860"
    environment:
      # Hugging Face repo and file for the quantized model weights.
      - MODEL_REPO=lmstudio-community/gemma-3n-E4B-it-text-GGUF
      - MODEL_FILENAME=gemma-3n-E4B-it-Q8_0.gguf
      # llama.cpp runtime knobs: context window, GPU offload layers, CPU threads.
      - N_CTX=4096
      - N_GPU_LAYERS=0
      - N_THREADS=4
      # Generation settings.
      - MAX_NEW_TOKENS=256
      - TEMPERATURE=0.1
    volumes:
      # Optional: Mount models directory to persist downloaded models
      - ./models:/app/models
    restart: unless-stopped
    mem_limit: 8g
    # Uncomment below for GPU support
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]