gemma-3-4b-it-qat / Dockerfile
kairusama's picture
mount secret
32c538e verified
raw
history blame contribute delete
670 Bytes
# Serve google/gemma-3-4b-it (QAT q4_0 GGUF) with llama.cpp's server.
# The base image's entrypoint is a tool dispatcher; CMD's leading "--server"
# selects llama-server, and the remaining args are passed straight to it.
FROM ghcr.io/ggml-org/llama.cpp:full

# OS deps. Use apt-get (scripting-stable CLI), skip recommended packages,
# and purge the apt lists in the same layer so they don't persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends wget \
    && rm -rf /var/lib/apt/lists/*

# huggingface_hub CLI, needed only to fetch the gated model at build time.
RUN pip install --no-cache-dir -U "huggingface_hub[cli]"

# Download the gated model files.
# SECURITY: do NOT use `huggingface-cli login` here — it writes the token to
# ~/.cache/huggingface/token inside a layer, baking the secret into the image.
# Instead, the BuildKit secret is mounted only for this RUN and exported as
# HF_TOKEN (honored by huggingface-cli), so it never touches the image
# filesystem. Both files come from the same repo, so fetch them in one layer.
RUN --mount=type=secret,id=HF_TOKEN \
    HF_TOKEN="$(cat /run/secrets/HF_TOKEN)" \
    huggingface-cli download google/gemma-3-4b-it-qat-q4_0-gguf \
        gemma-3-4b-it-q4_0.gguf \
        mmproj-model-f16-4B.gguf \
        --local-dir /models

# Documentation only: the server listens on 7860 (standard HF Spaces port).
EXPOSE 7860

# Exec-form CMD: "--server" picks llama-server in the base image's entrypoint;
# -m / --mmproj point at the files downloaded above; -n 512 caps generation length.
CMD ["--server", "-m", "/models/gemma-3-4b-it-q4_0.gguf", "--mmproj", "/models/mmproj-model-f16-4B.gguf", "--port", "7860", "--host", "0.0.0.0", "-n", "512"]