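# syntax=docker/dockerfile:1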
# Python 3.10 slim base image (small footprint, suitable for HuggingFace Spaces)
FROM python:3.10-slim
# Set working directory
WORKDIR /app
# Install build tools and libraries needed to compile llama-cpp-python, plus git and git-lfs
RUN apt-get update && apt-get install -y \
build-essential \
cmake \
wget \
curl \
git \
git-lfs \
pkg-config \
libopenblas-dev \
libssl-dev \
musl-dev \
&& rm -rf /var/lib/apt/lists/*
# Initialize git-lfs
RUN git lfs install
# Unbuffered logs, no .pyc files, and no pip cache inside the container
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_NO_CACHE_DIR=1
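# Compile llama-cpp-python against OpenBLAS. Note: newer llama.cpp releases
# renamed this option to -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS, so use
# whichever flag matches the llama-cpp-python version pinned in requirements.txt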
ENV CMAKE_ARGS="-DLLAMA_OPENBLAS=on"
ENV FORCE_CMAKE=1
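# Presumably read by the application code (e.g. config.py) to detect container runs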
ENV DOCKER_CONTAINER=true
# Create models directory
RUN mkdir -p /app/models
# Some prebuilt llama-cpp-python wheels resolve the musl loader path; link it to
# the musl libc from musl-dev when present, otherwise fall back to glibc.
# (An existence check is required here: ln -sf succeeds even when its target is
# missing, so a plain `ln || ln` fallback would never trigger.)
RUN if [ -e /usr/lib/x86_64-linux-musl/libc.so ]; then \
        ln -sf /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1; \
    else \
        ln -sf /usr/lib/x86_64-linux-gnu/libc.so.6 /lib/libc.musl-x86_64.so.1; \
    fi
# Copy requirements first for better Docker layer caching
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
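# requirements.txt is not shown here, but given this Dockerfile it is assumed to
# pin at least llama-cpp-python (compiled via CMAKE_ARGS/FORCE_CMAKE above),
# huggingface_hub (used by the pre-download step below), and gradio (serves the
# UI on port 7860)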
# Copy config.py first so the pre-download step can read MODEL_REPO and MODEL_FILENAME
COPY config.py .
# Pre-download the model at build time so container startup does not repeat the
# download (HUGGINGFACE_TOKEN is optional; HF Spaces exposes build-time secrets
# as environment variables)
RUN python <<'PYEOF'
import os
from huggingface_hub import hf_hub_download
from config import Config

os.makedirs('/app/models', exist_ok=True)
print(f'Downloading model {Config.MODEL_REPO}/{Config.MODEL_FILENAME}...')
path = hf_hub_download(
    repo_id=Config.MODEL_REPO,
    filename=Config.MODEL_FILENAME,
    local_dir='/app/models',
    token=os.getenv('HUGGINGFACE_TOKEN') or None,
)
print(f'Model downloaded to: {path}')

# Basic integrity checks: the file must exist and be plausibly model-sized
if not os.path.exists(path):
    raise FileNotFoundError(f'Model file not found: {path}')
size = os.path.getsize(path)
print(f'Model file size: {size / (1024 ** 3):.2f} GB')
if size <= 1024 * 1024:
    raise ValueError(f'Downloaded model file seems too small: {size} bytes')
print('Model download verification successful')
PYEOF
# Sanity-check the baked-in model; this hardcoded filename must match Config.MODEL_FILENAME
RUN ls -la /app/models/ && \
[ -f "/app/models/gemma-3n-E4B-it-Q8_0.gguf" ] || (echo "Model file not found!" && exit 1)
# Copy application files
COPY . .
# Make entrypoint script executable
RUN chmod +x entrypoint.sh
# Create a non-root user for security
RUN useradd -m -u 1000 user && chown -R user:user /app
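# Note: chown -R after COPY re-stores every changed file (including the
# multi-GB model) in this layer; COPY --chown=user:user is the leaner pattern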
USER user
# Expose the port that Gradio will run on
EXPOSE 7860
# Set entrypoint and default command
ENTRYPOINT ["./entrypoint.sh"]
CMD ["python", "main.py", "--mode", "gradio"]
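# Local usage sketch (image name is illustrative; HF Spaces builds and runs
# this automatically and maps port 7860):
#   docker build -t gemma-gguf-space .
#   docker run -p 7860:7860 gemma-gguf-space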