# Image for a Flask application served by Gunicorn. It installs the system
# packages used for PDF/OCR processing (tesseract, poppler) and the Python
# packages used for text extraction and transformer models.

# Use an official Python runtime as the base image.
FROM python:3.8-slim

# All subsequent relative paths and the container's default cwd are /app.
WORKDIR /app

# Install system dependencies for pdfplumber, pytesseract, and general
# compatibility. The apt package lists are removed in the same layer so they
# do not end up in the image.
RUN apt-get update && apt-get install -y \
        tesseract-ocr \
        libtesseract-dev \
        poppler-utils \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies, including sentencepiece for Pegasus.
# This runs before the application code is copied so that changing the code
# does not invalidate this (slow) layer in the build cache.
RUN pip install --no-cache-dir \
        flask \
        flask-cors \
        pdfplumber \
        pillow \
        pytesseract \
        numpy \
        torch \
        transformers \
        datasets \
        scikit-learn \
        gunicorn \
        sentencepiece

# Copy application code.
COPY . /app

# Create uploads and cache directories with proper permissions. This happens
# after COPY so the permissions apply to the final directories even when the
# build context already contains them. Mode 777 makes them writable by any
# user the container runs as.
RUN mkdir -p /app/uploads /app/cache && \
    chmod -R 777 /app/uploads /app/cache

# Set environment variable for the Hugging Face cache (using HF_HOME as per
# the latest transformers recommendation).
ENV HF_HOME=/app/cache

# Expose port (Hugging Face Spaces typically uses 7860, but we'll stick to
# 5000 and adjust in app.py if needed).
EXPOSE 5000

# Run with Gunicorn: serve the `app` object from the `app` module on all
# interfaces, port 5000. Exec form is used so Gunicorn receives signals
# directly.
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app:app"]