Spaces:

MANOJSEQ
/

newsglobe-backend

Sleeping

MANOJSEQ committed on Aug 19

Commit

ab6969d

verified ·

1 Parent(s): d62f608

Upload Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -5,12 +5,13 @@ ENV PYTHONUNBUFFERED=1 \
     PIP_NO_CACHE_DIR=1 \
     HF_HUB_DISABLE_TELEMETRY=1 \
     PORT=7860 \
-    NLTK_DATA=/app/nltk_data \
-    HF_HOME=/app/hf_cache \
-    TRANSFORMERS_CACHE=/app/hf_cache \
-    SENTENCE_TRANSFORMERS_HOME=/app/hf_cache
-# (optional) handy tools for healthchecks & logs
 RUN apt-get update && apt-get install -y --no-install-recommends curl git && \
     rm -rf /var/lib/apt/lists/*
@@ -26,30 +27,27 @@ RUN python -m pip install --upgrade pip && \
 # ---- App code ----
 COPY . .
-# ---- Warm caches into the image layer ----
-# 1) Cache sentence-transformers model (SBERT)
 RUN python - <<'PY'
 from sentence_transformers import SentenceTransformer
 SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-print("✅ SBERT model cached")
 PY
-# 2) Cache NLTK VADER lexicon into /app/nltk_data
 RUN python - <<'PY'
 import os, nltk
-os.makedirs("/app/nltk_data", exist_ok=True)
-nltk.download("vader_lexicon", download_dir="/app/nltk_data")
-print("✅ NLTK VADER cached")
 PY
-# 3) (Optional) Cache tldextract's PSL so first run is snappy
-RUN python - <<'PY'
-import tldextract
-tldextract.extract("example.com")
-print("✅ tldextract PSL cached")
-PY
 EXPOSE 7860
 # ---- Run ----
-CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port ${PORT:-7860}"]

     PIP_NO_CACHE_DIR=1 \
     HF_HUB_DISABLE_TELEMETRY=1 \
     PORT=7860 \
+    # ✅ Writable + persistent on HF Spaces:
+    HF_HOME=/data/hf_cache \
+    TRANSFORMERS_CACHE=/data/hf_cache \
+    SENTENCE_TRANSFORMERS_HOME=/data/hf_cache \
+    NLTK_DATA=/data/nltk_data
+# small tools
 RUN apt-get update && apt-get install -y --no-install-recommends curl git && \
     rm -rf /var/lib/apt/lists/*
 # ---- App code ----
 COPY . .
+# ✅ Make caches writable for the runtime user
+RUN mkdir -p /data/hf_cache /data/nltk_data && chmod -R 777 /data
+# (optional) pre-warm models into /data caches to speed first run
 RUN python - <<'PY'
 from sentence_transformers import SentenceTransformer
 SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+print("✅ SBERT cached")
 PY
 RUN python - <<'PY'
 import os, nltk
+os.makedirs(os.getenv("NLTK_DATA","/data/nltk_data"), exist_ok=True)
+nltk.download("vader_lexicon")
+print("✅ VADER cached")
 PY
+# ensure everything under /data is writable after warm
+RUN chmod -R 777 /data
 EXPOSE 7860
 # ---- Run ----
+CMD ["sh","-c","uvicorn main:app --host 0.0.0.0 --port ${PORT:-7860}"]