hariharan220 committed on
Commit
9e8f213
·
verified ·
1 Parent(s): a8e50f2

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +30 -28
main.py CHANGED
@@ -1,13 +1,4 @@
1
  import os
2
-
3
- # Set cache directories to writable locations
4
- os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
5
- os.environ["HF_HOME"] = "/tmp/hf_home"
6
- os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface_cache"
7
-
8
- os.makedirs("/tmp/huggingface_cache", exist_ok=True)
9
- os.makedirs("/tmp/hf_home", exist_ok=True)
10
-
11
  import pdfplumber
12
  import re
13
  import nltk
@@ -16,30 +7,32 @@ import uvicorn
16
  import time
17
  from nltk.tokenize import sent_tokenize
18
  from transformers import pipeline
19
- from fastapi import FastAPI, File, UploadFile, HTTPException
20
  from fastapi.middleware.cors import CORSMiddleware
21
 
22
- # Set NLTK data directory to a writable location
23
- NLTK_DATA_DIR = "/tmp/nltk_data"
 
 
 
 
 
 
 
 
24
  os.makedirs(NLTK_DATA_DIR, exist_ok=True)
25
  nltk.data.path.append(NLTK_DATA_DIR)
26
 
27
- # Download required NLTK resources
28
  try:
29
  nltk.data.find("tokenizers/punkt")
30
  except LookupError:
31
  nltk.download("punkt", download_dir=NLTK_DATA_DIR)
32
 
33
- # Download punkt_tab as well (to fix the error)
34
- try:
35
- nltk.data.find("tokenizers/punkt_tab")
36
- except LookupError:
37
- nltk.download("punkt_tab", download_dir=NLTK_DATA_DIR)
38
-
39
- # Initialize FastAPI App
40
  app = FastAPI()
41
 
42
- # Enable CORS for API Accessibility
43
  app.add_middleware(
44
  CORSMiddleware,
45
  allow_origins=["*"],
@@ -48,12 +41,20 @@ app.add_middleware(
48
  allow_headers=["*"],
49
  )
50
 
51
- # Automatically Detect Device (Use GPU if Available)
52
  device = 0 if torch.cuda.is_available() else -1
53
- print(f"Using Device: {'GPU' if device == 0 else 'CPU'}")
54
-
55
- # Load Summarization Model
56
- summarizer = pipeline("summarization", model="google/pegasus-xsum", device=device)
 
 
 
 
 
 
 
 
57
 
58
  # --- **Generalized Cleaning** ---
59
  def clean_text(text):
@@ -83,13 +84,13 @@ def split_text(text, chunk_size=2048):
83
  chunks.append(current_chunk.strip())
84
  return chunks
85
 
86
- # --- **Summarization Endpoint** ---
87
  @app.post("/summarize-pdf/")
88
  async def summarize_pdf(file: UploadFile = File(...)):
89
  try:
90
  start_time = time.time()
91
  pdf_content = await file.read()
92
- pdf_path = "/tmp/temp.pdf" # Store in /tmp/
93
  with open(pdf_path, "wb") as f:
94
  f.write(pdf_content)
95
 
@@ -107,5 +108,6 @@ async def summarize_pdf(file: UploadFile = File(...)):
107
  except Exception as e:
108
  return {"error": str(e)}
109
 
 
110
  if __name__ == "__main__":
111
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
import os

# ✅ Hugging Face cache locations MUST be exported BEFORE `transformers` is
# imported: the library resolves TRANSFORMERS_CACHE / HF_HOME at import time,
# so setting them afterwards (as the previous revision did) has no effect and
# the read-only default cache path still gets used on Spaces containers.
TMP_DIR = "/tmp/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = TMP_DIR
os.environ["HF_HOME"] = TMP_DIR
os.environ["HUGGINGFACE_HUB_CACHE"] = TMP_DIR
os.makedirs(TMP_DIR, exist_ok=True)

import pdfplumber
import re
import nltk
import torch
import uvicorn
import time
from nltk.tokenize import sent_tokenize
from transformers import pipeline
from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware

# ✅ Keep NLTK data in a writable directory (the container HOME is read-only).
NLTK_DATA_DIR = "/tmp/nltk_data"
os.makedirs(NLTK_DATA_DIR, exist_ok=True)
nltk.data.path.append(NLTK_DATA_DIR)

# ✅ Fix: download only 'punkt' (the model sent_tokenize needs), and only
# when it is not already present — avoids a network hit on every restart.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt", download_dir=NLTK_DATA_DIR)

# ✅ Initialize FastAPI App
app = FastAPI()
34
 
35
+ # ✅ Enable CORS for API Accessibility
36
  app.add_middleware(
37
  CORSMiddleware,
38
  allow_origins=["*"],
 
41
  allow_headers=["*"],
42
  )
43
 
44
# ✅ Force GPU usage if available; transformers pipelines take a device
# index, where 0 is the first CUDA device and -1 means CPU.
device = 0 if torch.cuda.is_available() else -1
if device == 0:
    print("🚀 Running on GPU!")
else:
    print("⚠️ GPU Not Available! Running on CPU.")

# ✅ Load the summarization model, forcing its weights into /tmp/.
# NOTE: `cache_dir` is not a parameter of pipeline() itself; it must be
# forwarded to the underlying from_pretrained() call via `model_kwargs`.
summarizer = pipeline(
    "summarization",
    model="google/pegasus-xsum",
    device=device,
    model_kwargs={"cache_dir": TMP_DIR},
)
58
 
59
  # --- **Generalized Cleaning** ---
60
  def clean_text(text):
 
84
  chunks.append(current_chunk.strip())
85
  return chunks
86
 
87
+ # ✅ **Summarization API**
88
  @app.post("/summarize-pdf/")
89
  async def summarize_pdf(file: UploadFile = File(...)):
90
  try:
91
  start_time = time.time()
92
  pdf_content = await file.read()
93
+ pdf_path = "/tmp/temp.pdf"  # ✅ Store in /tmp/
94
  with open(pdf_path, "wb") as f:
95
  f.write(pdf_content)
96
 
 
108
  except Exception as e:
109
  return {"error": str(e)}
110
 
111
+ # ✅ Start Uvicorn for Hugging Face Spaces
112
  if __name__ == "__main__":
113
  uvicorn.run(app, host="0.0.0.0", port=7860)