hariharan220 committed on
Commit
3b4877b
·
verified ·
1 Parent(s): aaa2dd0

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +28 -19
main.py CHANGED
@@ -1,4 +1,13 @@
1
  import os
 
 
 
 
 
 
 
 
 
2
  import pdfplumber
3
  import re
4
  import nltk
@@ -7,29 +16,30 @@ import uvicorn
7
  import time
8
  from nltk.tokenize import sent_tokenize
9
  from transformers import pipeline
10
- from fastapi import FastAPI, File, UploadFile
11
  from fastapi.middleware.cors import CORSMiddleware
12
 
13
- # ✅ Set cache directories to writable locations for Hugging Face
14
- os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
15
- os.environ["HF_HOME"] = "/tmp/hf_home"
16
- os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface_cache"
17
-
18
- os.makedirs("/tmp/huggingface_cache", exist_ok=True)
19
- os.makedirs("/tmp/hf_home", exist_ok=True)
20
-
21
- # ✅ Ensure NLTK Dependencies are Stored in a Writable Directory
22
  NLTK_DATA_DIR = "/tmp/nltk_data"
23
  os.makedirs(NLTK_DATA_DIR, exist_ok=True)
24
  nltk.data.path.append(NLTK_DATA_DIR)
25
 
26
- # ✅ Fix: Download only 'punkt' (NOT 'punkt_tab')
27
- nltk.download("punkt", download_dir=NLTK_DATA_DIR)
 
 
 
 
 
 
 
 
 
28
 
29
- # ✅ Initialize FastAPI App
30
  app = FastAPI()
31
 
32
- # ✅ Enable CORS for API Accessibility
33
  app.add_middleware(
34
  CORSMiddleware,
35
  allow_origins=["*"],
@@ -38,11 +48,11 @@ app.add_middleware(
38
  allow_headers=["*"],
39
  )
40
 
41
- # ✅ Automatically Detect Device (Use GPU if Available)
42
  device = 0 if torch.cuda.is_available() else -1
43
  print(f"Using Device: {'GPU' if device == 0 else 'CPU'}")
44
 
45
- # ✅ Load Summarization Model
46
  summarizer = pipeline("summarization", model="google/pegasus-xsum", device=device)
47
 
48
  # --- **Generalized Cleaning** ---
@@ -73,13 +83,13 @@ def split_text(text, chunk_size=2048):
73
  chunks.append(current_chunk.strip())
74
  return chunks
75
 
76
- # ✅ **Summarization API**
77
  @app.post("/summarize-pdf/")
78
  async def summarize_pdf(file: UploadFile = File(...)):
79
  try:
80
  start_time = time.time()
81
  pdf_content = await file.read()
82
- pdf_path = "/tmp/temp.pdf" # ✅ Store in /tmp/
83
  with open(pdf_path, "wb") as f:
84
  f.write(pdf_content)
85
 
@@ -97,6 +107,5 @@ async def summarize_pdf(file: UploadFile = File(...)):
97
  except Exception as e:
98
  return {"error": str(e)}
99
 
100
- # ✅ Start Uvicorn for Hugging Face Spaces
101
  if __name__ == "__main__":
102
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  import os
2
+
3
+ # Set cache directories to writable locations
4
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
5
+ os.environ["HF_HOME"] = "/tmp/hf_home"
6
+ os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface_cache"
7
+
8
+ os.makedirs("/tmp/huggingface_cache", exist_ok=True)
9
+ os.makedirs("/tmp/hf_home", exist_ok=True)
10
+
11
  import pdfplumber
12
  import re
13
  import nltk
 
16
  import time
17
  from nltk.tokenize import sent_tokenize
18
  from transformers import pipeline
19
+ from fastapi import FastAPI, File, UploadFile, HTTPException
20
  from fastapi.middleware.cors import CORSMiddleware
21
 
22
+ # Set NLTK data directory to a writable location
 
 
 
 
 
 
 
 
23
  NLTK_DATA_DIR = "/tmp/nltk_data"
24
  os.makedirs(NLTK_DATA_DIR, exist_ok=True)
25
  nltk.data.path.append(NLTK_DATA_DIR)
26
 
27
+ # Download required NLTK resources
28
+ try:
29
+ nltk.data.find("tokenizers/punkt")
30
+ except LookupError:
31
+ nltk.download("punkt", download_dir=NLTK_DATA_DIR)
32
+
33
+ # Download punkt_tab as well (to fix the error)
34
+ try:
35
+ nltk.data.find("tokenizers/punkt_tab")
36
+ except LookupError:
37
+ nltk.download("punkt_tab", download_dir=NLTK_DATA_DIR)
38
 
39
+ # Initialize FastAPI App
40
  app = FastAPI()
41
 
42
+ # Enable CORS for API Accessibility
43
  app.add_middleware(
44
  CORSMiddleware,
45
  allow_origins=["*"],
 
48
  allow_headers=["*"],
49
  )
50
 
51
+ # Automatically Detect Device (Use GPU if Available)
52
  device = 0 if torch.cuda.is_available() else -1
53
  print(f"Using Device: {'GPU' if device == 0 else 'CPU'}")
54
 
55
+ # Load Summarization Model
56
  summarizer = pipeline("summarization", model="google/pegasus-xsum", device=device)
57
 
58
  # --- **Generalized Cleaning** ---
 
83
  chunks.append(current_chunk.strip())
84
  return chunks
85
 
86
+ # --- **Summarization Endpoint** ---
87
  @app.post("/summarize-pdf/")
88
  async def summarize_pdf(file: UploadFile = File(...)):
89
  try:
90
  start_time = time.time()
91
  pdf_content = await file.read()
92
+ pdf_path = "/tmp/temp.pdf" # Store in /tmp/
93
  with open(pdf_path, "wb") as f:
94
  f.write(pdf_content)
95
 
 
107
  except Exception as e:
108
  return {"error": str(e)}
109
 
 
110
  if __name__ == "__main__":
111
  uvicorn.run(app, host="0.0.0.0", port=7860)