hariharan220 committed on
Commit
a60072c
·
verified ·
1 Parent(s): 5a12285

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +13 -7
main.py CHANGED
@@ -1,24 +1,31 @@
 
 
 
 
 
 
 
 
 
 
1
  import pdfplumber
2
  import re
3
  import nltk
4
  import torch
5
  import uvicorn
6
- import os
7
  import time
8
  from nltk.tokenize import sent_tokenize
9
  from transformers import pipeline
10
  from fastapi import FastAPI, File, UploadFile, HTTPException
11
  from fastapi.middleware.cors import CORSMiddleware
12
 
13
- # ✅ Set Hugging Face Cache Directory to /tmp/ (Fix Permission Issues)
14
- os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
15
 
16
  # ✅ Ensure NLTK Dependencies are Stored in a Writable Directory
17
  NLTK_DATA_DIR = "/tmp/nltk_data"
18
  os.makedirs(NLTK_DATA_DIR, exist_ok=True)
19
  nltk.data.path.append(NLTK_DATA_DIR)
20
-
21
- # ✅ Download punkt tokenizer
22
  try:
23
  nltk.data.find("tokenizers/punkt")
24
  except LookupError:
@@ -40,7 +47,7 @@ app.add_middleware(
40
  device = 0 if torch.cuda.is_available() else -1
41
  print(f"Using Device: {'GPU' if device == 0 else 'CPU'}")
42
 
43
- # ✅ Load Summarization Model (Fixing Cache Issue)
44
  summarizer = pipeline("summarization", model="google/pegasus-xsum", device=device)
45
 
46
  # --- **Generalized Cleaning** ---
@@ -95,6 +102,5 @@ async def summarize_pdf(file: UploadFile = File(...)):
95
  except Exception as e:
96
  return {"error": str(e)}
97
 
98
- # ✅ Run FastAPI Server
99
  if __name__ == "__main__":
100
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ import os
2
+
3
+ # Set cache directories to writable locations
4
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
5
+ os.environ["HF_HOME"] = "/tmp/hf_home"
6
+ os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface_cache"
7
+
8
+ os.makedirs("/tmp/huggingface_cache", exist_ok=True)
9
+ os.makedirs("/tmp/hf_home", exist_ok=True)
10
+
11
  import pdfplumber
12
  import re
13
  import nltk
14
  import torch
15
  import uvicorn
 
16
  import time
17
  from nltk.tokenize import sent_tokenize
18
  from transformers import pipeline
19
  from fastapi import FastAPI, File, UploadFile, HTTPException
20
  from fastapi.middleware.cors import CORSMiddleware
21
 
22
+ # ✅ Set Hugging Face Token for Authentication
23
+ from huggingface_hub import login
24
 
25
  # ✅ Ensure NLTK Dependencies are Stored in a Writable Directory
26
  NLTK_DATA_DIR = "/tmp/nltk_data"
27
  os.makedirs(NLTK_DATA_DIR, exist_ok=True)
28
  nltk.data.path.append(NLTK_DATA_DIR)
 
 
29
  try:
30
  nltk.data.find("tokenizers/punkt")
31
  except LookupError:
 
47
  device = 0 if torch.cuda.is_available() else -1
48
  print(f"Using Device: {'GPU' if device == 0 else 'CPU'}")
49
 
50
+ # ✅ Load Summarization Model
51
  summarizer = pipeline("summarization", model="google/pegasus-xsum", device=device)
52
 
53
  # --- **Generalized Cleaning** ---
 
102
  except Exception as e:
103
  return {"error": str(e)}
104
 
 
105
  if __name__ == "__main__":
106
  uvicorn.run(app, host="0.0.0.0", port=7860)