Spaces:

priyanandanwar
/

demo-api

Running

App Files Files Community

priyanandanwar committed 12 days ago

Commit

cf4089c

verified ·

1 Parent(s): 9ae18e2

Update main.py

Browse files

Files changed (1) hide show

main.py +64 -0

main.py CHANGED Viewed

@@ -1,3 +1,62 @@
 @app.post("/retrieve")
 async def retrieve_trial(request: QueryRequest):
     query_vector = generate_embedding(request.text)
@@ -20,3 +79,8 @@ async def retrieve_trial(request: QueryRequest):
             matched_trials.append(trial_data)
     return {"matched_trials": matched_trials}

+import os
+
+# Hugging Face settings must be in the environment BEFORE `transformers`
+# (and, through it, `huggingface_hub`) is imported: the hub library reads
+# HF_HOME and HF_HUB_DOWNLOAD_TIMEOUT once, at import time, so assigning them
+# after the import leaves the cache location and download timeout unchanged.
+os.environ["HF_HOME"] = "/app/huggingface"
+os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "60"
+
+import faiss  # noqa: E402
+import torch  # noqa: E402
+import numpy as np  # noqa: E402
+import pandas as pd  # noqa: E402
+from fastapi import FastAPI  # noqa: E402
+from pydantic import BaseModel  # noqa: E402
+from transformers import AutoModel, AutoTokenizer  # noqa: E402
+app = FastAPI()
+
+# --- Trial metadata (CSV) ---
+# The service cannot start without the trials table, so a missing file
+# aborts module import with FileNotFoundError.
+csv_path = "ctg-studies.csv"
+if not os.path.exists(csv_path):
+    raise FileNotFoundError("❌ CSV File Not Found!")
+df_trials = pd.read_csv(csv_path)
+print("✅ CSV Loaded!")
+# --- Vector index (FAISS) ---
+# Use the index saved on disk when there is one; otherwise fall back to an
+# empty flat L2 index so the module still imports.
+# `dimension` is presumably the embedding width of the retrieval model
+# loaded below -- TODO confirm against the checkpoint's hidden size.
+dimension = 768
+faiss_index_path = "clinical_trials.index"
+if not os.path.exists(faiss_index_path):
+    index = faiss.IndexFlatL2(dimension)
+    print("⚠️ FAISS Index Empty!")
+else:
+    index = faiss.read_index(faiss_index_path)
+    print("✅ FAISS Index Loaded!")
+
+# --- Embedding model ---
+# Tokenizer and encoder are both loaded from the same checkpoint name,
+# tokenizer first.
+retrieval_model_name = "priyanandanwar/fine-tuned-gatortron"
+retrieval_tokenizer = AutoTokenizer.from_pretrained(retrieval_model_name)
+retrieval_model = AutoModel.from_pretrained(retrieval_model_name)
+# --- Request Model ---
+class QueryRequest(BaseModel):
+    # Request body accepted by the POST /retrieve handler.
+    # (Kept as `#` comments rather than a docstring so the generated API
+    # schema description is not affected.)
+    # Free-text query; the /retrieve handler passes it to generate_embedding().
+    text: str
+    # Defaults to 5. Presumably the number of nearest trials to return; its
+    # use is not visible in this hunk -- TODO confirm in retrieve_trial.
+    top_k: int = 5
+# --- Generate Embedding ---
+def generate_embedding(text):
+    """Embed ``text`` with the retrieval model and L2-normalize the result.
+
+    Args:
+        text: A query string, or a list of strings to embed as one batch.
+
+    Returns:
+        NumPy array of shape ``(n_texts, hidden_size)`` holding the
+        first-token hidden state of each input, every row scaled to unit
+        L2 norm.
+    """
+    # Every input is truncated/padded to exactly 512 tokens.
+    inputs = retrieval_tokenizer(
+        text,
+        return_tensors="pt",
+        truncation=True,
+        padding="max_length",
+        max_length=512,
+    )
+    with torch.no_grad():  # inference only; skip gradient tracking
+        outputs = retrieval_model(**inputs)
+    # Keep the hidden state at sequence position 0 for each input.
+    emb = outputs.last_hidden_state[:, 0, :].numpy()
+    # Normalize row by row. One norm taken over the whole array is only
+    # correct for a single input; for a batch it would scale every row by a
+    # shared factor instead of giving each row unit length.
+    norms = np.linalg.norm(emb, axis=1, keepdims=True)
+    # Guard against an all-zero row, which would otherwise become NaN (0 / 0).
+    return emb / np.clip(norms, 1e-12, None)
+# --- Compute Similarity ---
+def compute_similarity(distance):
+    """Map a distance to a similarity percentage via exponential decay.
+
+    Computes ``100 * exp(-distance)``: a distance of 0 scores 100.0 and the
+    score falls toward 0 as the distance grows. (This is an exponential
+    decay, not a softmax.)
+
+    Args:
+        distance: Non-negative distance, as a Python or NumPy scalar.
+            Negative values (e.g. floating-point rounding noise) are treated
+            as 0 so the score never exceeds 100.
+
+    Returns:
+        A built-in ``float`` rounded to two decimals.
+    """
+    # Convert to a built-in float up front: a float32 NumPy scalar would
+    # otherwise come back as np.float32, which is not a `float` subclass and
+    # is rejected by the standard JSON encoder.
+    clamped = max(float(distance), 0.0)
+    return round(float(np.exp(-clamped)) * 100, 2)
+# --- Retrieve Trials ---
 @app.post("/retrieve")
 async def retrieve_trial(request: QueryRequest):
     query_vector = generate_embedding(request.text)
             matched_trials.append(trial_data)
     return {"matched_trials": matched_trials}
+@app.get("/")
+async def root():
+    # Root endpoint: answers GET / with a fixed status message.
+    status = {"message": "TrialGPT API Running!"}
+    return status