Spaces:

priyanandanwar
/

demo-api

Running

priyanandanwar committed 12 days ago

Commit

fbbf31c

verified ·

1 Parent(s): a8d46a3

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -9,12 +9,12 @@ from transformers import AutoModel, AutoTokenizer
 # Hugging Face Cache Directory
 os.environ["HF_HOME"] = "/app/huggingface"
-os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "60"  # Increase timeout to 60 seconds
 app = FastAPI()
-# --- Load Clinical Trials CSV (for metadata lookup) ---
-csv_path = "ctg-studies.csv"  # Ensure this file is uploaded
 if os.path.exists(csv_path):
     df_trials = pd.read_csv(csv_path)
     print("✅ CSV File Loaded Successfully!")
@@ -59,19 +59,27 @@ def get_trial_info(nct_id):
 async def retrieve_trial(request: QueryRequest):
     """Retrieve Clinical Trial based on text"""
     query_vector = generate_embedding(request.text)
     distances, indices = index.search(query_vector, request.top_k)
     matched_trials = []
     for idx, dist in zip(indices[0], distances[0]):
         if idx < len(df_trials):  # Ensure index is within bounds
-            nct_id = df_trials.iloc[idx]["NCT Number"]  # Get NCT Number using FAISS index mapping
-            trial_data = get_trial_info(nct_id)  # Fetch complete trial details
             if trial_data:
                 if np.isfinite(dist) and dist >= 0:
                     trial_data["similarity"] = float(round(100 / (1 + dist), 2))
                 else:
                     trial_data["similarity"] = 0.0
                 matched_trials.append(trial_data)
     return {"matched_trials": matched_trials}

 # Hugging Face Cache Directory
 os.environ["HF_HOME"] = "/app/huggingface"
+os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "60"  # Increase timeout
 app = FastAPI()
+# --- Load Clinical Trials CSV ---
+csv_path = "ctg-studies.csv"
 if os.path.exists(csv_path):
     df_trials = pd.read_csv(csv_path)
     print("✅ CSV File Loaded Successfully!")
 async def retrieve_trial(request: QueryRequest):
     """Retrieve Clinical Trial based on text"""
     query_vector = generate_embedding(request.text)
+    # Check if FAISS index has vectors
+    if index.ntotal == 0:
+        return {"error": "FAISS index is empty. No trials available."}
     distances, indices = index.search(query_vector, request.top_k)
     matched_trials = []
     for idx, dist in zip(indices[0], distances[0]):
         if idx < len(df_trials):  # Ensure index is within bounds
+            nct_id = df_trials.iloc[idx]["NCT Number"]
+            trial_data = get_trial_info(nct_id)
             if trial_data:
+                # Handle NaN & Inf distances safely
                 if np.isfinite(dist) and dist >= 0:
                     trial_data["similarity"] = float(round(100 / (1 + dist), 2))
                 else:
+                    print(f"⚠️ Invalid distance detected: {dist}")
                     trial_data["similarity"] = 0.0
                 matched_trials.append(trial_data)
     return {"matched_trials": matched_trials}