Commit 65d7792
Parent(s): 4455263

Add debug logging

Files changed:
- Dockerfile (+1 -3)
- app.py (+71 -13)
- requirements.txt (+1 -0)
    	
Dockerfile  CHANGED

@@ -32,6 +32,4 @@ RUN chown -R user:user /app/model_cache
 EXPOSE 7860
 
 # Run the application
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
-
-
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "debug"]
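The two added flags do the debugging work here: --log-level debug turns on uvicorn's verbose logging, and --workers 1 pins the Space to a single worker process, which keeps memory usage predictable while reading the logs. The same invocation can be reproduced from Python for local debugging; a minimal sketch (the app:app import string is taken from the CMD above, the file name run_debug.py is hypothetical):

# run_debug.py - mirrors the new Dockerfile CMD for local debugging (sketch)
import uvicorn

if __name__ == "__main__":
    # uvicorn requires an import string (not the app object) once workers is set
    uvicorn.run("app:app", host="0.0.0.0", port=7860, workers=1, log_level="debug")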
    	
app.py  CHANGED

@@ -9,35 +9,85 @@ from pymongo import MongoClient
 from google import genai
 from sentence_transformers import SentenceTransformer
 
+# ✅ Enable Logging for Debugging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger("medical-chatbot")
+# Debug Start
+logger.info("🚀 Starting Medical Chatbot API...")
+print("🚀 Starting Medical Chatbot API...")
+
 # ✅ Environment Variables
 mongo_uri = os.getenv("MONGO_URI")
 index_uri = os.getenv("INDEX_URI")
 gemini_flash_api_key = os.getenv("FlashAPI")
-
+# Validate environment endpoint
 if not all([gemini_flash_api_key, mongo_uri, index_uri]):
     raise ValueError("❌ Missing API keys! Set them in Hugging Face Secrets.")
-
-
+logger.info(f"🔎 MongoDB URI: {mongo_uri}")
+logger.info(f"🔎 FAISS Index URI: {index_uri}")
+
+# ✅ Monitor Resources Before Startup
+import psutil
+def check_system_resources():
+    memory = psutil.virtual_memory()
+    cpu = psutil.cpu_percent(interval=1)
+    disk = psutil.disk_usage("/")
+    # Defines log info messages
+    logger.info(f"🔍 System Resources - RAM: {memory.percent}%, CPU: {cpu}%, Disk: {disk.percent}%")
+    if memory.percent > 85:
+        logger.warning("⚠️ High RAM usage detected!")
+    if cpu > 90:
+        logger.warning("⚠️ High CPU usage detected!")
+    if disk.percent > 90:
+        logger.warning("⚠️ High Disk usage detected!")
+check_system_resources()
+
+# ✅ Reduce Memory usage with optimizers
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 # ✅ Initialize FastAPI app
 app = FastAPI(title="Medical Chatbot API")
+from fastapi.middleware.cors import CORSMiddleware # Bypassing CORS origin
+# Define the origins
+origins = [
+    "http://localhost:5173",                    # Vite dev server
+    "http://localhost:3000",                    # Another vercel local dev
+    "https://medical-chatbot-henna.vercel.app", # ✅ Vercel frontend production URL
+
+]
+# Add the CORS middleware:
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,   # or ["*"] to allow all
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
 # ✅ Use Lazy Loading for FAISS Index
 index = None  # Delay FAISS Index loading until first query
 
-# ✅ Load SentenceTransformer Model (Quantized)
+# ✅ Load SentenceTransformer Model (Quantized/Halved)
+logger.info("📥 Loading SentenceTransformer Model...")
 print("📥 Loading SentenceTransformer Model...")
 MODEL_CACHE_DIR = "/app/model_cache"
-
-embedding_model = 
+try:
+    embedding_model = SentenceTransformer(MODEL_CACHE_DIR, device="cpu")
+    embedding_model = embedding_model.half()  # Reduce memory
+    logger.info("✅ Model Loaded Successfully.")
+    print("✅ Model Loaded Successfully.")
+except Exception as e:
+    logger.error(f"❌ Model Loading Failed: {e}")
+    exit(1)
+
 
 # ✅ Setup MongoDB Connection
+# QA data
 client = MongoClient(mongo_uri)
 db = client["MedicalChatbotDB"]
 qa_collection = db["qa_data"]
-
+# FAISS Index data
 iclient = MongoClient(index_uri)
 idb = iclient["MedicalChatbotDB"]
 index_collection = idb["faiss_index_files"]

@@ -56,18 +106,20 @@ def load_faiss_index():
             index_bytes_np = np.frombuffer(stored_index_bytes, dtype='uint8')
             index = faiss.deserialize_index(index_bytes_np)
             print("✅ FAISS Index Loaded")
+            logger.info("✅ FAISS Index Loaded")
         else:
             print("❌ FAISS index not found in GridFS.")
+            logger.error("❌ FAISS index not found in GridFS.")
     return index
 
 # ✅ Retrieve Medical Info
 def retrieve_medical_info(query):
     global index
     index = load_faiss_index()  # Load FAISS on demand
-
+    # N/A question
     if index is None:
         return ["No medical information available."]
-
+    # Embed the query and send to QA db to lookup
     query_embedding = embedding_model.encode([query], convert_to_numpy=True)
     _, idxs = index.search(query_embedding, k=3)
     results = [qa_collection.find_one({"i": int(i)}).get("Doctor", "No answer available.") for i in idxs[0]]

@@ -80,6 +132,7 @@ def gemini_flash_completion(prompt, model, temperature=0.7):
         response = client_genai.models.generate_content(model=model, contents=prompt)
         return response.text
     except Exception as e:
+        logger.error(f"❌ Error calling Gemini API: {e}")
         print(f"❌ Error calling Gemini API: {e}")
         return "Error generating response from Gemini."
 

@@ -118,15 +171,20 @@ async def chat_endpoint(data: dict):
     lang = data.get("lang", "EN")
     if not user_query:
         return JSONResponse(content={"response": "No query provided."})
-
+    # Output parameter
     start_time = time.time()
     response_text = chatbot.chat(user_query, lang)
     end_time = time.time()
     response_text += f"\n\n(Response time: {end_time - start_time:.2f} seconds)"
-
+    # Send JSON response
     return JSONResponse(content={"response": response_text})
 
-# ✅ Run Uvicorn 
+# ✅ Run Uvicorn
 if __name__ == "__main__":
+    logger.info("✅ Starting FastAPI Server...")
     print("✅ Starting FastAPI Server...")
-
+    try:
+        uvicorn.run(app, host="0.0.0.0", port=7860, log_level="debug")
+    except Exception as e:
+        logger.error(f"❌ Server Startup Failed: {e}")
+        exit(1)
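One caveat on the model-loading change: embedding_model.half() casts the weights to float16, which roughly halves RAM, but CPU inference in half precision is not supported by every torch build, so encode() can fail later even though loading succeeded. A defensive variant, sketched under the same MODEL_CACHE_DIR layout (this guard is not part of the commit):

# fp16 guard (sketch): fall back to float32 if half precision is unusable on CPU
from sentence_transformers import SentenceTransformer

MODEL_CACHE_DIR = "/app/model_cache"
embedding_model = SentenceTransformer(MODEL_CACHE_DIR, device="cpu")
try:
    embedding_model = embedding_model.half()
    # Smoke-test a tiny encode; some CPU torch builds lack fp16 kernels
    embedding_model.encode(["ping"], convert_to_numpy=True)
except Exception:
    # Reload in default float32 precision
    embedding_model = SentenceTransformer(MODEL_CACHE_DIR, device="cpu")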
    	
requirements.txt  CHANGED

@@ -16,6 +16,7 @@ pymongo
 uvicorn
 fastapi
 torch               # Reduce model load with half-precision (float16) to reduce RAM usage
+psutil              # CPU/RAM logger
 # gradio            # On Huggingface deployment with gradio or serving FastAPI only
 # streamlit         # On streamlit deployment with daemon
 # requests
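A last note on a context line visible in the app.py diff rather than on this commit itself: retrieve_medical_info calls qa_collection.find_one({"i": int(i)}).get(...), which raises AttributeError whenever an index id has no matching document, because find_one returns None. A guarded lookup, sketched against the same collection shape (the helper name lookup_answers is hypothetical):

# Hypothetical hardening of the lookup inside retrieve_medical_info
def lookup_answers(qa_collection, idxs):
    results = []
    for i in idxs[0]:
        doc = qa_collection.find_one({"i": int(i)})  # None if the id is stale
        results.append(doc.get("Doctor", "No answer available.") if doc else "No answer available.")
    return results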