Spaces:

salvinjose
/

HNTAI

Paused

App Files Files Community

sachinchandrankallar committed on Aug 27

Commit

625f09f

1 Parent(s): c731bd4

Fix model loader timeout

Browse files

Files changed (7) hide show

TODO_PROGRESS.md +20 -13
ai_med_extract/__pycache__/app.cpython-311.pyc +0 -0
ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc +0 -0
ai_med_extract/api/__pycache__/routes.cpython-311.pyc +0 -0
ai_med_extract/api/routes.py +9 -3
ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc +0 -0
ai_med_extract/utils/model_loader_gguf.py +6 -2

TODO_PROGRESS.md CHANGED Viewed

@@ -1,16 +1,23 @@
-# GGUF Timeout Fix Progress
-## Steps Completed:
-1. ✅ Increased GGUF timeout from 120s to 300s for Hugging Face Spaces
-2. ✅ Made timeout configurable via GGUF_GENERATION_TIMEOUT environment variable
-3. ✅ Updated both `_generate_with_timeout` and `generate` methods to use configurable timeout
-## Next Steps:
-4. Add better error handling and fallback mechanisms in routes.py
-5. Optimize GGUF model parameters for better performance on Spaces
-6. Add progress logging for generation
-7. Test the changes
-## Environment Configuration:
-- Default timeout: 300s for Spaces, 120s for local development
-- Configurable via: GGUF_GENERATION_TIMEOUT environment variable

+# GGUF Model Timeout Fix - Progress Tracking
+## Plan Overview
+1. Increase timeout settings in GGUFModelPipeline
+2. Optimize model settings for Hugging Face Spaces
+3. Add detailed logging for generation process
+4. Ensure robust fallback mechanism
+5. Test the changes
+## Steps Completed
+- [x] 1. Update timeout settings in model_loader_gguf.py
+- [ ] 2. Optimize model parameters for Spaces environment
+- [ ] 3. Add comprehensive logging to track generation timing
+- [ ] 4. Test the changes with patient summary generation API
+## Files to Modify
+- ai_med_extract/utils/model_loader_gguf.py
+- ai_med_extract/api/routes.py
+## Testing
+- [ ] Test patient summary generation locally
+- [ ] Test on Hugging Face Spaces deployment
+- [ ] Monitor logs for timeout issues

ai_med_extract/__pycache__/app.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/__pycache__/app.cpython-311.pyc and b/ai_med_extract/__pycache__/app.cpython-311.pyc differ

ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc differ

ai_med_extract/api/__pycache__/routes.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/api/__pycache__/routes.cpython-311.pyc and b/ai_med_extract/api/__pycache__/routes.cpython-311.pyc differ

ai_med_extract/api/routes.py CHANGED Viewed

@@ -876,18 +876,22 @@ def register_routes(app, agents):
     @app.route("/api/generate_summary", methods=["POST"])
     def generate_summary():
         data = request.json
         if not data or "text" not in data or not data["text"].strip():
             return jsonify({"error": "No valid text provided"}), 400
         context = data["text"]
         try:
             clean_text = PHIScrubberAgent.scrub_phi(context)
         except Exception:
             clean_text = context
         try:
-            summary = SummarizerAgent.generate_summary(Summarizer_Agent,clean_text)
             return jsonify({"summary": summary}), 200
         except Exception as e:
             return jsonify({"error": f"Summary generation failed: {str(e)}"}), 500
     @app.route("/api/extract_medical_data_from_audio", methods=["POST"])
@@ -1090,6 +1094,7 @@ def register_routes(app, agents):
             import torch
             torch.set_num_threads(2)
             if model_type == "gguf":
                 try:
                     # Support both local path and HuggingFace repo/filename
                     if model_name.endswith('.gguf') and '/' in model_name:
@@ -1098,10 +1103,11 @@ def register_routes(app, agents):
                     else:
                         pipeline = get_gguf_pipeline(model_name)
                     try:
                         # The timeout is now handled internally by the pipeline
                         summary_raw = pipeline.generate_full_summary(prompt, max_tokens=512, max_loops=1)
                         # Extract markdown summary as with other models
                         new_summary = summary_raw.split("Now generate the complete, updated clinical summary with all four sections in a markdown format:")[-1].strip()
                         if not new_summary.strip():
@@ -1114,7 +1120,7 @@ def register_routes(app, agents):
                         validation_report = validate_and_compare_summaries(old_summary, markdown_summary, "Update")
                         # Remove undefined timing variables and only log steps that are actually measured
                         total_time = time.time() - start_total
-                        print(f"[TIMING] API call: {t_api_end-t_api_start:.2f}s, TOTAL: {total_time:.2f}s")
                         return jsonify({
                             "summary": markdown_summary,
                             "validation": validation_report,

     @app.route("/api/generate_summary", methods=["POST"])
     def generate_summary():
+        logger.info("Received request to generate summary.")
         data = request.json
         if not data or "text" not in data or not data["text"].strip():
             return jsonify({"error": "No valid text provided"}), 400
         context = data["text"]
+        logger.info(f"Clean text length: {len(context)} characters.")
         try:
             clean_text = PHIScrubberAgent.scrub_phi(context)
         except Exception:
             clean_text = context
         try:
+            summary = SummarizerAgent.generate_summary(Summarizer_Agent, clean_text)
+            logger.info("Summary generated successfully.")
             return jsonify({"summary": summary}), 200
         except Exception as e:
+            logger.error(f"Summary generation failed: {str(e)}")
             return jsonify({"error": f"Summary generation failed: {str(e)}"}), 500
     @app.route("/api/extract_medical_data_from_audio", methods=["POST"])
             import torch
             torch.set_num_threads(2)
             if model_type == "gguf":
+                logger.info("Using GGUF model for summary generation.")
                 try:
                     # Support both local path and HuggingFace repo/filename
                     if model_name.endswith('.gguf') and '/' in model_name:
                     else:
                         pipeline = get_gguf_pipeline(model_name)
+                    logger.info(f"Prompt length for GGUF model: {len(prompt)} characters.")
                     try:
                         # The timeout is now handled internally by the pipeline
                         summary_raw = pipeline.generate_full_summary(prompt, max_tokens=512, max_loops=1)
                         # Extract markdown summary as with other models
                         new_summary = summary_raw.split("Now generate the complete, updated clinical summary with all four sections in a markdown format:")[-1].strip()
                         if not new_summary.strip():
                         validation_report = validate_and_compare_summaries(old_summary, markdown_summary, "Update")
                         # Remove undefined timing variables and only log steps that are actually measured
                         total_time = time.time() - start_total
+                        logger.info(f"[TIMING] API call: {t_api_end-t_api_start:.2f}s, TOTAL: {total_time:.2f}s")
                         return jsonify({
                             "summary": markdown_summary,
                             "validation": validation_report,

ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc differ

ai_med_extract/utils/model_loader_gguf.py CHANGED Viewed

@@ -118,10 +118,10 @@ class GGUFModelPipeline:
     def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=None):
         """Generate text with timeout using threading"""
-        # Use environment variable or default timeout (300s for Spaces, 120s otherwise)
         if timeout is None:
             is_hf_space = os.environ.get('SPACE_ID') is not None
-            timeout = int(os.environ.get('GGUF_GENERATION_TIMEOUT', '300' if is_hf_space else '120'))
         def _generate():
             try:
@@ -184,10 +184,12 @@ class GGUFModelPipeline:
         try:
             logger.info(f"[GGUF] Starting full summary generation with max_loops={max_loops}")
             for loop_idx in range(max_loops):
                 loop_start = time.time()
                 logger.info(f"[GGUF] Starting loop {loop_idx+1}/{max_loops}")
                 output = self.generate(current_prompt, max_tokens=max_tokens)
@@ -199,6 +201,7 @@ class GGUFModelPipeline:
                 loop_time = time.time() - loop_start
                 logger.info(f"[GGUF] loop {loop_idx+1}/{max_loops}: {loop_time:.2f}s, cumulative {time.time()-total_start:.2f}s, length={len(full_output)} chars")
                 # Check if we have all required sections
                 required_present = all(s in full_output for s in ['Clinical Assessment','Key Trends & Changes','Plan & Suggested Actions','Direct Guidance for Physician'])
@@ -213,6 +216,7 @@ class GGUFModelPipeline:
             total_time = time.time() - total_start
             logger.info(f"[GGUF] generate_full_summary completed in {total_time:.2f}s")
             # Final validation check
             if not is_complete(full_output):

     def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=None):
         """Generate text with timeout using threading"""
+        # Use environment variable or default timeout (600s for Spaces, 300s otherwise)
         if timeout is None:
             is_hf_space = os.environ.get('SPACE_ID') is not None
+            timeout = int(os.environ.get('GGUF_GENERATION_TIMEOUT', '600' if is_hf_space else '300'))
         def _generate():
             try:
         try:
             logger.info(f"[GGUF] Starting full summary generation with max_loops={max_loops}")
+            logger.info(f"[GGUF] Prompt length: {len(prompt)} characters")
             for loop_idx in range(max_loops):
                 loop_start = time.time()
                 logger.info(f"[GGUF] Starting loop {loop_idx+1}/{max_loops}")
+                logger.info(f"[GGUF] Current prompt length: {len(current_prompt)} characters")
                 output = self.generate(current_prompt, max_tokens=max_tokens)
                 loop_time = time.time() - loop_start
                 logger.info(f"[GGUF] loop {loop_idx+1}/{max_loops}: {loop_time:.2f}s, cumulative {time.time()-total_start:.2f}s, length={len(full_output)} chars")
+                logger.info(f"[GGUF] Generated {len(output)} characters in this loop")
                 # Check if we have all required sections
                 required_present = all(s in full_output for s in ['Clinical Assessment','Key Trends & Changes','Plan & Suggested Actions','Direct Guidance for Physician'])
             total_time = time.time() - total_start
             logger.info(f"[GGUF] generate_full_summary completed in {total_time:.2f}s")
+            logger.info(f"[GGUF] Final summary length: {len(full_output)} characters")
             # Final validation check
             if not is_complete(full_output):