Commit 625f09f
Parent(s): c731bd4

model loader timeout fix
Files changed:
- TODO_PROGRESS.md +20 -13
- ai_med_extract/__pycache__/app.cpython-311.pyc +0 -0
- ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc +0 -0
- ai_med_extract/api/__pycache__/routes.cpython-311.pyc +0 -0
- ai_med_extract/api/routes.py +9 -3
- ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc +0 -0
- ai_med_extract/utils/model_loader_gguf.py +6 -2
TODO_PROGRESS.md
CHANGED
@@ -1,16 +1,23 @@
-# GGUF Timeout Fix Progress
+# GGUF Model Timeout Fix - Progress Tracking
 
-##
-1.
-2.
-3.
+## Plan Overview
+1. Increase timeout settings in GGUFModelPipeline
+2. Optimize model settings for Hugging Face Spaces
+3. Add detailed logging for generation process
+4. Ensure robust fallback mechanism
+5. Test the changes
 
-##
-
-
-
-
+## Steps Completed
+- [x] 1. Update timeout settings in model_loader_gguf.py
+- [ ] 2. Optimize model parameters for Spaces environment
+- [ ] 3. Add comprehensive logging to track generation timing
+- [ ] 4. Test the changes with patient summary generation API
 
-##
--
--
+## Files to Modify
+- ai_med_extract/utils/model_loader_gguf.py
+- ai_med_extract/api/routes.py
+
+## Testing
+- [ ] Test patient summary generation locally
+- [ ] Test on Hugging Face Spaces deployment
+- [ ] Monitor logs for timeout issues
ai_med_extract/__pycache__/app.cpython-311.pyc
CHANGED
Binary files a/ai_med_extract/__pycache__/app.cpython-311.pyc and b/ai_med_extract/__pycache__/app.cpython-311.pyc differ

ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc
CHANGED
Binary files a/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc differ

ai_med_extract/api/__pycache__/routes.cpython-311.pyc
CHANGED
Binary files a/ai_med_extract/api/__pycache__/routes.cpython-311.pyc and b/ai_med_extract/api/__pycache__/routes.cpython-311.pyc differ
ai_med_extract/api/routes.py
CHANGED
@@ -876,18 +876,22 @@ def register_routes(app, agents):
 
     @app.route("/api/generate_summary", methods=["POST"])
     def generate_summary():
+        logger.info("Received request to generate summary.")
         data = request.json
         if not data or "text" not in data or not data["text"].strip():
             return jsonify({"error": "No valid text provided"}), 400
         context = data["text"]
+        logger.info(f"Clean text length: {len(context)} characters.")
         try:
             clean_text = PHIScrubberAgent.scrub_phi(context)
         except Exception:
             clean_text = context
         try:
-            summary = SummarizerAgent.generate_summary(Summarizer_Agent,clean_text)
+            summary = SummarizerAgent.generate_summary(Summarizer_Agent, clean_text)
+            logger.info("Summary generated successfully.")
             return jsonify({"summary": summary}), 200
         except Exception as e:
+            logger.error(f"Summary generation failed: {str(e)}")
             return jsonify({"error": f"Summary generation failed: {str(e)}"}), 500
 
     @app.route("/api/extract_medical_data_from_audio", methods=["POST"])
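For reference, the endpoint takes a JSON body with a "text" field and returns {"summary": ...} on success. A minimal client call against a local instance (the base URL, port, and example payload are assumptions):

import requests

# Hypothetical base URL; adjust to wherever the Flask app is served.
resp = requests.post(
    "http://localhost:7860/api/generate_summary",
    json={"text": "Patient presents with chest pain and shortness of breath."},
    timeout=600,  # generation can be slow; keep this above the server-side GGUF timeout
)
print(resp.status_code, resp.json())  # {"summary": "..."} on 200, {"error": "..."} otherwise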
@@ -1090,6 +1094,7 @@ def register_routes(app, agents):
         import torch
         torch.set_num_threads(2)
         if model_type == "gguf":
+            logger.info("Using GGUF model for summary generation.")
             try:
                 # Support both local path and HuggingFace repo/filename
                 if model_name.endswith('.gguf') and '/' in model_name:
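The comment above notes that both a local path and a HuggingFace repo/filename are supported; the resolution logic itself sits outside this hunk. A sketch of the usual approach with huggingface_hub (this helper is illustrative, not the repository's actual code):

import os
from huggingface_hub import hf_hub_download  # assumed to be available

def resolve_gguf_path(model_name: str) -> str:
    """Return a local .gguf path, downloading from the Hub if needed."""
    if os.path.exists(model_name):
        return model_name  # already a local file
    # "org/repo/file.gguf" style: split into repo_id and filename
    repo_id, filename = model_name.rsplit("/", 1)
    return hf_hub_download(repo_id=repo_id, filename=filename)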
@@ -1098,10 +1103,11 @@ def register_routes(app, agents):
             else:
                 pipeline = get_gguf_pipeline(model_name)
 
+            logger.info(f"Prompt length for GGUF model: {len(prompt)} characters.")
+
             try:
                 # The timeout is now handled internally by the pipeline
                 summary_raw = pipeline.generate_full_summary(prompt, max_tokens=512, max_loops=1)
-
                 # Extract markdown summary as with other models
                 new_summary = summary_raw.split("Now generate the complete, updated clinical summary with all four sections in a markdown format:")[-1].strip()
                 if not new_summary.strip():
@@ -1114,7 +1120,7 @@ def register_routes(app, agents):
                 validation_report = validate_and_compare_summaries(old_summary, markdown_summary, "Update")
                 # Remove undefined timing variables and only log steps that are actually measured
                 total_time = time.time() - start_total
-
+                logger.info(f"[TIMING] API call: {t_api_end-t_api_start:.2f}s, TOTAL: {total_time:.2f}s")
                 return jsonify({
                     "summary": markdown_summary,
                     "validation": validation_report,
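The new [TIMING] log depends on t_api_start, t_api_end, and start_total being captured earlier in the handler, which this hunk does not show. A sketch of the assumed measurement pattern:

import time

def timed_generate(pipeline, prompt):
    start_total = time.time()               # taken at the top of the request handler
    t_api_start = time.time()               # immediately before the model call
    summary_raw = pipeline.generate_full_summary(prompt, max_tokens=512, max_loops=1)
    t_api_end = time.time()                 # immediately after the model call
    total_time = time.time() - start_total  # includes any post-processing
    print(f"[TIMING] API call: {t_api_end - t_api_start:.2f}s, TOTAL: {total_time:.2f}s")
    return summary_raw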
ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc
CHANGED
Binary files a/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc differ
ai_med_extract/utils/model_loader_gguf.py
CHANGED
@@ -118,10 +118,10 @@ class GGUFModelPipeline:
 
     def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=None):
         """Generate text with timeout using threading"""
-        # Use environment variable or default timeout (
+        # Use environment variable or default timeout (600s for Spaces, 300s otherwise)
        if timeout is None:
             is_hf_space = os.environ.get('SPACE_ID') is not None
-            timeout = int(os.environ.get('GGUF_GENERATION_TIMEOUT', '
+            timeout = int(os.environ.get('GGUF_GENERATION_TIMEOUT', '600' if is_hf_space else '300'))
 
         def _generate():
             try:
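Only the head of _generate_with_timeout appears in this hunk; the thread-join machinery implied by the def _generate(): stub is elided. A minimal sketch of that pattern (the result/exception plumbing and the llama-cpp-python call are assumptions, not the file's actual code):

import threading

def generate_with_timeout(llm, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=300):
    """Run a blocking llama.cpp call in a worker thread and enforce a deadline."""
    result, error = {}, {}

    def _generate():
        try:
            # llm is assumed to be a llama-cpp-python Llama instance (callable)
            result["out"] = llm(prompt, max_tokens=max_tokens,
                                temperature=temperature, top_p=top_p)
        except Exception as exc:
            error["exc"] = exc

    worker = threading.Thread(target=_generate, daemon=True)
    worker.start()
    worker.join(timeout)            # wait at most `timeout` seconds
    if worker.is_alive():           # worker still running -> deadline exceeded
        raise TimeoutError(f"GGUF generation exceeded {timeout}s")
    if "exc" in error:
        raise error["exc"]
    return result["out"]

With the new defaults, a deployment can still override the limit per environment, e.g. by setting GGUF_GENERATION_TIMEOUT=900 in the Space's variables.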
@@ -184,10 +184,12 @@ class GGUFModelPipeline:
 
         try:
             logger.info(f"[GGUF] Starting full summary generation with max_loops={max_loops}")
+            logger.info(f"[GGUF] Prompt length: {len(prompt)} characters")
 
             for loop_idx in range(max_loops):
                 loop_start = time.time()
                 logger.info(f"[GGUF] Starting loop {loop_idx+1}/{max_loops}")
+                logger.info(f"[GGUF] Current prompt length: {len(current_prompt)} characters")
 
                 output = self.generate(current_prompt, max_tokens=max_tokens)
 
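The hunk shows a continuation loop that re-prompts the model up to max_loops times until the summary is complete; the accumulation logic between the logged lines is elided. One plausible shape, assumed rather than taken from the file:

def generate_full_summary(self, prompt, max_tokens=512, max_loops=1):
    full_output, current_prompt = "", prompt
    for loop_idx in range(max_loops):
        output = self.generate(current_prompt, max_tokens=max_tokens)
        full_output += output
        if is_complete(full_output):        # stop once all sections are present
            break
        # otherwise feed the partial output back so the model continues
        current_prompt = prompt + full_output
    return full_output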
@@ -199,6 +201,7 @@ class GGUFModelPipeline:
                 loop_time = time.time() - loop_start
 
                 logger.info(f"[GGUF] loop {loop_idx+1}/{max_loops}: {loop_time:.2f}s, cumulative {time.time()-total_start:.2f}s, length={len(full_output)} chars")
+                logger.info(f"[GGUF] Generated {len(output)} characters in this loop")
 
                 # Check if we have all required sections
                 required_present = all(s in full_output for s in ['Clinical Assessment','Key Trends & Changes','Plan & Suggested Actions','Direct Guidance for Physician'])
@@ -213,6 +216,7 @@ class GGUFModelPipeline:
 
             total_time = time.time() - total_start
             logger.info(f"[GGUF] generate_full_summary completed in {total_time:.2f}s")
+            logger.info(f"[GGUF] Final summary length: {len(full_output)} characters")
 
             # Final validation check
             if not is_complete(full_output):
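is_complete is called but not defined anywhere in this diff. Given the required_present check above, it plausibly tests for the same four section headings; a sketch under that assumption:

REQUIRED_SECTIONS = [
    'Clinical Assessment',
    'Key Trends & Changes',
    'Plan & Suggested Actions',
    'Direct Guidance for Physician',
]

def is_complete(text: str) -> bool:
    """Heuristic completeness check: all four summary sections must be present."""
    return all(section in text for section in REQUIRED_SECTIONS)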