sachinchandrankallar committed on
Commit
625f09f
·
1 Parent(s): c731bd4

Fix model loader timeout

Browse files
TODO_PROGRESS.md CHANGED
@@ -1,16 +1,23 @@
1
- # GGUF Timeout Fix Progress
2
 
3
- ## Steps Completed:
4
- 1. Increased GGUF timeout from 120s to 300s for Hugging Face Spaces
5
- 2. Made timeout configurable via GGUF_GENERATION_TIMEOUT environment variable
6
- 3. Updated both `_generate_with_timeout` and `generate` methods to use configurable timeout
 
 
7
 
8
- ## Next Steps:
9
- 4. Add better error handling and fallback mechanisms in routes.py
10
- 5. Optimize GGUF model parameters for better performance on Spaces
11
- 6. Add progress logging for generation
12
- 7. Test the changes
13
 
14
- ## Environment Configuration:
15
- - Default timeout: 300s for Spaces, 120s for local development
16
- - Configurable via: GGUF_GENERATION_TIMEOUT environment variable
 
 
 
 
 
 
1
+ # GGUF Model Timeout Fix - Progress Tracking
2
 
3
+ ## Plan Overview
4
+ 1. Increase timeout settings in GGUFModelPipeline
5
+ 2. Optimize model settings for Hugging Face Spaces
6
+ 3. Add detailed logging for generation process
7
+ 4. Ensure robust fallback mechanism
8
+ 5. Test the changes
9
 
10
+ ## Progress Checklist
11
+ - [x] 1. Update timeout settings in model_loader_gguf.py
12
+ - [ ] 2. Optimize model parameters for Spaces environment
13
+ - [ ] 3. Add comprehensive logging to track generation timing
14
+ - [ ] 4. Test the changes with patient summary generation API
15
 
16
+ ## Files to Modify
17
+ - ai_med_extract/utils/model_loader_gguf.py
18
+ - ai_med_extract/api/routes.py
19
+
20
+ ## Testing
21
+ - [ ] Test patient summary generation locally
22
+ - [ ] Test on Hugging Face Spaces deployment
23
+ - [ ] Monitor logs for timeout issues
ai_med_extract/__pycache__/app.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/__pycache__/app.cpython-311.pyc and b/ai_med_extract/__pycache__/app.cpython-311.pyc differ
 
ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc differ
 
ai_med_extract/api/__pycache__/routes.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/api/__pycache__/routes.cpython-311.pyc and b/ai_med_extract/api/__pycache__/routes.cpython-311.pyc differ
 
ai_med_extract/api/routes.py CHANGED
@@ -876,18 +876,22 @@ def register_routes(app, agents):
876
 
877
  @app.route("/api/generate_summary", methods=["POST"])
878
  def generate_summary():
 
879
  data = request.json
880
  if not data or "text" not in data or not data["text"].strip():
881
  return jsonify({"error": "No valid text provided"}), 400
882
  context = data["text"]
 
883
  try:
884
  clean_text = PHIScrubberAgent.scrub_phi(context)
885
  except Exception:
886
  clean_text = context
887
  try:
888
- summary = SummarizerAgent.generate_summary(Summarizer_Agent,clean_text)
 
889
  return jsonify({"summary": summary}), 200
890
  except Exception as e:
 
891
  return jsonify({"error": f"Summary generation failed: {str(e)}"}), 500
892
 
893
  @app.route("/api/extract_medical_data_from_audio", methods=["POST"])
@@ -1090,6 +1094,7 @@ def register_routes(app, agents):
1090
  import torch
1091
  torch.set_num_threads(2)
1092
  if model_type == "gguf":
 
1093
  try:
1094
  # Support both local path and HuggingFace repo/filename
1095
  if model_name.endswith('.gguf') and '/' in model_name:
@@ -1098,10 +1103,11 @@ def register_routes(app, agents):
1098
  else:
1099
  pipeline = get_gguf_pipeline(model_name)
1100
 
 
 
1101
  try:
1102
  # The timeout is now handled internally by the pipeline
1103
  summary_raw = pipeline.generate_full_summary(prompt, max_tokens=512, max_loops=1)
1104
-
1105
  # Extract markdown summary as with other models
1106
  new_summary = summary_raw.split("Now generate the complete, updated clinical summary with all four sections in a markdown format:")[-1].strip()
1107
  if not new_summary.strip():
@@ -1114,7 +1120,7 @@ def register_routes(app, agents):
1114
  validation_report = validate_and_compare_summaries(old_summary, markdown_summary, "Update")
1115
  # Remove undefined timing variables and only log steps that are actually measured
1116
  total_time = time.time() - start_total
1117
- print(f"[TIMING] API call: {t_api_end-t_api_start:.2f}s, TOTAL: {total_time:.2f}s")
1118
  return jsonify({
1119
  "summary": markdown_summary,
1120
  "validation": validation_report,
 
876
 
877
  @app.route("/api/generate_summary", methods=["POST"])
878
  def generate_summary():
879
+ logger.info("Received request to generate summary.")
880
  data = request.json
881
  if not data or "text" not in data or not data["text"].strip():
882
  return jsonify({"error": "No valid text provided"}), 400
883
  context = data["text"]
884
+ logger.info(f"Clean text length: {len(context)} characters.")
885
  try:
886
  clean_text = PHIScrubberAgent.scrub_phi(context)
887
  except Exception:
888
  clean_text = context
889
  try:
890
+ summary = SummarizerAgent.generate_summary(Summarizer_Agent, clean_text)
891
+ logger.info("Summary generated successfully.")
892
  return jsonify({"summary": summary}), 200
893
  except Exception as e:
894
+ logger.error(f"Summary generation failed: {str(e)}")
895
  return jsonify({"error": f"Summary generation failed: {str(e)}"}), 500
896
 
897
  @app.route("/api/extract_medical_data_from_audio", methods=["POST"])
 
1094
  import torch
1095
  torch.set_num_threads(2)
1096
  if model_type == "gguf":
1097
+ logger.info("Using GGUF model for summary generation.")
1098
  try:
1099
  # Support both local path and HuggingFace repo/filename
1100
  if model_name.endswith('.gguf') and '/' in model_name:
 
1103
  else:
1104
  pipeline = get_gguf_pipeline(model_name)
1105
 
1106
+ logger.info(f"Prompt length for GGUF model: {len(prompt)} characters.")
1107
+
1108
  try:
1109
  # The timeout is now handled internally by the pipeline
1110
  summary_raw = pipeline.generate_full_summary(prompt, max_tokens=512, max_loops=1)
 
1111
  # Extract markdown summary as with other models
1112
  new_summary = summary_raw.split("Now generate the complete, updated clinical summary with all four sections in a markdown format:")[-1].strip()
1113
  if not new_summary.strip():
 
1120
  validation_report = validate_and_compare_summaries(old_summary, markdown_summary, "Update")
1121
  # Remove undefined timing variables and only log steps that are actually measured
1122
  total_time = time.time() - start_total
1123
+ logger.info(f"[TIMING] API call: {t_api_end-t_api_start:.2f}s, TOTAL: {total_time:.2f}s")
1124
  return jsonify({
1125
  "summary": markdown_summary,
1126
  "validation": validation_report,
ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc differ
 
ai_med_extract/utils/model_loader_gguf.py CHANGED
@@ -118,10 +118,10 @@ class GGUFModelPipeline:
118
 
119
  def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=None):
120
  """Generate text with timeout using threading"""
121
- # Use environment variable or default timeout (300s for Spaces, 120s otherwise)
122
  if timeout is None:
123
  is_hf_space = os.environ.get('SPACE_ID') is not None
124
- timeout = int(os.environ.get('GGUF_GENERATION_TIMEOUT', '300' if is_hf_space else '120'))
125
 
126
  def _generate():
127
  try:
@@ -184,10 +184,12 @@ class GGUFModelPipeline:
184
 
185
  try:
186
  logger.info(f"[GGUF] Starting full summary generation with max_loops={max_loops}")
 
187
 
188
  for loop_idx in range(max_loops):
189
  loop_start = time.time()
190
  logger.info(f"[GGUF] Starting loop {loop_idx+1}/{max_loops}")
 
191
 
192
  output = self.generate(current_prompt, max_tokens=max_tokens)
193
 
@@ -199,6 +201,7 @@ class GGUFModelPipeline:
199
  loop_time = time.time() - loop_start
200
 
201
  logger.info(f"[GGUF] loop {loop_idx+1}/{max_loops}: {loop_time:.2f}s, cumulative {time.time()-total_start:.2f}s, length={len(full_output)} chars")
 
202
 
203
  # Check if we have all required sections
204
  required_present = all(s in full_output for s in ['Clinical Assessment','Key Trends & Changes','Plan & Suggested Actions','Direct Guidance for Physician'])
@@ -213,6 +216,7 @@ class GGUFModelPipeline:
213
 
214
  total_time = time.time() - total_start
215
  logger.info(f"[GGUF] generate_full_summary completed in {total_time:.2f}s")
 
216
 
217
  # Final validation check
218
  if not is_complete(full_output):
 
118
 
119
  def _generate_with_timeout(self, prompt, max_tokens=512, temperature=0.5, top_p=0.95, timeout=None):
120
  """Generate text with timeout using threading"""
121
+ # Use environment variable or default timeout (600s for Spaces, 300s otherwise)
122
  if timeout is None:
123
  is_hf_space = os.environ.get('SPACE_ID') is not None
124
+ timeout = int(os.environ.get('GGUF_GENERATION_TIMEOUT', '600' if is_hf_space else '300'))
125
 
126
  def _generate():
127
  try:
 
184
 
185
  try:
186
  logger.info(f"[GGUF] Starting full summary generation with max_loops={max_loops}")
187
+ logger.info(f"[GGUF] Prompt length: {len(prompt)} characters")
188
 
189
  for loop_idx in range(max_loops):
190
  loop_start = time.time()
191
  logger.info(f"[GGUF] Starting loop {loop_idx+1}/{max_loops}")
192
+ logger.info(f"[GGUF] Current prompt length: {len(current_prompt)} characters")
193
 
194
  output = self.generate(current_prompt, max_tokens=max_tokens)
195
 
 
201
  loop_time = time.time() - loop_start
202
 
203
  logger.info(f"[GGUF] loop {loop_idx+1}/{max_loops}: {loop_time:.2f}s, cumulative {time.time()-total_start:.2f}s, length={len(full_output)} chars")
204
+ logger.info(f"[GGUF] Generated {len(output)} characters in this loop")
205
 
206
  # Check if we have all required sections
207
  required_present = all(s in full_output for s in ['Clinical Assessment','Key Trends & Changes','Plan & Suggested Actions','Direct Guidance for Physician'])
 
216
 
217
  total_time = time.time() - total_start
218
  logger.info(f"[GGUF] generate_full_summary completed in {total_time:.2f}s")
219
+ logger.info(f"[GGUF] Final summary length: {len(full_output)} characters")
220
 
221
  # Final validation check
222
  if not is_complete(full_output):