xfcxcxcdfdfd committed on
Commit
99af063
·
verified ·
1 Parent(s): 295803f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -26
app.py CHANGED
@@ -357,16 +357,17 @@ async def process_message(message: str):
357
  eos_found = False
358
 
359
  start_time = time.time()
 
 
360
  while current_inputs and not eos_found:
361
- with ThreadPoolExecutor() as executor:
362
- futures = [
363
- executor.submit(generate_model_response, model, current_inputs, max_tokens=max_token_limit)
364
- for model in global_data['models'].values()
365
- ]
366
- responses = [
367
- {'model': model_name, 'response': future.result()}
368
- for model_name, future in zip(global_data['models'].keys(), as_completed(futures))
369
- ]
370
  unique_responses = remove_repetitive_responses(responses)
371
  formatted_response = next(iter(unique_responses.values()))
372
 
@@ -397,6 +398,7 @@ async def process_message(message: str):
397
  current_inputs = formatted_response if len(formatted_response.split()) > 0 else ""
398
 
399
  end_time = time.time()
 
400
  print(f"Total time taken to process response {end_time-start_time}")
401
 
402
  return StreamingResponse(stream_response(inputs), media_type="text/plain")
@@ -419,13 +421,57 @@ async def generate_image(prompt: str):
419
 
420
  image = Image.open(io.BytesIO(image_bytes))
421
  print("Image generated successfully.")
422
- return image
 
 
 
 
 
423
  except Exception as e:
424
  print(f"Error generating image: {e}")
425
- return None
426
  else:
427
  print("No image model loaded.")
428
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
 
430
 
431
  app = FastAPI()
@@ -440,19 +486,10 @@ async def generate(request: ChatRequest):
440
 
441
  @app.post("/generate_image")
442
  async def generate_image_endpoint(request: ImageRequest):
443
- try:
444
- image = await generate_image(request.prompt)
445
- if image:
446
- buffered = io.BytesIO()
447
- image.save(buffered, format="PNG")
448
- image_base64 = base64.b64encode(buffered.getvalue()).decode()
449
-
450
- return JSONResponse(content={"image": image_base64})
451
- else:
452
- return JSONResponse(content={"error": "Image generation failed or no model loaded"})
453
- except Exception as e:
454
- return JSONResponse(content={"error": str(e)})
455
-
456
 
457
  def run_uvicorn():
458
  try:
@@ -460,7 +497,7 @@ def run_uvicorn():
460
  except Exception as e:
461
  print(f"Error al ejecutar uvicorn: {e}")
462
 
463
-
464
  if __name__ == "__main__":
465
  Thread(target=run_uvicorn).start()
 
466
  asyncio.get_event_loop().run_forever()
 
357
  eos_found = False
358
 
359
  start_time = time.time()
360
+
361
+ executor = ThreadPoolExecutor()
362
  while current_inputs and not eos_found:
363
+ futures = [
364
+ executor.submit(generate_model_response, model, current_inputs, max_tokens=max_token_limit)
365
+ for model in global_data['models'].values()
366
+ ]
367
+ responses = [
368
+ {'model': model_name, 'response': future.result()}
369
+ for model_name, future in zip(global_data['models'].keys(), as_completed(futures))
370
+ ]
 
371
  unique_responses = remove_repetitive_responses(responses)
372
  formatted_response = next(iter(unique_responses.values()))
373
 
 
398
  current_inputs = formatted_response if len(formatted_response.split()) > 0 else ""
399
 
400
  end_time = time.time()
401
+ executor.shutdown(wait=True) # waits for all threads to finish
402
  print(f"Total time taken to process response {end_time-start_time}")
403
 
404
  return StreamingResponse(stream_response(inputs), media_type="text/plain")
 
421
 
422
  image = Image.open(io.BytesIO(image_bytes))
423
  print("Image generated successfully.")
424
+
425
+ buffered = io.BytesIO()
426
+ image.save(buffered, format="PNG")
427
+ image_base64 = base64.b64encode(buffered.getvalue()).decode()
428
+
429
+ return JSONResponse(content={"image": image_base64})
430
  except Exception as e:
431
  print(f"Error generating image: {e}")
432
+ return JSONResponse(content={"error": str(e)})
433
  else:
434
  print("No image model loaded.")
435
+ return JSONResponse(content={"error": "No image model loaded"})
436
+
437
def release_resources():
    """Best-effort memory reclamation.

    Empties the CUDA allocator cache and then forces a full garbage-collection
    pass. Any failure is logged and swallowed — this helper must never raise
    into its caller (it runs inside the resource-manager watchdog loop).
    """
    try:
        torch.cuda.empty_cache()  # return cached allocator blocks to the device
        gc.collect()              # force a full collection of Python garbage
    except Exception as exc:
        print(f"Failed to release resources: {exc}")
443
+
444
def resource_manager():
    """Background watchdog that keeps RAM/CPU/GPU usage under fixed caps.

    Runs forever (intended for a daemon thread): every 10 seconds it samples
    system usage via psutil and, when a threshold is exceeded, calls
    release_resources() and/or lowers this process's scheduling priority.
    Errors are logged and the loop continues.
    """
    MAX_RAM_PERCENT = 10
    MAX_CPU_PERCENT = 10
    MAX_GPU_PERCENT = 10
    MAX_RAM_MB = 1024  # 1GB

    while True:
        try:
            virtual_mem = psutil.virtual_memory()
            current_ram_percent = virtual_mem.percent
            current_ram_mb = virtual_mem.used / (1024 * 1024)  # bytes -> MB

            if current_ram_percent > MAX_RAM_PERCENT or current_ram_mb > MAX_RAM_MB:
                release_resources()

            current_cpu_percent = psutil.cpu_percent()
            if current_cpu_percent > MAX_CPU_PERCENT:
                print("CPU usage too high, attempting to reduce nice")
                p = psutil.Process(os.getpid())
                p.nice(1)

            if torch.cuda.is_available():
                gpu = torch.cuda.current_device()
                # BUG FIX: torch.cuda.memory_percent() does not exist in the
                # PyTorch API (the original raised AttributeError on every
                # pass). Derive the usage percentage from allocated vs. total
                # device memory instead.
                total_mem = torch.cuda.get_device_properties(gpu).total_memory
                gpu_mem = torch.cuda.memory_allocated(gpu) / total_mem * 100

                if gpu_mem > MAX_GPU_PERCENT:
                    release_resources()
        except Exception as e:
            print(f"Error in resource manager: {e}")
        finally:
            # Sleep outside the try body so an exception cannot turn this
            # into a hot busy-loop of error prints (original slept only on
            # the success path).
            time.sleep(10)  # Check every 10 seconds
475
 
476
 
477
  app = FastAPI()
 
486
 
487
@app.post("/generate_image")
async def generate_image_endpoint(request: ImageRequest):
    """HTTP endpoint for image generation.

    Delegates to generate_image() with the request's prompt and passes its
    response straight through; any exception is converted into a JSON error
    payload rather than a 500.
    """
    try:
        result = await generate_image(request.prompt)
    except Exception as e:
        return JSONResponse(content={"error": str(e)})
    return result
 
 
 
 
 
 
 
 
 
493
 
494
  def run_uvicorn():
495
  try:
 
497
  except Exception as e:
498
  print(f"Error al ejecutar uvicorn: {e}")
499
 
 
500
if __name__ == "__main__":
    # Serve the FastAPI app from a separate thread so the main thread stays
    # free to run the asyncio event loop below.
    Thread(target=run_uvicorn).start()
    Thread(target=resource_manager, daemon=True).start()  # Run resource manager in background
    # Keep the process alive; daemon threads die with it on shutdown.
    asyncio.get_event_loop().run_forever()