Update app.py
app.py CHANGED
@@ -357,16 +357,17 @@ async def process_message(message: str):
     eos_found = False
 
     start_time = time.time()
+
+    executor = ThreadPoolExecutor()
     while current_inputs and not eos_found:
-
-
-
-
-
-
-
-
-        ]
+        futures = [
+            executor.submit(generate_model_response, model, current_inputs, max_tokens=max_token_limit)
+            for model in global_data['models'].values()
+        ]
+        responses = [
+            {'model': model_name, 'response': future.result()}
+            for model_name, future in zip(global_data['models'].keys(), as_completed(futures))
+        ]
     unique_responses = remove_repetitive_responses(responses)
     formatted_response = next(iter(unique_responses.values()))
 
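Review note on the hunk above: concurrent.futures.as_completed yields futures in completion order, not submission order, so zipping it against global_data['models'].keys() can pair a response with the wrong model whenever models finish out of order. A minimal order-preserving sketch, keyed by model name (generate_model_response, global_data, and max_token_limit are taken from the surrounding file):

    from concurrent.futures import ThreadPoolExecutor

    def collect_responses(models, current_inputs, max_token_limit):
        with ThreadPoolExecutor() as executor:
            # Key each future by its model name so the pairing survives
            # out-of-order completion.
            futures = {
                name: executor.submit(generate_model_response, model,
                                      current_inputs, max_tokens=max_token_limit)
                for name, model in models.items()
            }
            # result() blocks until that particular task has finished.
            return [{'model': name, 'response': f.result()}
                    for name, f in futures.items()]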
@@ -397,6 +398,7 @@ async def process_message(message: str):
     current_inputs = formatted_response if len(formatted_response.split()) > 0 else ""
 
     end_time = time.time()
+    executor.shutdown(wait=True)  # waits for all threads to finish
     print(f"Total time taken to process response {end_time-start_time}")
 
     return StreamingResponse(stream_response(inputs), media_type="text/plain")
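The explicit executor.shutdown(wait=True) only runs on the happy path; an exception inside the loop would leak the worker threads. Using the executor as a context manager gives the same wait-on-exit behavior unconditionally; the shape of it:

    with ThreadPoolExecutor() as executor:
        # __exit__ calls shutdown(wait=True) even if the loop raises.
        while current_inputs and not eos_found:
            ...  # submit per-model tasks and collect responses, as above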
@@ -419,13 +421,57 @@ async def generate_image(prompt: str):
 
             image = Image.open(io.BytesIO(image_bytes))
             print("Image generated successfully.")
-
+
+            buffered = io.BytesIO()
+            image.save(buffered, format="PNG")
+            image_base64 = base64.b64encode(buffered.getvalue()).decode()
+
+            return JSONResponse(content={"image": image_base64})
         except Exception as e:
             print(f"Error generating image: {e}")
-            return
+            return JSONResponse(content={"error": str(e)})
     else:
         print("No image model loaded.")
-        return
+        return JSONResponse(content={"error": "No image model loaded"})
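Since the endpoint now ships the PNG as base64 inside JSON rather than as raw bytes, a client has to reverse the encoding. A small consumer sketch (the field names match the diff; the URL and port are assumptions):

    import base64, io, requests
    from PIL import Image

    resp = requests.post("http://localhost:8000/generate_image",
                         json={"prompt": "a watercolor lighthouse"})
    payload = resp.json()
    if "image" in payload:
        Image.open(io.BytesIO(base64.b64decode(payload["image"]))).save("out.png")
    else:
        print("server error:", payload.get("error"))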
+
+def release_resources():
+    try:
+        torch.cuda.empty_cache()
+        gc.collect()
+    except Exception as e:
+        print(f"Failed to release resources: {e}")
+
+def resource_manager():
+    MAX_RAM_PERCENT = 10
+    MAX_CPU_PERCENT = 10
+    MAX_GPU_PERCENT = 10
+    MAX_RAM_MB = 1024  # 1GB
+
+    while True:
+        try:
+            virtual_mem = psutil.virtual_memory()
+            current_ram_percent = virtual_mem.percent
+            current_ram_mb = virtual_mem.used / (1024 * 1024)  # Convert to MB
+
+            if current_ram_percent > MAX_RAM_PERCENT or current_ram_mb > MAX_RAM_MB:
+                release_resources()
+
+            current_cpu_percent = psutil.cpu_percent()
+            if current_cpu_percent > MAX_CPU_PERCENT:
+                print("CPU usage too high, attempting to reduce nice")
+                p = psutil.Process(os.getpid())
+                p.nice(1)
+
+            if torch.cuda.is_available():
+                gpu = torch.cuda.current_device()
+                gpu_mem = torch.cuda.memory_percent(gpu)
+
+                if gpu_mem > MAX_GPU_PERCENT:
+                    release_resources()
+
+            time.sleep(10)  # Check every 10 seconds
+        except Exception as e:
+            print(f"Error in resource manager: {e}")
 
 
 app = FastAPI()
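Two caveats on resource_manager. First, torch.cuda has no memory_percent function (the public API offers memory_allocated, memory_reserved, and mem_get_info), so the gpu_mem line raises AttributeError on every pass and the except clause silently swallows it. A sketch of an equivalent check built from calls that do exist:

    import torch

    def gpu_memory_percent(device=None):
        # torch.cuda.mem_get_info wraps cudaMemGetInfo and
        # returns (free_bytes, total_bytes) for the device.
        free_bytes, total_bytes = torch.cuda.mem_get_info(device)
        return 100.0 * (total_bytes - free_bytes) / total_bytes

    if torch.cuda.is_available():
        if gpu_memory_percent(torch.cuda.current_device()) > MAX_GPU_PERCENT:
            release_resources()

Second, a 10 percent RAM threshold will be exceeded on virtually any running system, so release_resources effectively fires every 10 seconds; and psutil's p.nice(1) sets the absolute nice value rather than incrementing it.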
@@ -440,19 +486,10 @@ async def generate(request: ChatRequest):
 
 @app.post("/generate_image")
 async def generate_image_endpoint(request: ImageRequest):
-
-
-
-
-        image.save(buffered, format="PNG")
-        image_base64 = base64.b64encode(buffered.getvalue()).decode()
-
-        return JSONResponse(content={"image": image_base64})
-    else:
-        return JSONResponse(content={"error": "Image generation failed or no model loaded"})
-    except Exception as e:
-        return JSONResponse(content={"error": str(e)})
-
+    try:
+        return await generate_image(request.prompt)
+    except Exception as e:
+        return JSONResponse(content={"error": str(e)})
 
 def run_uvicorn():
     try:
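Both failure paths of the endpoint answer with HTTP 200 and an "error" key, so clients cannot tell success from failure by status code. Starlette's JSONResponse accepts a status_code argument if that distinction matters; a minimal variant of the handler above:

    @app.post("/generate_image")
    async def generate_image_endpoint(request: ImageRequest):
        try:
            return await generate_image(request.prompt)
        except Exception as e:
            # Surface the failure at the HTTP level as well as in the body.
            return JSONResponse(content={"error": str(e)}, status_code=500)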
@@ -460,7 +497,7 @@ def run_uvicorn():
 
     except Exception as e:
         print(f"Error running uvicorn: {e}")
-
 
 if __name__ == "__main__":
     Thread(target=run_uvicorn).start()
+    Thread(target=resource_manager, daemon=True).start()  # Run resource manager in background
     asyncio.get_event_loop().run_forever()
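daemon=True means the monitor thread never keeps the process alive by itself; the run_forever() call on the main thread does. Note that asyncio.get_event_loop() is deprecated in recent Python (3.10+) when no loop is running, and the loop object is never used here, so a plain blocking wait would avoid the warning, e.g.:

    import threading

    if __name__ == "__main__":
        Thread(target=run_uvicorn).start()
        Thread(target=resource_manager, daemon=True).start()
        # Block the main thread indefinitely; the Event is never set.
        threading.Event().wait()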