Threatthriver committed on
Commit
b702fe6
·
verified ·
1 Parent(s): 2b2a5d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -0
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import gradio as gr
2
  import torch
 
 
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
  # Load the tokenizer and model (lightweight model as per your suggestion)
@@ -9,6 +12,18 @@ model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instr
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
  model = model.to(device)
11
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def generate_response(message, history, max_tokens, temperature, top_p):
13
  """
14
  Generates a response from the model.
 
1
  import gradio as gr
2
  import torch
3
+ import gc
4
+ import threading
5
+ import time
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
 
8
  # Load the tokenizer and model (lightweight model as per your suggestion)
 
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
  model = model.to(device)
14
 
15
# Background memory cleaner.
def clean_memory(interval=1.0, max_cycles=None):
    """Periodically release unused CPU (and, when available, GPU) memory.

    Each cycle runs ``gc.collect()`` and, when CUDA is available, also
    empties the CUDA caching allocator via ``torch.cuda.empty_cache()``.
    By default the loop never exits, so it is intended to run inside a
    daemon thread.

    Parameters:
        interval (float): Seconds to sleep between cleanup cycles.
            Defaults to 1.0, matching the original once-per-second cadence.
        max_cycles (int | None): Stop after this many cycles. ``None``
            (the default) preserves the original run-forever behavior.

    Returns:
        None.
    """
    cycles = 0
    while max_cycles is None or cycles < max_cycles:
        gc.collect()  # free unreferenced CPU objects
        # Query CUDA availability directly rather than reading the
        # module-level `device` global; equivalent (that global is derived
        # from torch.cuda.is_available()), but keeps this function
        # self-contained.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # return cached GPU blocks to the driver
        cycles += 1
        time.sleep(interval)
22
+
23
# Launch the periodic cleaner in the background; daemon=True ensures the
# thread never blocks interpreter shutdown.
cleanup_thread = threading.Thread(target=clean_memory, daemon=True)
cleanup_thread.start()
26
+
27
  def generate_response(message, history, max_tokens, temperature, top_p):
28
  """
29
  Generates a response from the model.