Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
|
|
|
|
|
|
3 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
4 |
|
5 |
# Load the tokenizer and model (lightweight model as per your suggestion)
|
@@ -9,6 +12,18 @@ model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instr
|
|
9 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
10 |
model = model.to(device)
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def generate_response(message, history, max_tokens, temperature, top_p):
|
13 |
"""
|
14 |
Generates a response from the model.
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
+
import gc
|
4 |
+
import threading
|
5 |
+
import time
|
6 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
7 |
|
8 |
# Load the tokenizer and model (lightweight model as per your suggestion)
|
|
|
12 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
13 |
model = model.to(device)
|
14 |
|
15 |
+
# Function to clean up memory
|
16 |
+
def clean_memory():
|
17 |
+
while True:
|
18 |
+
gc.collect() # Free up CPU memory
|
19 |
+
if device == "cuda":
|
20 |
+
torch.cuda.empty_cache() # Free up GPU memory
|
21 |
+
time.sleep(1) # Clean every second
|
22 |
+
|
23 |
+
# Start memory cleanup in a background thread
|
24 |
+
cleanup_thread = threading.Thread(target=clean_memory, daemon=True)
|
25 |
+
cleanup_thread.start()
|
26 |
+
|
27 |
def generate_response(message, history, max_tokens, temperature, top_p):
|
28 |
"""
|
29 |
Generates a response from the model.
|