Threatthriver committed on
Commit
b702fe6
·
verified ·
1 Parent(s): 2b2a5d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -0
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import gradio as gr
2
  import torch
 
 
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
  # Load the tokenizer and model (lightweight model as per your suggestion)
@@ -9,6 +12,18 @@ model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instr
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
  model = model.to(device)
11
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def generate_response(message, history, max_tokens, temperature, top_p):
13
  """
14
  Generates a response from the model.
 
1
  import gradio as gr
2
  import torch
3
+ import gc
4
+ import threading
5
+ import time
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
 
8
  # Load the tokenizer and model (lightweight model as per your suggestion)
 
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
  model = model.to(device)
14
 
15
# Background memory cleaner.
def clean_memory(interval=1.0, max_cycles=None):
    """Periodically release unused CPU (and, when available, GPU) memory.

    Each cycle runs ``gc.collect()`` and, when CUDA is available, also
    empties the CUDA caching allocator via ``torch.cuda.empty_cache()``.
    By default the loop never exits, so it is intended to run inside a
    daemon thread.

    Parameters:
        interval (float): Seconds to sleep between cleanup cycles.
            Defaults to 1.0, matching the original once-per-second cadence.
        max_cycles (int | None): Stop after this many cycles. ``None``
            (the default) preserves the original run-forever behavior.

    Returns:
        None.
    """
    cycles = 0
    while max_cycles is None or cycles < max_cycles:
        gc.collect()  # free unreferenced CPU objects
        # Query CUDA availability directly rather than reading the
        # module-level `device` global; equivalent (that global is derived
        # from torch.cuda.is_available()), but keeps this function
        # self-contained.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # return cached GPU blocks to the driver
        cycles += 1
        time.sleep(interval)
22
+
23
# Launch the periodic cleaner in the background; daemon=True ensures the
# thread never blocks interpreter shutdown.
cleanup_thread = threading.Thread(target=clean_memory, daemon=True)
cleanup_thread.start()
26
+
27
  def generate_response(message, history, max_tokens, temperature, top_p):
28
  """
29
  Generates a response from the model.