bobpopboom committed
Commit 0b2dc4c · verified · 1 Parent(s): 8c068ee

ok can i still see xD

Files changed (1):
app.py +40 -18
app.py CHANGED
@@ -2,6 +2,11 @@ import gradio as gr
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import torch
 
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+
 model_id = "thrishala/mental_health_chatbot"
 
 try:
@@ -13,25 +18,42 @@ try:
         max_memory={"cpu": "15GB"},
         offload_folder="offload",
     )
-
+    model.to(device)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.model_max_length = 512  # Set maximum length
+    # ok this is just to slow with pipe i wish it was faster. Si were ren=moving pipe in favor of local model
 
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        torch_dtype=torch.float16,
-        num_return_sequences=1,
-        do_sample=False,
-        truncation=True,
-        max_new_tokens=128
-    )
+    # pipe = pipeline(
+    #     "text-generation",
+    #     model=model,
+    #     tokenizer=tokenizer,
+    #     torch_dtype=torch.float16,
+    #     num_return_sequences=1,
+    #     do_sample=False,
+    #     truncation=True,
+    #     max_new_tokens=128
+    # )
 
 except Exception as e:
     print(f"Error loading model: {e}")
     exit()
-
+def generate_text(prompt, max_new_tokens=128):
+    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)  # Move input to the same device as the model
+
+    with torch.no_grad():  # Disable gradients during inference
+        output = model.generate(
+            input_ids=input_ids,
+            max_new_tokens=max_new_tokens,
+            do_sample=False,  # Or True for sampling
+            eos_token_id=tokenizer.eos_token_id,  # Use EOS token to stop generation
+        )[0]["generated_text"]
+
+    # Extract only the new assistant response after the last Assistant: in the prompt
+    bot_response = response[len(prompt):].split("User:")[0].strip()  # Take text after prompt and before next User
+    )
+
+    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+    return generated_text
 def respond(
     message,
     history,
@@ -45,15 +67,15 @@ def respond(
     prompt += f"User: {message}\nAssistant:"
 
     try:
-        response = pipe(
-            prompt,
-            max_new_tokens=max_tokens,
-            do_sample=False,
-            eos_token_id=tokenizer.eos_token_id,  # Use EOS token to stop generation
-        )[0]["generated_text"]
+        # response = pipe(
+        #     prompt,
+        #     max_new_tokens=max_tokens,
+        #     do_sample=False,
+        #     eos_token_id=tokenizer.eos_token_id,  # Use EOS token to stop generation
+        # )[0]["generated_text"]
 
         # Extract only the new assistant response after the last Assistant: in the prompt
-        bot_response = response[len(prompt):].split("User:")[0].strip()  # Take text after prompt and before next User
+        bot_response = generate_text(prompt, max_tokens)
        yield bot_response
     except Exception as e:
         print(f"Error during generation: {e}")
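Note on the new generate_text: as committed it still carries leftovers from the removed pipeline call. model.generate returns a tensor of token ids rather than a list of dicts, so the trailing [0]["generated_text"] indexing would fail; the bot_response line references a variable named response that no longer exists; and the lone closing parenthesis on the following line is a syntax error, so the file likely will not even import in this form. A minimal sketch (not the committed code) of how the direct-generation path could look once those leftovers are dropped, reusing the model, tokenizer, and prompt-trimming heuristic already present in this file:

def generate_text(prompt, max_new_tokens=128):
    # Move the prompt tokens to the same device as the model
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():  # no gradients needed for inference
        output = model.generate(
            input_ids=input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # greedy decoding, as in the commit
            eos_token_id=tokenizer.eos_token_id,  # stop at the EOS token
        )

    # generate returns token ids: decode them, then keep only the new
    # assistant turn by dropping the echoed prompt and anything after the
    # next "User:" (the same heuristic the old pipeline path used)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text[len(prompt):].split("User:")[0].strip()

With that change, respond can keep calling bot_response = generate_text(prompt, max_tokens) and yield the result unchanged.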