PhantHive committed on
Commit
d037b1c
·
verified ·
1 Parent(s): f5987ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -3,11 +3,19 @@ from peft import PeftModel, PeftConfig
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
  import torch
5
 
 
 
6
  # Load the model and config when the script starts
7
  config = PeftConfig.from_pretrained("phearion/bigbrain-v0.0.1")
8
  model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
9
  model = PeftModel.from_pretrained(model, "phearion/bigbrain-v0.0.1")
10
 
 
 
 
 
 
 
11
  # Load the tokenizer
12
  tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
13
 
@@ -15,10 +23,12 @@ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
15
 
16
  def greet(text):
17
  batch = tokenizer(f"'{text}' ->: ", return_tensors='pt')
 
 
18
 
19
  # Use torch.no_grad to disable gradient calculation
20
- with torch.cuda.amp.autocast():
21
- output_tokens = model.generate(**batch, max_new_tokens=20)
22
 
23
  return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
24
 
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
  import torch
5
 
6
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7
+
8
  # Load the model and config when the script starts
9
  config = PeftConfig.from_pretrained("phearion/bigbrain-v0.0.1")
10
  model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
11
  model = PeftModel.from_pretrained(model, "phearion/bigbrain-v0.0.1")
12
 
13
+ # Move the model to the device
14
+ model = model.to(device)
15
+
16
+ # Convert the model to TorchScript
17
+ scripted_model = torch.jit.script(model)
18
+
19
  # Load the tokenizer
20
  tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
21
 
 
23
 
24
  def greet(text):
25
  batch = tokenizer(f"'{text}' ->: ", return_tensors='pt')
26
+ # Move the batch to the device
27
+ batch = {k: v.to(device) for k, v in batch.items()}
28
 
29
  # Use torch.no_grad to disable gradient calculation
30
+ with torch.no_grad():
31
+ output_tokens = scripted_model.generate(**batch, max_new_tokens=20)
32
 
33
  return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
34