Spaces:

PhantHive
/

Phearion-bigbrain-v0.0.1

Paused

PhantHive committed on Feb 14, 2024

Commit

d037b1c

verified ·

1 Parent(s): f5987ce

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,11 +3,19 @@ from peft import PeftModel, PeftConfig
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 # Load the model and config when the script starts
 config = PeftConfig.from_pretrained("phearion/bigbrain-v0.0.1")
 model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
 model = PeftModel.from_pretrained(model, "phearion/bigbrain-v0.0.1")
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
@@ -15,10 +23,12 @@ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
 def greet(text):
     batch = tokenizer(f"'{text}' ->: ", return_tensors='pt')
     # Use torch.no_grad to disable gradient calculation
-    with torch.cuda.amp.autocast():
-        output_tokens = model.generate(**batch, max_new_tokens=20)
     return tokenizer.decode(output_tokens[0], skip_special_tokens=True)

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Load the PEFT adapter config, the base model and the adapter weights once,
 # when the script starts, so every call to greet() reuses the same objects.
 config = PeftConfig.from_pretrained("phearion/bigbrain-v0.0.1")
 model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
 model = PeftModel.from_pretrained(model, "phearion/bigbrain-v0.0.1")
 # Move the model to the device so it matches the tensors greet() sends it
 model = model.to(device)
 # The model is deliberately NOT compiled with torch.jit.script():
 #   * scripting a PEFT-wrapped transformers model is not supported and fails
 #     at start-up, and
 #   * a ScriptModule only exposes forward()/exported methods, so the Python
 #     generate() method used for inference would not be available on it.
 # The name `scripted_model` is kept as an alias of the eager model so that
 # existing code referring to it keeps working.
 scripted_model = model
 # Load the tokenizer that matches the base model
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
 def greet(text):
     """Generate a short completion for *text* and return it as a string.

     The input is wrapped in the prompt template ``'<text>' ->: ``, tokenized,
     moved to the same device as the model, and passed to ``generate`` with a
     budget of 20 new tokens.

     Args:
         text: The user-supplied text to insert into the prompt.

     Returns:
         The first generated sequence, decoded with special tokens removed.
     """
     batch = tokenizer(f"'{text}' ->: ", return_tensors='pt')
     # Move the batch to the device the model lives on
     batch = {k: v.to(device) for k, v in batch.items()}
     # Inference only: disable gradient tracking to save memory and time.
     with torch.no_grad():
         # Call generate() on the eager PEFT model; a TorchScript module does
         # not provide the Python-level generate() method.
         output_tokens = model.generate(**batch, max_new_tokens=20)
     return tokenizer.decode(output_tokens[0], skip_special_tokens=True)