PhantHive committed
Commit 1c111fc · verified · Parent: 2b702db

Update app.py

Files changed (1): app.py (+31, -6)
app.py CHANGED
@@ -3,7 +3,7 @@ from peft import PeftModel, PeftConfig
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
-
+'''
 if torch.cuda.is_available():
     device = torch.device("cuda")
     print("GPU is available!")
@@ -19,13 +19,38 @@ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 
 # Load the Lora model
 model = PeftModel.from_pretrained(model, peft_model_id)
+'''
+
+import torch
+from peft import PeftModel, PeftConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    print("GPU is available!")
+else:
+    device = torch.device("cpu")
+    print("GPU is not available, using CPU.")
+
+peft_model_id = "phearion/bigbrain-v0.0.1"
+config = PeftConfig.from_pretrained(peft_model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    config.base_model_name_or_path,
+    low_cpu_mem_usage=True,
+    return_dict=True,
+    torch_dtype=torch.bfloat16)
+
+tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+
+# Load the Lora model
+model = PeftModel.from_pretrained(model, peft_model_id)
+model = model.merge_and_unload()
 
 def greet(text):
-    batch = tokenizer(f"\"{text}\" ->: ", return_tensors='pt')
-
-    # Use torch.no_grad to disable gradient calculation
-    with torch.no_grad():
-        output_tokens = model.generate(**batch, do_sample=True, max_new_tokens=20)
+    batch = tokenizer("“aide moi avec les equa diff ” ->: ", return_tensors='pt')
+
+    with torch.cuda.amp.autocast():
+        output_tokens = model.generate(**batch, max_new_tokens=15)
 
     return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
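A note on the new code path: `PeftModel.from_pretrained(model, peft_model_id)` attaches the LoRA adapter from phearion/bigbrain-v0.0.1 to the base model named in its PEFT config, and the newly added `model.merge_and_unload()` folds the adapter weights into the base weights so inference runs without the PEFT wrapper. Two caveats are visible in the diff itself: the rewritten `greet` ignores its `text` argument in favor of a hardcoded French prompt ("aide moi avec les equa diff", i.e. "help me with the differential equations"), and `torch.cuda.amp.autocast()` with an unplaced batch assumes a CUDA device even though the code above selects a CPU fallback. The sketch below is a hypothetical illustration of the same pipeline with those two points addressed; it is not part of the commit, and the prompt template `"{text}" ->:` is simply carried over from the code this commit removed.

# Hypothetical sketch, not part of the commit: parameterized prompt and
# explicit device placement for the merged model.
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

peft_model_id = "phearion/bigbrain-v0.0.1"
config = PeftConfig.from_pretrained(peft_model_id)
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Attach the LoRA adapter, then fold its weights into the base model.
model = PeftModel.from_pretrained(base_model, peft_model_id)
model = model.merge_and_unload()
model.to(device)

def greet(text):
    # Prompt template carried over from the code this commit removed.
    batch = tokenizer(f'"{text}" ->: ', return_tensors="pt").to(device)
    with torch.no_grad():
        output_tokens = model.generate(**batch, max_new_tokens=15)
    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)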