PhantHive committed
Commit f92a739 · verified · 1 Parent(s): 2dce72a

Update app.py

Files changed (1)
  1. app.py +19 -5
app.py CHANGED
@@ -23,11 +23,25 @@ model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, qua
 model = PeftModel.from_pretrained(model, model_id)
 
 def greet(text):
-    with torch.no_grad():  # Disable gradient calculation for inference
-        batch = tokenizer(f'### Input:\n{text}\n\n### Answer:\n', return_tensors='pt')  # Move tensors to device
-        with torch.cuda.amp.autocast():  # Enable mixed-precision if available
-            output_tokens = model.generate(**batch, max_new_tokens=50)
-    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+    with torch.no_grad():
+        # Include the BOS token for better context
+        input_text = f'<s>### Input:\n{text}\n\n### Response:\n'
+        batch = tokenizer(input_text, return_tensors='pt', add_special_tokens=True).to(device)
+
+        with torch.cuda.amp.autocast():
+            output_tokens = model.generate(
+                **batch,
+                max_new_tokens=25,  # Limit response length
+                do_sample=True,  # Sample from the distribution
+                pad_token_id=tokenizer.eos_token_id,  # Stop at EOS
+            )
+
+    # Decode only the newly generated tokens (skip the prompt)
+    response = tokenizer.decode(output_tokens[0][len(batch['input_ids'][0]):], skip_special_tokens=True)
+
+    # Additional stopping condition at the next "\n### Response:" marker
+    response_parts = response.split("\n### Response:")
+    return response_parts[0]  # Return only the first part
 
 iface = gr.Interface(fn=greet, inputs="text", outputs="text", title="PEFT Model for Big Brain")
 iface.launch()  # Share directly to Gradio Space
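
For context, the hunk header shows that a quantized base model is loaded just above this diff. A minimal sketch of that surrounding setup, useful for trying `greet` outside the Space, might look as follows; the adapter repo id, the quantization settings, and the `device` variable are assumptions here, since that part of app.py is not touched by this commit:

```python
# Hypothetical reconstruction of the setup above the hunk; the names marked as
# assumptions are not part of the commit and may differ in the real app.py.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftConfig, PeftModel

model_id = "your-username/your-peft-adapter"  # assumption: the PEFT adapter repo id
config = PeftConfig.from_pretrained(model_id)

# Assumption: 4-bit quantization so the base model fits on a small GPU
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=quantization_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, model_id)  # attach the PEFT adapter
model.eval()

device = model.device  # greet() moves the tokenized batch here via .to(device)
```

With a setup along these lines, `print(greet("Some test question"))` can be run locally to check the prompt template and stopping behaviour before `iface.launch()` exposes the function in the Gradio Space.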