joaogante (HF staff) committed (verified)
Commit 57756de · 1 Parent(s): abbc475

Update app.py

Files changed (1)
  1. app.py +5 -5
app.py CHANGED
@@ -6,10 +6,10 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 import time
 
 model_id = "facebook/opt-6.7b"
-assistant_id = "facebook/opt-350m"
+assistant_id = "facebook/opt-125m"
 
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
-assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id).to(device=model.device, dtype=torch.bfloat16)
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
+assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id).to(device=model.device, dtype=torch.float16)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 def run_generation(user_text, use_assistant, temperature, max_new_tokens):
@@ -55,8 +55,8 @@ def reset_textbox():
 with gr.Blocks() as demo:
     gr.Markdown(
         "# 🤗 Assisted Generation Demo\n"
-        f"- Model: {model_id} (BF16, ~14GB)\n"
-        f"- Assistant Model: {assistant_id} (BF16, ~0.3GB)\n"
+        f"- Model: {model_id} (FP16, ~14GB)\n"
+        f"- Assistant Model: {assistant_id} (FP16, ~0.3GB)\n"
         "- Running on a T4 GPU\n"
         "- Best results are obtained when the model size difference in parameters is >10x"
     )
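
For context, the two changed load calls wire into transformers assisted generation. Below is a minimal sketch of how these objects fit together, assuming the app's run_generation (not shown in full in this diff) forwards the small model through the assistant_model argument of model.generate; the prompt and generation settings are illustrative, not taken from app.py. FP16 replaces BF16 here because the T4 GPU named in the demo text has no native bfloat16 support.

# Minimal sketch of assisted generation with the models configured above.
# Prompt and settings are illustrative, not taken from app.py.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "facebook/opt-6.7b"
assistant_id = "facebook/opt-125m"

# FP16 instead of BF16: T4 GPUs (compute capability 7.5) lack native bfloat16.
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)
assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id).to(
    device=model.device, dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained(model_id)  # both OPT checkpoints share this tokenizer

inputs = tokenizer("The theory of relativity states that", return_tensors="pt").to(model.device)

# Passing assistant_model switches generate() to assisted decoding: the small
# model drafts candidate tokens and the large model verifies them in a single
# forward pass, speeding up memory-bound decoding while (under greedy decoding)
# producing the same output as the large model alone.
outputs = model.generate(**inputs, assistant_model=assistant_model, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The ">10x" note in the demo text matches this commit's choice of draft model: opt-125m is roughly 50x smaller than opt-6.7b, so its drafts are cheap relative to the verification passes of the main model.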