rahul2001 committed on
Commit
1dabe5d
·
1 Parent(s): b6bbd23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -4
app.py CHANGED
@@ -6,16 +6,27 @@ import time
6
  # Use a pipeline as a high-level helper
7
  from transformers import pipeline
8
 
9
- print("loading model")
10
- pipe = pipeline("conversational", model="llSourcell/medllama2_7b")
11
- print("load model")
 
 
 
 
 
 
 
 
 
12
  with gr.Blocks() as demo:
13
  chatbot = gr.Chatbot()
14
  msg = gr.Textbox()
15
  clear = gr.ClearButton([msg, chatbot])
16
 
17
  def respond(message, chat_history):
18
- bot_message = pipe(message)
 
 
19
  chat_history.append((message, bot_message))
20
  time.sleep(2)
21
  return "", chat_history
 
6
  # Use a pipeline as a high-level helper
7
  from transformers import pipeline
8
 
9
+ from transformers import BitsAndBytesConfig
10
+
11
+
12
+ nf4_config = BitsAndBytesConfig(
13
+ load_in_4bit=True,
14
+ bnb_4bit_quant_type="nf4",
15
+ bnb_4bit_use_double_quant=True,
16
+ bnb_4bit_compute_dtype=torch.bfloat16
17
+ )
18
+
19
+ tokenizer = AutoTokenizer.from_pretrained("llSourcell/medllama2_7b",quantization_config=nf4_config)
20
+ model = AutoModelForCausalLM.from_pretrained("llSourcell/medllama2_7b",quantization_config=nf4_config)
21
  with gr.Blocks() as demo:
22
  chatbot = gr.Chatbot()
23
  msg = gr.Textbox()
24
  clear = gr.ClearButton([msg, chatbot])
25
 
26
  def respond(message, chat_history):
27
+ inputs = tokenizer(message, return_tensors="pt")
28
+ generate_ids = model.generate(inputs.input_ids, max_length=30)
29
+ bot_message = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
30
  chat_history.append((message, bot_message))
31
  time.sleep(2)
32
  return "", chat_history