Spaces:

rahul2001
/

doctor_chat_demo

Runtime error

rahul2001 committed on Sep 24, 2023

Commit

1dabe5d

1 Parent(s): b6bbd23

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,16 +6,27 @@ import time
 # Use a pipeline as a high-level helper
 from transformers import pipeline
-print("loading model")
-pipe = pipeline("conversational", model="llSourcell/medllama2_7b")
-print("load model")
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot()
     msg = gr.Textbox()
     clear = gr.ClearButton([msg, chatbot])
     def respond(message, chat_history):
-        bot_message = pipe(message)
         chat_history.append((message, bot_message))
         time.sleep(2)
         return "", chat_history

 # Use a pipeline as a high-level helper
 from transformers import pipeline
+from transformers import BitsAndBytesConfig
+nf4_config = BitsAndBytesConfig(
+   load_in_4bit=True,
+   bnb_4bit_quant_type="nf4",
+   bnb_4bit_use_double_quant=True,
+   bnb_4bit_compute_dtype=torch.bfloat16
+)
+tokenizer = AutoTokenizer.from_pretrained("llSourcell/medllama2_7b",quantization_config=nf4_config)
+model = AutoModelForCausalLM.from_pretrained("llSourcell/medllama2_7b",quantization_config=nf4_config)
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot()
     msg = gr.Textbox()
     clear = gr.ClearButton([msg, chatbot])
     def respond(message, chat_history):
+        inputs = tokenizer(message, return_tensors="pt")
+        generate_ids = model.generate(inputs.input_ids, max_length=30)
+        bot_message = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
         chat_history.append((message, bot_message))
         time.sleep(2)
         return "", chat_history