Spaces:

kietnt0603
/

ChatGLM4CS313

Sleeping

App Files Files Community

kietnt0603 committed on May 30, 2024

Commit

7f63c33

verified ·

1 Parent(s): a679d45

Create app.py

Browse files

Files changed (1) hide show

app.py +84 -0

app.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import streamlit as st
+import os
+import torch
+from datasets import DatasetDict, Dataset
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    logging
+)
+logging.set_verbosity_error()
+model_name = 'THUDM/chatglm3-6b'
+#############################################
+# bitsandbytes parameters
+#############################################
+# Activate 4-bit precision for base model loading
+use_4bit = True
+# Compute dtype of 4-bit base models
+bnb_4bit_compute_dtype = 'float16'
+# Quantization type (fp4 or np4)
+bnb_4bit_quant_type = 'nf4'
+# Activate nested quantization for 4-bit base models
+use_nested_quant = False
+# device mapping
+device_map = {"": 0}
+compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=use_4bit,
+    bnb_4bit_quant_type=bnb_4bit_quant_type,
+    bnb_4bit_compute_dtype=compute_dtype,
+    bnb_4bit_use_double_quant=use_nested_quant,
+)
+if compute_dtype == torch.float16 and use_4bit:
+    major, _ = torch.cuda.get_device_capability()
+    if major >= 8:
+        print('='*80)
+        print('Your GPU supports bfloat16, you can accelerate using the argument --fp16')
+        print('='*80)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    trust_remote_code=True,
+    quantization_config=bnb_config,
+    device_map=device_map,
+)
+model.config.use_cache = False
+model.config.pretraining_tp = 1
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+tokenizer.padding_side = 'left'
+# Set the title of the Streamlit app
+st.title("Chatbot with LangChain and HuggingFace Model")
+# Display the conversation history
+conversation_text = st.empty()
+# Get the user input
+user_input = st.text_input("You: ")
+history = []
+# If the user has submitted input
+if st.button("Send"):
+    # Generate the chatbot's response
+    response, history = model.chat(tokenizer, user_input, history=history)
+    # Add the response to the conversation history
+    conversation_history.append(f"Bot: {response}")
+    # Update the conversation text
+    conversation_text.markdown("**Conversation:**\n")
+    for message in conversation_history:
+        conversation_text.markdown(f"- {message}")