kietnt0603 committed
Commit 7f63c33 (verified)
Parent(s): a679d45

Create app.py

Files changed (1):
  app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
import streamlit as st
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    logging
)

logging.set_verbosity_error()
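
# ChatGLM3-6B ships its modeling and tokenizer code alongside the checkpoint,
# which is why trust_remote_code=True is passed to both from_pretrained() calls below.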
model_name = 'THUDM/chatglm3-6b'

#############################################
# bitsandbytes parameters
#############################################

# Activate 4-bit precision for base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = 'float16'

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = 'nf4'

# Activate nested quantization for 4-bit base models
use_nested_quant = False

# Load the entire model onto GPU 0
device_map = {"": 0}
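
# NF4 ("normal float 4") is the 4-bit data type introduced by QLoRA; nested
# (double) quantization also quantizes the quantization constants themselves,
# saving a little extra memory at load time.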
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print('=' * 80)
        print("Your GPU supports bfloat16: set bnb_4bit_compute_dtype = 'bfloat16' to accelerate")
        print('=' * 80)
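
# Note: the first run downloads the checkpoint (~12 GB in fp16); with 4-bit
# quantization the loaded model should need roughly 4-6 GB of GPU memory
# (rough figures, not measured here).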
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map=device_map,
)
model.config.use_cache = False
model.config.pretraining_tp = 1
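# Note: use_cache=False and pretraining_tp are settings carried over from
# fine-tuning recipes; for pure inference, leaving the KV cache enabled is
# typically faster.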

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = 'left'
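# Decoder-only models generate continuations from the end of the prompt, so
# batched inputs are left-padded to keep prompts flush with the new tokens.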

# Set the title of the Streamlit app
st.title("Chatbot with ChatGLM3 and Hugging Face Transformers")

# Streamlit reruns this script on every interaction, so keep the model-side chat
# history and the displayed transcript in session state instead of plain locals
if 'history' not in st.session_state:
    st.session_state.history = []
if 'conversation' not in st.session_state:
    st.session_state.conversation = []

# Placeholder for the conversation display
conversation_text = st.empty()

# Get the user input
user_input = st.text_input("You: ")

# If the user has submitted input
if st.button("Send"):

    # Generate the chatbot's response
    response, st.session_state.history = model.chat(
        tokenizer, user_input, history=st.session_state.history
    )

    # Add both sides of the exchange to the transcript
    st.session_state.conversation.append(f"You: {user_input}")
    st.session_state.conversation.append(f"Bot: {response}")

    # Render the transcript in a single call so the placeholder shows every
    # message rather than only the last one
    transcript = "**Conversation:**\n" + "\n".join(
        f"- {message}" for message in st.session_state.conversation
    )
    conversation_text.markdown(transcript)
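
To try this commit locally, the obvious invocation (assuming streamlit, torch, transformers, accelerate, bitsandbytes, and sentencepiece are installed and a CUDA GPU is available) is `streamlit run app.py`. For a quick smoke test of the chat loop outside Streamlit, a minimal sketch reusing the model and tokenizer objects defined above (the prompt string is illustrative; ChatGLM3's chat() helper returns the reply plus the updated history):

history = []
response, history = model.chat(tokenizer, "Hello! What can you do?", history=history)
print(response)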