import streamlit as st
import torch
from transformers import (
  AutoModelForCausalLM,
  AutoTokenizer,
  BitsAndBytesConfig,
  logging
)

logging.set_verbosity_error()

model_name = 'THUDM/chatglm3-6b'
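# ChatGLM3-6B checkpoint from THUDM on the Hugging Face Hub; it ships custom modelling
# code, which is why trust_remote_code=True is passed when loading it below.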

#############################################
# bitsandbytes parameters
#############################################

# Activate 4-bit precision for base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = 'float16'

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = 'nf4'

# Activate nested quantization for 4-bit base models
use_nested_quant = False

# Device mapping: the 4-bit bitsandbytes quantization below requires a CUDA GPU,
# so place the whole model on the first GPU
device_map = {"": 0}

compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
  load_in_4bit=use_4bit,
  bnb_4bit_quant_type=bnb_4bit_quant_type,
  bnb_4bit_compute_dtype=compute_dtype,
  bnb_4bit_use_double_quant=use_nested_quant,
)

# Report whether the GPU also supports bfloat16 (compute capability >= 8, i.e. Ampere or newer)
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
  major, _ = torch.cuda.get_device_capability()
  if major >= 8:
    print('=' * 80)
    print('Your GPU supports bfloat16: consider setting bnb_4bit_compute_dtype to bfloat16 to speed up inference')
    print('=' * 80)

model = AutoModelForCausalLM.from_pretrained(
  model_name,
  trust_remote_code=True,
  quantization_config=bnb_config,
  device_map=device_map,
)
model.config.use_cache = False
model.config.pretraining_tp = 1
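# Note: use_cache=False and pretraining_tp=1 are settings usually carried over from QLoRA
# fine-tuning recipes; for a pure chat/inference app the KV cache could stay enabled.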

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = 'left'
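# Left padding keeps the prompt adjacent to newly generated tokens, the usual choice
# for decoder-only models when batching inputs for generation.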

# Set the title of the Streamlit app
st.title("ChatGLM3 Chatbot with Hugging Face Transformers")

# Placeholder that will hold the rendered conversation history
conversation_text = st.empty()

# Get the user input
user_input = st.text_input("You: ")

# Keep the chat history across Streamlit reruns in session state
if "history" not in st.session_state:
  st.session_state.history = []                 # dialogue state consumed/returned by model.chat
if "conversation_history" not in st.session_state:
  st.session_state.conversation_history = []    # formatted lines shown to the user

# If the user has submitted input
if st.button("Send") and user_input:

  # Generate the chatbot's response
  response, st.session_state.history = model.chat(
    tokenizer, user_input, history=st.session_state.history
  )

  # Add the exchange to the conversation history
  st.session_state.conversation_history.append(f"You: {user_input}")
  st.session_state.conversation_history.append(f"Bot: {response}")

  # Update the conversation text (st.empty() holds a single element, so render everything at once)
  lines = ["**Conversation:**"] + [f"- {m}" for m in st.session_state.conversation_history]
  conversation_text.markdown("\n".join(lines))
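
# To try the app locally (assuming this file is saved as app.py and streamlit, torch,
# transformers, and bitsandbytes are installed), run:
#   streamlit run app.py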