Xiongwenhf committed on
Commit 0c7e6a3 · verified
Parent: 1ae3f4c

Update app.py

Files changed (1)
  1. app.py +121 -10
app.py CHANGED
@@ -1,10 +1,117 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
+from langchain_community.chat_models import ChatOpenAI
+from langchain.chains.retrieval_qa.base import RetrievalQA
+from langchain_community.embeddings import OpenAIEmbeddings
+from langchain.schema import HumanMessage, SystemMessage
+from langchain_community.document_loaders import DirectoryLoader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain_community.embeddings import OpenAIEmbeddings
+from langchain_community.vectorstores import Chroma
+import requests
+from langchain_core.prompts import PromptTemplate

 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+import gradio as gr
+from openai import OpenAI
+import os
+
+
+TOKEN = os.getenv("HF_TOKEN")
+def load_embedding_mode():
+    # embedding_model_dict = {"m3e-base": "/home/xiongwen/m3e-base"}
+    encode_kwargs = {"normalize_embeddings": False}
+    model_kwargs = {"device": 'cpu'}
+    return HuggingFaceEmbeddings(model_name="BAAI/bge-m3",
+                                 model_kwargs=model_kwargs,
+                                 encode_kwargs=encode_kwargs)
+client = OpenAI(
+    base_url="https://api-inference.huggingface.co/v1/",
+    api_key=TOKEN,
+)
+
+
+def qwen_api(user_message, top_p=0.9, temperature=0.7, system_message='', max_tokens=1024, gradio_history=[]):
+    history = []
+    if gradio_history:
+        for message in history:
+            if message:
+                history.append({"role": "user", "content": message[0]})
+                history.append({"role": "assistant", "content": message[1]})
+
+    if system_message != '':
+        history.append({'role': 'system', 'content': system_message})
+    history.append({"role": "user", "content": user_message})
+
+    response = ""
+    for message in client.chat.completions.create(
+        model="meta-llama/Meta-Llama-3-8B-Instruct",
+        # model="Qwen/Qwen1.5-4B-Chat",
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+        messages=history,
+    ):
+        token = message.choices[0].delta.content
+        response += token
+    return response
+
+os.environ["OPENAI_API_BASE"] = "https://api-inference.huggingface.co/v1/"
+os.environ["OPENAI_API_KEY"] = TOKEN
+
+
+
+
+embedding = load_embedding_mode()
+db = Chroma(persist_directory='./VecterStore2_512_txt/VecterStore2_512_txt', embedding_function=embedding)
+prompt_template = """
+{context}
+The above content is a form of biological background knowledge. Please answer the questions according to the above content.
+Question: {question}
+Please be sure to answer the questions according to the background knowledge and attach the doi number of the information source when answering.
+Answer in English:"""
+PROMPT = PromptTemplate(
+    template=prompt_template, input_variables=["context", "question"]
+)
+chain_type_kwargs = {"prompt": PROMPT}
+retriever = db.as_retriever()
+
+def langchain_chat(message, temperature, top_p, max_tokens):
+    llm = ChatOpenAI(
+        model="meta-llama/Meta-Llama-3-8B-Instruct",
+        # model="Qwen/Qwen1.5-4B-Chat",
+        temperature=temperature,
+        top_p=top_p,
+        max_tokens=max_tokens)
+    qa = RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=retriever,
+        chain_type_kwargs=chain_type_kwargs,
+        return_source_documents=True
+    )
+    response = qa.invoke(message)['result']
+    return response
+
+def chat(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
+    if len(history) == 0:
+        response = langchain_chat(message, temperature, top_p, max_tokens)
+    else:
+        response = qwen_api(message, gradio_history=history, max_tokens=max_tokens, top_p=top_p, temperature=temperature)
+    print(response)
+    yield response
+    return response


 def respond(
@@ -26,27 +133,31 @@ def respond(
     messages.append({"role": "user", "content": message})

     response = ""
-
-    for message in client.chat_completion(
-        messages,
+
+    for message in client.chat.completions.create(
+        model="meta-llama/Meta-Llama-3-8B-Instruct",
+        # model="Qwen/Qwen1.5-4B-Chat",
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
+        messages=messages,
     ):
         token = message.choices[0].delta.content

         response += token
         yield response

-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
+
+chatbot = gr.Chatbot(height=600)
+
 demo = gr.ChatInterface(
-    respond,
+    fn=chat,
+    fill_height=True,
+    chatbot=chatbot,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Textbox(label="System message"),
+        gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
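Review note: as committed, `qwen_api` never replays the conversation. It initializes `history = []` and then iterates over that same empty list instead of `gradio_history`, so earlier turns are silently dropped (the mutable default argument `gradio_history=[]` is a separate Python pitfall). A minimal sketch of what the loop appears to intend; `build_messages` is a hypothetical helper name, not part of the commit:

# Hypothetical helper (not in the commit): replay gradio_history instead of
# the freshly created empty list, and put the system prompt first.
def build_messages(user_message, gradio_history=None, system_message=""):
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    for user_turn, assistant_turn in (gradio_history or []):
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": user_message})
    return messages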
 
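A second note on the streaming loops: with OpenAI-style streaming, the final chunk's `choices[0].delta.content` is often `None`, so the unguarded `response += token` in both `qwen_api` and `respond` can raise a `TypeError` mid-stream. A defensive sketch under the same client setup as the commit; `stream_reply` is a hypothetical name:

import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

def stream_reply(messages, max_tokens=1024):
    # Accumulate a streamed completion, skipping None/empty deltas so the
    # final (content-less) chunk cannot break the concatenation.
    response = ""
    for chunk in client.chat.completions.create(
        model="meta-llama/Meta-Llama-3-8B-Instruct",
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
    return response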
 
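Finally, the new `chat` entry point routes only the first turn through the `RetrievalQA` chain and every follow-up through the plain chat API; note also that `chat` is a generator, so the trailing `return response` after `yield response` is dead code as far as Gradio is concerned. A condensed sketch of that routing, assuming the commit's `langchain_chat` and `qwen_api` are in scope:

def chat(message, history, system_message, max_tokens, temperature, top_p):
    # First turn: answer from the Chroma-backed RetrievalQA chain; later
    # turns: plain chat completion with the (intended) replayed history.
    if not history:
        response = langchain_chat(message, temperature, top_p, max_tokens)
    else:
        response = qwen_api(message, gradio_history=history,
                            max_tokens=max_tokens, top_p=top_p,
                            temperature=temperature)
    yield response  # Gradio consumes yielded values; a return after this is ignored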