Charles Chan
committed on
Commit 51c0f15
1 Parent(s): 10b5e55
coding
Browse files
- app.py +6 -11
- requirements.txt +0 -1
app.py
CHANGED
@@ -4,10 +4,8 @@ from langchain_community.llms import HuggingFaceHub
 from langchain_community.embeddings import SentenceTransformerEmbeddings
 from langchain_community.vectorstores import FAISS
 from datasets import load_dataset
-from opencc import OpenCC
 
-# 使用
-# 原数据集是是繁体中文,为了调试方便,将其转换成简体中文之后使用
+# 使用 假知识 数据集
 if "data_list" not in st.session_state:
     st.session_state.data_list = []
     st.session_state.answer_list = []
@@ -15,15 +13,12 @@ if "data_list" not in st.session_state:
 if not st.session_state.data_list:
     try:
         with st.spinner("正在读取数据库..."):
-
-            dataset = load_dataset("rorubyy/attack_on_titan_wiki_chinese")
+            dataset = load_dataset("zeerd/fake_knowledge")
             data_list = []
             answer_list = []
             for example in dataset["train"]:
-
-
-                answer_list.append(converted_answer)
-                data_list.append({"Question": converted_question, "Answer": converted_answer})
+                answer_list.append(example["Answer"])
+                data_list.append({"Question": example["Question"], "Answer": example["Answer"]})
             st.session_state.answer_list = answer_list
             st.session_state.data_list = data_list
             st.success("数据库读取完成!")
@@ -112,7 +107,7 @@ def answer_question(repo_id, temperature, max_length, question):
         return {"prompt": "", "answer": "An error occurred during the answering process.", "pure_answer": ""}
 
 # Streamlit 界面
-st.title("
+st.title("假知识库问答系统")
 
 col1, col2 = st.columns(2)
 with col1:
@@ -154,7 +149,7 @@ with col3:
         generate_answer(gemma, float(temperature), int(max_length), random_question)
 
 with col4:
-    question = st.text_area("请输入问题", "
+    question = st.text_area("请输入问题", "谁是潜水员?")
     if st.button("提交输入的问题"):
         if not question:
            st.warning("请输入问题!")
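The hunks above only build data_list and answer_list; the indexing and retrieval code is outside this diff. Given the SentenceTransformerEmbeddings and FAISS imports visible in the context lines, the collected answers are presumably embedded and searched roughly as sketched below. This is a minimal sketch, not the app's actual code: the embedding model name, the sample answer, and the value of k are assumptions.

from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS

# In the app this list comes from st.session_state.answer_list; the value here
# is a made-up sample for illustration only.
answer_list = ["没有人是潜水员。"]

# Embed every answer and build an in-memory FAISS index over them.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")  # assumed model
db = FAISS.from_texts(texts=answer_list, embedding=embeddings)

# Fetch the answers most similar to a user question (here the commit's new default question).
docs = db.similarity_search("谁是潜水员?", k=3)
context = "\n".join(doc.page_content for doc in docs)

The retrieved context would then presumably feed the prompt that answer_question() sends to the HuggingFaceHub model.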
requirements.txt
CHANGED
@@ -6,4 +6,3 @@ langchain-huggingface
 sentence_transformers
 faiss-cpu
 datasets
-opencc-python-reimplemented
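The only requirements change removes opencc-python-reimplemented, which lines up with the app.py hunks: the old rorubyy/attack_on_titan_wiki_chinese dataset is Traditional Chinese and was converted to Simplified for debugging convenience, while zeerd/fake_knowledge is used as-is, so no converter is needed. For reference, the deleted converted_question / converted_answer values were presumably produced by calls like the ones below; this is a hypothetical reconstruction of the removed lines, not the original code.

from opencc import OpenCC  # provided by the dropped opencc-python-reimplemented package

cc = OpenCC('t2s')  # 't2s' = Traditional Chinese to Simplified Chinese
print(cc.convert("進擊的巨人"))  # -> 进击的巨人
# In the removed loop this would have been applied to each record, e.g.:
# converted_question = cc.convert(example["Question"])
# converted_answer = cc.convert(example["Answer"])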