Update app.py
app.py
CHANGED
@@ -1,14 +1,12 @@
 import streamlit as st
 import pdfplumber
-from
-
-from langchain.chains import ConversationalRetrievalChain
-from langchain.vectorstores import Chroma
-from langchain.document_loaders import TextLoader
-from langchain.embeddings import HuggingFaceEmbeddings
+from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+import torch
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
-from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 
 # Load the ThaiBERT model from Hugging Face
 tokenizer = AutoTokenizer.from_pretrained("airesearch/wangchanberta-base-att-spm-uncased")
@@ -26,8 +24,8 @@ def extract_text_from_pdf(pdf_file):
 def answer_question(question, context):
     inputs = tokenizer.encode_plus(question, context, return_tensors="pt")
     answer_start_scores, answer_end_scores = model(**inputs)
-    answer_start = torch.argmax(answer_start_scores)
-    answer_end = torch.argmax(answer_end_scores) + 1
+    answer_start = torch.argmax(answer_start_scores.logits)
+    answer_end = torch.argmax(answer_end_scores.logits) + 1
     answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][0][answer_start:answer_end]))
     return answer
 
@@ -51,7 +49,7 @@ if uploaded_file:
 
     qa_chain = ConversationalRetrievalChain(
         retriever=retriever,
-        llm=
+        llm=None,  # adjust as appropriate if you are not using HuggingFaceHub
         memory=memory
     )
 
@@ -61,4 +59,3 @@ if uploaded_file:
     if user_question:
         response = qa_chain.run(user_question)
         st.write("Answer:", response)
-
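Note: the hunk headers reference an extract_text_from_pdf helper that this diff never shows. For orientation, a minimal sketch of what such a helper typically looks like with pdfplumber; the app's real implementation may differ.

import pdfplumber

def extract_text_from_pdf(pdf_file):
    # Join the text of every page; pages with no extractable text yield "".
    with pdfplumber.open(pdf_file) as pdf:
        return "\n".join(page.extract_text() or "" for page in pdf.pages)

pdfplumber.open accepts file-like objects, so Streamlit's uploaded file can be passed in directly.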
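The second hunk patches the span extraction, but in recent transformers releases model(**inputs) returns a QuestionAnsweringModelOutput whose fields are start_logits and end_logits, so the tuple unpacking in the committed code may still need adjusting. A self-contained sketch of the extractive-QA step under that assumption; note the base wangchanberta checkpoint has no fine-tuned QA head, so the head is randomly initialized until trained.

import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

MODEL_NAME = "airesearch/wangchanberta-base-att-spm-uncased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)  # QA head is untrained here

def answer_question(question, context):
    # Encode the (question, context) pair; truncate to the model's max length.
    inputs = tokenizer(question, context, return_tensors="pt", truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)  # QuestionAnsweringModelOutput
    answer_start = torch.argmax(outputs.start_logits)  # most likely start token
    answer_end = torch.argmax(outputs.end_logits) + 1  # exclusive end index
    return tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end], skip_special_tokens=True)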
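The third hunk leaves llm=None as a placeholder, which will not run: in classic LangChain, ConversationalRetrievalChain's direct constructor expects pre-built sub-chains rather than an llm argument, and the usual entry point is the from_llm classmethod. A sketch of that wiring, assuming a HuggingFaceHub-hosted model is acceptable; the repo_id and embedding model below are illustrative, not the app's actual choices.

from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter

# pdf_text stands in for the text extracted from the uploaded PDF.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = splitter.create_documents([pdf_text])

embeddings = HuggingFaceEmbeddings()  # defaults to a sentence-transformers model
retriever = Chroma.from_documents(docs, embeddings).as_retriever()

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
llm = HuggingFaceHub(repo_id="google/flan-t5-base")  # requires HUGGINGFACEHUB_API_TOKEN

qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
response = qa_chain.run(user_question)  # user_question: the text from the Streamlit input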