ask_bender / app.py
fitlemon's picture
Update app.py
aac2bbe verified
import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from datasets import concatenate_datasets
# Load the phrase dataset from the Hugging Face Hub.
dataset = load_dataset("fitlemon/ostap-phrases")

# Merge the train and test splits so every phrase is available for retrieval.
train_data = dataset["train"]
test_data = dataset["test"]
dataset = concatenate_datasets([train_data, test_data])

# Copy the columns into plain Python lists; `answers` is indexed by integer
# position when a reply is looked up.
questions = list(dataset["question"])
answers = list(dataset["answer"])

# Initialise the embedding model.
model_bge = SentenceTransformer("fitlemon/bge-m3-ru-ostap")

# Embed every answer.
# NOTE(review): the original comments described indexing the *questions* and
# keeping the answers as metadata, but the code embeds the answers. Confirm
# which of the two is intended before changing anything here.
answer_embeddings = model_bge.encode(answers, convert_to_numpy=True)

# Flat inner-product index wrapped in an ID map, so each vector is stored under
# an explicit id (its row position in `answers`).
index = faiss.IndexIDMap(faiss.IndexFlatIP(answer_embeddings.shape[1]))

# FAISS ids are 64-bit integers. Request that dtype explicitly instead of
# relying on NumPy's default integer type, which can be narrower on some
# platforms.
index.add_with_ids(answer_embeddings, np.arange(len(answers), dtype=np.int64))
import gradio as gr
import time
# Chat UI: a message history, a text box for the question and a clear button.
with gr.Blocks() as app:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(
        label="Напиши свой вопрос Остапу Бендеру здесь...",
        placeholder="Привет, Остап!",
    )
    # Resets both the text box and the chat history.
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Answer *message* with the nearest indexed phrase.

        The message is embedded with ``model_bge``, the single closest vector
        is looked up in ``index``, and the phrase stored at that position in
        ``answers`` becomes the assistant reply.

        Args:
            message: Text submitted from the text box.
            chat_history: List of ``{"role": ..., "content": ...}`` dicts shown
                in the chatbot; it is extended in place.

        Returns:
            A ``("", chat_history)`` tuple: the empty string clears the text
            box, and the history now holds the user message and the reply.
        """
        # Nothing to search for: leave the conversation as it is rather than
        # replying to an empty query with an arbitrary phrase.
        if not message or not message.strip():
            return "", chat_history

        query_emb = model_bge.encode([message], convert_to_numpy=True)
        _, idx = index.search(query_emb, 1)

        # `idx` is a NumPy array; use a plain int to index `answers`.
        best_id = int(idx[0][0])
        bot_message = answers[best_id]

        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": bot_message})

        # Artificial pause before the reply is shown.
        # NOTE(review): presumably meant to mimic typing — confirm it is still wanted.
        time.sleep(2)
        return "", chat_history

    # Submitting the text box runs `respond`; its outputs update both widgets.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    app.launch()