# Hugging Face Space (status: Running)
import pandas as pd | |
import faiss | |
import numpy as np | |
from sentence_transformers import SentenceTransformer | |
from datasets import load_dataset | |
from datasets import concatenate_datasets | |
# Fetch the phrase dataset from the Hugging Face Hub.
dataset = load_dataset("fitlemon/ostap-phrases")

# Both splits are merged into a single dataset, so every example is
# available for retrieval.
train_data, test_data = dataset["train"], dataset["test"]
dataset = concatenate_datasets([train_data, test_data])

# Column views of the merged dataset: questions[i] belongs with answers[i].
questions, answers = dataset["question"], dataset["answer"]
# Embedding model; the same model encodes the incoming chat messages.
model_bge = SentenceTransformer("fitlemon/bge-m3-ru-ostap")

# Embed every *answer* (not the questions): a user message is matched
# directly against the answer texts. The vectors are L2-normalised so that
# the inner-product index below ranks candidates by cosine similarity; this
# is presumably a no-op if the model already ends with a Normalize module.
answer_embeddings = model_bge.encode(
    answers,
    convert_to_numpy=True,
    normalize_embeddings=True,
)
# FAISS only accepts C-contiguous float32 matrices.
answer_embeddings = np.ascontiguousarray(answer_embeddings, dtype=np.float32)

# Exact inner-product index wrapped in an ID map. Each vector's ID is its
# row position in `answers`, so a search hit can index `answers` directly.
index = faiss.IndexIDMap(faiss.IndexFlatIP(answer_embeddings.shape[1]))

# FAISS requires int64 IDs; np.arange's default integer dtype depends on the
# platform, so it is requested explicitly.
index.add_with_ids(answer_embeddings, np.arange(len(answers), dtype=np.int64))
import gradio as gr | |
import time | |
# Chat UI: a message-style chatbot, an input box and a button that clears both.
with gr.Blocks() as app:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(
        label="Напиши свой вопрос Остапу Бендеру здесь...",
        placeholder="Привет, Остап!",
    )
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Reply to ``message`` with the nearest phrase from the FAISS index.

        Args:
            message: Text submitted from the textbox.
            chat_history: Current chatbot value, a list of
                ``{"role": ..., "content": ...}`` dicts. It is extended
                in place with the user message and the reply.

        Returns:
            A ``(textbox_value, chat_history)`` tuple: an empty string,
            which clears the textbox, and the updated history.
        """
        # Ignore blank submissions: embedding an empty string would still
        # return some arbitrary "nearest" phrase.
        if not message or not message.strip():
            return "", chat_history

        query_emb = model_bge.encode([message], convert_to_numpy=True)
        _, idx = index.search(query_emb, 1)

        # FAISS returns NumPy integers; cast to a plain int so the lookup
        # works for any sequence type that `answers` may be.
        best_id = int(idx[0][0])
        bot_message = answers[best_id]

        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": bot_message})
        # Fixed pause before the reply is returned (kept from the original).
        time.sleep(2)
        return "", chat_history

    # Submitting the textbox runs `respond`, then refreshes textbox and chat.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    app.launch()