Spaces:
Running
Running
File size: 1,922 Bytes
f3de15f aac2bbe f3de15f aac2bbe 67bad47 f3de15f 67bad47 f3de15f 67bad47 f3de15f 67bad47 f3de15f 67bad47 f3de15f 67bad47 f3de15f 67bad47 f3de15f 67bad47 f3de15f 67bad47 f3de15f 67bad47 f3de15f 67bad47 aac2bbe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from datasets import concatenate_datasets
# Load the phrase dataset from the Hugging Face Hub.
dataset = load_dataset("fitlemon/ostap-phrases")

# Merge the train and test splits into one dataset so that every phrase
# ends up in the search index.
train_data = dataset["train"]
test_data = dataset["test"]
dataset = concatenate_datasets([train_data, test_data])

questions = dataset["question"]
answers = dataset["answer"]

# Initialise the embedding model.
model_bge = SentenceTransformer("fitlemon/bge-m3-ru-ostap")

# Embed every answer.
# NOTE(review): the original comments spoke of indexing the *questions*, but
# the code embeds the answers, so an incoming message is matched directly
# against answer texts. Confirm that this is the intended design.
answer_embeddings = model_bge.encode(answers, convert_to_numpy=True)

# Flat inner-product index wrapped in an ID map, so each stored vector
# carries the position of its answer in `answers`.
# NOTE(review): inner product only equals cosine similarity if the model
# emits L2-normalised vectors -- presumably it does; verify.
index = faiss.IndexIDMap(faiss.IndexFlatIP(answer_embeddings.shape[1]))

# FAISS expects 64-bit integer ids. np.arange() falls back to the platform
# default integer type, which can be 32-bit (e.g. NumPy 1.x on Windows),
# so the dtype is requested explicitly.
index.add_with_ids(answer_embeddings, np.arange(len(answers), dtype=np.int64))
import gradio as gr
import time
# Chat UI: a message history, an input box and a button that clears both.
with gr.Blocks() as app:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(
        label="Напиши свой вопрос Остапу Бендеру здесь...",
        placeholder="Привет, Остап!",
    )
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Answer *message* with the closest phrase from the FAISS index.

        Args:
            message: Text submitted through the textbox.
            chat_history: Current chatbot history, a list of
                ``{"role": ..., "content": ...}`` dicts. It is extended
                in place with the user message and the reply.

        Returns:
            A tuple ``("", chat_history)``: the empty string clears the
            textbox and the history refreshes the chatbot component.
        """
        query_emb = model_bge.encode([message], convert_to_numpy=True)
        _, idx = index.search(query_emb, 1)
        # One query and k=1, so the single hit sits at idx[0][0]. It is a
        # NumPy integer; cast it to a plain int so that indexing `answers`
        # does not depend on the container accepting NumPy scalars.
        bot_message = answers[int(idx[0][0])]
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": bot_message})
        # Fixed pause before the reply is shown.
        # NOTE(review): presumably a cosmetic "thinking" delay -- confirm.
        time.sleep(2)
        return "", chat_history

    # Pressing Enter in the textbox runs respond(); its two return values
    # are written back to the textbox and the chatbot.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()