# Hugging Face Space: FAISS-backed news retrieval + generation demo (Gradio app)
import faiss
import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer
# --- Load retrieval artifacts and models (runs once at startup) ---

# Prebuilt FAISS index of document embeddings.
index_path = "faiss_index/index.faiss"  # Update with your FAISS index file path
index = faiss.read_index(index_path)

# Metadata DataFrame: one row per indexed document (title/author/full_text/url).
df = pd.read_pickle('df_news (1).pkl')

# Sentence-transformer used to embed incoming queries for similarity search.
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Generator tokenizer/model. The original called AutoModel.from_pretrained, but
# AutoModel was never imported (NameError) and a bare AutoModel has no
# .generate(); AutoModelForCausalLM matches the generate() call made later.
# NOTE(review): 'BAAI/bge-large-zh-v1.5' is an embedding checkpoint — confirm
# that a causal-LM checkpoint is actually intended for answer generation.
hf_tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-large-zh-v1.5')
hf_model = AutoModelForCausalLM.from_pretrained('BAAI/bge-large-zh-v1.5')
# Define the function for similarity search
def search(query, k=10):
    """Return metadata for the *k* documents most similar to *query*.

    Args:
        query: Free-text search string.
        k: Number of nearest neighbours to retrieve (default 10).

    Returns:
        A list of dicts with keys 'title', 'author', 'content', 'source',
        one per retrieved document (fewer if the index returns invalid ids).
    """
    # Embed the query with the module-level sentence-transformer; the original
    # referenced an undefined name `embedding_model` (NameError at runtime).
    # FAISS expects float32 vectors.
    query_embedding = model.encode(query).astype('float32')
    D, I = index.search(np.array([query_embedding]), k)
    results = []
    for idx in I[0]:
        # FAISS reports missing neighbours as -1; the original `idx < len(df)`
        # check let -1 through, which would silently return the *last* row
        # via negative indexing. Guard both bounds.
        if 0 <= idx < len(df):
            doc = df.iloc[idx]
            results.append({
                'title': doc['title'],
                'author': doc['author'],
                'content': doc['full_text'],
                'source': doc['url']
            })
    return results
# Define the function to generate a response based on the retrieved documents
def generate_answer(query, max_tokens, temperature, top_p):
    """Answer *query* by retrieving related news and generating a response.

    Args:
        query: The user's question.
        max_tokens: Maximum number of NEW tokens to generate.
        temperature: Sampling temperature for generation.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The generated answer as a string (prompt text excluded).
    """
    # Perform similarity search to build the retrieval context.
    search_results = search(query)
    context = "\n\n".join([f"Title: {doc['title']}\nContent: {doc['content']}" for doc in search_results])
    # Construct the prompt: retrieved context followed by the question.
    full_prompt = f"Context:\n{context}\n\nQuestion: {query}"
    # Tokenize the input prompt.
    inputs = hf_tokenizer(full_prompt, return_tensors="pt")
    # Generate a response. Fixes vs original:
    #  - max_new_tokens (not max_length): max_length counts the prompt too, so
    #    a long RAG context would truncate generation immediately — and the UI
    #    slider is explicitly labelled "Max new tokens".
    #  - do_sample=True: without it, temperature/top_p are silently ignored.
    #  - attention_mask passed explicitly to avoid ambiguity with padding.
    output = hf_model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=hf_tokenizer.eos_token_id
    )
    # Decode only the newly generated tokens; the original decoded the whole
    # sequence, echoing the entire context+question back to the user.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    response = hf_tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response
# Gradio callback: thin adapter between the UI widgets and the RAG pipeline.
def respond(message, max_tokens, temperature, top_p):
    """Forward the UI inputs to generate_answer and return its answer text."""
    return generate_answer(message, max_tokens, temperature, top_p)
# Set up the Gradio demo: one query box plus three generation-control sliders.
query_box = gr.Textbox(value="What is the latest news?", label="Query")
max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=150, step=1, label="Max new tokens")
temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Temperature")
top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.1, label="Top-p (nucleus sampling)")

demo = gr.Interface(
    fn=respond,
    inputs=[query_box, max_tokens_slider, temperature_slider, top_p_slider],
    outputs=[gr.Textbox()],
)

if __name__ == "__main__":
    demo.launch()