Spaces:
Sleeping
Sleeping
File size: 3,251 Bytes
330c288 67cab39 330c288 67cab39 330c288 67cab39 08b4840 330c288 67cab39 08b4840 67cab39 08b4840 67cab39 08b4840 330c288 08b4840 330c288 08b4840 67cab39 08b4840 330c288 67cab39 330c288 67cab39 330c288 08b4840 330c288 08b4840 330c288 67cab39 330c288 08b4840 67cab39 08b4840 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import os
import pandas as pd
import google.generativeai as genai
import numpy as np
import gradio as gr
# Load the knowledge-base documents into a DataFrame with columns 'Title' and 'Text'.
# Mapping filenames to custom titles
title_mapping = {
    'company.txt': 'company_data',
    'products.txt': 'product_data',
    'shipping.txt': 'shipping_data',
}

# Process relevant files in the current directory. The mapping is iterated
# (instead of the directory listing) so the row order is deterministic:
# os.listdir() returns entries in arbitrary order.
_available_files = set(os.listdir('.'))
_rows = []
for file_name, custom_title in title_mapping.items():
    if file_name not in _available_files:
        # Missing documents are skipped silently.
        continue
    try:
        with open(file_name, 'r', encoding='utf-8') as file:
            # Replace newlines with spaces for cleaner text
            text = file.read().replace('\n', ' ')
    except (OSError, UnicodeError) as e:
        # Best effort: one unreadable file must not prevent the others from loading.
        print(f"Error processing file {file_name}: {e}")
        continue
    _rows.append({'Title': custom_title, 'Text': text})

# Build the frame once from the collected rows; concatenating inside the
# loop would copy the whole frame on every iteration.
df = pd.DataFrame(_rows, columns=['Title', 'Text'])
# Get the Google API key from environment variables
GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GEMINI_API_KEY:
    # Fail at import time: an unset or empty key stops the module here.
    raise EnvironmentError("Error: Gemini API key not found. Please set the GOOGLE_API_KEY environment variable.")

# Configure the Gemini API
try:
    genai.configure(api_key=GEMINI_API_KEY)
except Exception as e:
    # Chain explicitly so the traceback shows the SDK failure as the direct cause.
    raise RuntimeError(f"Error: Failed to configure the Gemini API. Details: {e}") from e
# Function to embed text using the Google Generative AI API
def embed_text(text, task_type='retrieval_document'):
    """Return the embedding of ``text`` from the 'models/embedding-001' model.

    Args:
        text: Content to embed; passed unchanged to ``genai.embed_content``.
        task_type: Task type forwarded to ``genai.embed_content``. Defaults to
            'retrieval_document', the value this function always used.
            NOTE(review): the Gemini docs list other task types (e.g.
            'retrieval_query' for search queries) -- confirm against the SDK
            version in use before switching callers.

    Returns:
        The value stored under the 'embedding' key of the API response.

    Raises:
        RuntimeError: If the embedding call fails for any reason; the
            underlying exception is attached as ``__cause__``.
    """
    try:
        response = genai.embed_content(
            model='models/embedding-001',
            content=text,
            task_type=task_type,
        )
        return response['embedding']
    except Exception as e:
        # Chain explicitly so callers and tracebacks can see the root failure.
        raise RuntimeError(f"Error embedding text: {e}") from e
# Embed every document once at import time; an existing 'Embeddings'
# column is left as it is.
if not ('Embeddings' in df.columns):
    _document_texts = df['Text']
    df['Embeddings'] = _document_texts.apply(embed_text)
# Function to calculate similarity score between the query and document embeddings
def query_similarity_score(query, vector):
    """Score how closely ``query`` matches one document embedding.

    The query is embedded with ``embed_text`` and the score is the dot
    product of that embedding with ``vector``.

    Args:
        query: Query text to embed.
        vector: Embedding of the document to compare against.

    Returns:
        The result of ``np.dot`` on the two embeddings.
    """
    return np.dot(embed_text(query), vector)
# Function to get the most similar document based on the query
def most_similar_document(query):
    """Return the ``(title, text)`` of the document most similar to ``query``.

    Similarity is the dot product between the query embedding and each
    stored document embedding in the module-level ``df``.

    Args:
        query: Query text to match against the loaded documents.

    Returns:
        A ``(Title, Text)`` tuple taken from the best-scoring row of ``df``.

    Raises:
        IndexError: If no documents are loaded. The type matches what the
            previous positional lookup raised on an empty frame, but the
            message now says what is wrong.
        RuntimeError: Propagated from ``embed_text`` if embedding fails.
    """
    if df.empty:
        raise IndexError("No documents are loaded; cannot search for a matching document.")

    # Embed the query once. Scoring each row through a helper that re-embeds
    # the query would cost one API round-trip per document.
    query_embedding = embed_text(query)
    scores = [np.dot(query_embedding, vector) for vector in df['Embeddings']]

    # Positional lookup so the result does not depend on the index labels.
    best_row = df.iloc[int(np.argmax(scores))]
    return best_row['Title'], best_row['Text']
# Function to generate a response using the RAG approach
def RAG(query):
    """Answer ``query`` using the most similar loaded document as context.

    The best-matching document is found with ``most_similar_document`` and
    its text is placed in a prompt for the 'gemini-pro' model.

    Args:
        query: The user's question.

    Returns:
        The model's answer followed by the source document title, or a
        string starting with "Error: " if the query is blank or any step
        fails. This function does not raise.
    """
    # Guard blank input up front so no embedding or generation call is made
    # for a query that has no content.
    if query is None or not str(query).strip():
        return "Error: Please enter a query."

    try:
        title, text = most_similar_document(query)
        model = genai.GenerativeModel('gemini-pro')
        prompt = f"Answer this query:\n{query}.\nOnly use this context to answer:\n{text}"
        response = model.generate_content(prompt)
        return f"{response.text}\n\nSource Document: {title}"
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error: {e}"
# Gradio interface: one query textbox in, one response textbox out.
# The components are created in the same order as before (input first).
_query_input = gr.Textbox(label="Enter Your Query")  # Input for the user's query
_response_output = gr.Textbox(label="Response")  # Output for the generated response

iface = gr.Interface(
    fn=RAG,  # Main function to handle the query
    inputs=[_query_input],
    outputs=_response_output,
    title="Patrick's Multilingual Query Handler",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()
|