Spaces:

Patrick079
/

Techglobal

Sleeping

App Files Files Community

Techglobal / app.py

Patrick079

Update app.py

08b4840 verified 27 days ago

raw

history blame contribute delete

3.25 kB

	import os
	import pandas as pd
	import google.generativeai as genai
	import numpy as np
	import gradio as gr

	# Mapping of source filenames to the custom titles used to label each document.
	title_mapping = {
	    'company.txt': 'company_data',
	    'products.txt': 'product_data',
	    'shipping.txt': 'shipping_data',
	}

	# Load each mapped file that is present in the current directory.
	# Iterating the mapping (rather than the directory listing) gives the rows a
	# stable order, since os.listdir() returns entries in arbitrary order.
	available_files = set(os.listdir('.'))
	document_rows = []
	for file_name, custom_title in title_mapping.items():
	    if file_name not in available_files:
	        continue
	    try:
	        with open(file_name, 'r', encoding='utf-8') as file:
	            # Replace newlines with spaces for cleaner text
	            text = file.read().replace('\n', ' ')
	    except (OSError, UnicodeDecodeError) as e:
	        # Best effort: report the unreadable file and keep loading the others.
	        print(f"Error processing file {file_name}: {e}")
	        continue
	    document_rows.append({'Title': custom_title, 'Text': text})

	# Build the DataFrame once from the collected rows instead of concatenating
	# inside the loop; with no rows this is an empty frame with the same columns.
	df = pd.DataFrame(document_rows, columns=['Title', 'Text'])

	# Read the Gemini credential from the GOOGLE_API_KEY environment variable
	# and stop immediately when it is missing or empty.
	GEMINI_API_KEY = os.environ.get("GOOGLE_API_KEY")
	if not GEMINI_API_KEY:
	    raise EnvironmentError("Error: Gemini API key not found. Please set the GOOGLE_API_KEY environment variable.")

	# Hand the key to the client library; any failure is re-raised as RuntimeError.
	try:
	    genai.configure(api_key=GEMINI_API_KEY)
	except Exception as config_error:
	    raise RuntimeError(f"Error: Failed to configure the Gemini API. Details: {config_error}")

	def embed_text(text, task_type='retrieval_document'):
	    """Return the embedding of *text* from the Google Generative AI API.

	    Args:
	        text: Content to embed.
	        task_type: Task type forwarded to ``genai.embed_content``. Defaults
	            to 'retrieval_document', the value that used to be hard-coded,
	            so existing single-argument callers behave as before.

	    Returns:
	        The value stored under the 'embedding' key of the API response.

	    Raises:
	        RuntimeError: If the API call or the response lookup fails. The
	            original exception is attached as the cause.
	    """
	    try:
	        response = genai.embed_content(
	            model='models/embedding-001',
	            content=text,
	            task_type=task_type,
	        )
	        return response['embedding']
	    except Exception as e:
	        # Chain explicitly so the underlying API error stays in the traceback.
	        raise RuntimeError(f"Error embedding text: {e}") from e

	# Embed every document's text once at module load, unless the DataFrame
	# already carries an 'Embeddings' column.
	if 'Embeddings' not in df:
	    document_embeddings = df['Text'].apply(embed_text)
	    df['Embeddings'] = document_embeddings

	def query_similarity_score(query, vector):
	    """Embed *query* and return its dot product with *vector*.

	    Args:
	        query: Text to embed via ``embed_text``.
	        vector: A document embedding to compare against.

	    Returns:
	        The result of ``np.dot`` between the query embedding and *vector*.
	    """
	    return np.dot(embed_text(query), vector)

	def most_similar_document(query):
	    """Return the title and text of the document most similar to *query*.

	    Similarity is the dot product between the query embedding and each
	    stored document embedding; the highest-scoring row wins.

	    Args:
	        query: The user's query text.

	    Returns:
	        A ``(title, text)`` tuple taken from the best-matching row of ``df``.

	    Raises:
	        ValueError: If ``df`` holds no documents to search.
	        RuntimeError: Propagated from ``embed_text`` when embedding fails.
	    """
	    if df.empty:
	        raise ValueError("No documents are loaded; cannot search for a match.")

	    # Embed the query once and reuse it for every row, instead of issuing
	    # one embedding request per stored document.
	    query_embedding = embed_text(query)
	    similarities = df['Embeddings'].apply(
	        lambda vector: np.dot(query_embedding, vector)
	    )

	    # Rank the scores on their own Series so the DataFrame is never copied
	    # or mutated; the first label after a descending sort is the best match.
	    ranked = similarities.sort_values(ascending=False)
	    best_row = df.loc[ranked.index[0]]
	    return best_row['Title'], best_row['Text']

	def RAG(query):
	    """Answer *query* using the most similar stored document as context.

	    The best-matching document is retrieved, placed into a prompt that
	    tells the model to answer only from that context, and sent to the
	    'gemini-pro' generative model.

	    Args:
	        query: The user's question as entered in the UI.

	    Returns:
	        The model's answer followed by the source document title, or a
	        string starting with "Error: " when the query is blank or any step
	        fails. Errors are returned rather than raised so the UI can show
	        them.
	    """
	    # Reject blank input up front instead of sending it to the API.
	    if query is None or not str(query).strip():
	        return "Error: Please enter a query."

	    try:
	        title, text = most_similar_document(query)
	        model = genai.GenerativeModel('gemini-pro')
	        prompt = f"Answer this query:\n{query}.\nOnly use this context to answer:\n{text}"
	        response = model.generate_content(prompt)
	        return f"{response.text}\n\nSource Document: {title}"
	    except Exception as e:
	        # UI boundary: report the failure as text rather than crashing.
	        return f"Error: {e}"

	# Gradio interface: one textbox for the user's query, one for the response,
	# wired to RAG as the handler.
	query_input = gr.Textbox(label="Enter Your Query")
	response_output = gr.Textbox(label="Response")
	iface = gr.Interface(
	    fn=RAG,
	    inputs=[query_input],
	    outputs=response_output,
	    title="Patrick's Multilingual Query Handler",
	)

	# Launch the interface only when this file is run directly.
	if __name__ == "__main__":
	    iface.launch()