import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
# Initialize Groq client
api_key = "YOUR_GROQ_API_KEY"  # Replace with your own Groq API key; avoid hardcoding real keys
client = Groq(api_key=api_key)
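# A safer pattern (sketch, assuming the key is stored as GROQ_API_KEY in Streamlit secrets,
# i.e. in .streamlit/secrets.toml or the Space's secrets settings) would be:
# client = Groq(api_key=st.secrets["GROQ_API_KEY"])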
# Load embeddings model
embedder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
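# Streamlit reruns this script on every interaction, so the model above is reloaded each time.
# A cached loader (a sketch, assuming a Streamlit version that provides st.cache_resource) avoids that:
# @st.cache_resource
# def load_embedder():
#     return SentenceTransformer('paraphrase-MiniLM-L6-v2')
# embedder = load_embedder()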
# Streamlit app layout
st.title("Movie Analysis with RAG and Groq")
st.write("Enter a query to get a summary-based analysis of movies.")
# Add an image (use a direct file URL; the /resolve/ path serves the raw image, unlike /blob/)
image_url = 'https://huggingface.co/spaces/Izza-shahzad-13/Movieanalysis/resolve/main/movie.jpg'  # Update with your image URL
st.image(image_url, caption='Movie Analysis Dashboard', use_column_width=True)
# Load the movie dataset
uploaded_file = st.file_uploader("Upload your movie dataset CSV file", type="csv")

# Initialize the DataFrame and embeddings variables
df = None
embeddings = None
# Check if a file has been uploaded before processing it
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    # Ensure the DataFrame contains the necessary column
    if 'overview' in df.columns:
        # Generate embeddings for the "overview" column
        embeddings = embedder.encode(df['overview'].fillna("").values)
    else:
        st.error("The uploaded CSV does not contain an 'overview' column.")
# Define functions for movie retrieval and summary generation
def retrieve_movies_for_summary(query, top_n=5):
    if embeddings is not None:  # Check if embeddings have been generated
        # Retrieve the movies whose overviews are most similar to the query
        query_embedding = embedder.encode([query])
        similarities = cosine_similarity(query_embedding, embeddings)
        indices = similarities[0].argsort()[-top_n:][::-1]
        return df.iloc[indices]
    else:
        return pd.DataFrame()  # Return an empty DataFrame if embeddings are not available
def generate_summary_response(query):
    # Retrieve relevant movies for the query
    relevant_movies = retrieve_movies_for_summary(query)
    # Check if relevant movies are found
    if relevant_movies.empty:
        return "No relevant movies found for the given query."
    # Compile a context summary for each retrieved movie
    movie_context = "\n".join(
        f"Title: {row['title']}\nOverview: {row['overview']}\nGenres: {row['genres']}\n"
        for _, row in relevant_movies.iterrows()
    )
    # Generate a summary response using the Groq API (system message with the context first, then the user query)
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": f"Context Summary: {movie_context}"},
            {"role": "user", "content": query},
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content
# User input
query = st.text_input("Enter your query:")
if st.button("Generate Summary"):
    if query and embeddings is not None:  # Check if embeddings have been generated
        # Generate the summary response
        with st.spinner("Generating summary..."):
            summary_response = generate_summary_response(query)
        st.write("### Summary Response")
        st.write(summary_response)
    else:
        if not query:
            st.warning("Please enter a query to generate a summary.")
        if embeddings is None:
            st.warning("Please upload a CSV file first.")