# Movieanalysis / app.py
# Hugging Face Space by Izza-shahzad-13
# Commit 50aa180 (verified): "Update app.py"
import os

import pandas as pd
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Initialize Groq client.
# The API key is read from the environment so that no credential is stored in
# the source file. Set GROQ_API_KEY (e.g. as a Space/host secret) before
# launching the app.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=api_key)

# Load embeddings model
embedder = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Streamlit app layout
st.title("Movie Analysis with RAG and Groq")
st.write("Enter a query to get a summary-based analysis of movies.")

# Add an image.
# The Hub's "/resolve/" path serves the raw file; "/blob/" serves the HTML
# file-viewer page, which cannot be rendered as an image.
image_url = 'https://huggingface.co/spaces/Izza-shahzad-13/Movieanalysis/resolve/main/movie.jpg'  # Update with your image URL
# NOTE(review): newer Streamlit releases prefer `use_container_width` over
# `use_column_width` - confirm against the Streamlit version pinned for this app.
st.image(image_url, caption='Movie Analysis Dashboard', use_column_width=True)

# Load the movie dataset
uploaded_file = st.file_uploader("Upload your movie dataset CSV file", type="csv")

# Initialize the DataFrame and embeddings variables; both stay None until a
# valid CSV with an 'overview' column has been uploaded.
df = None
embeddings = None

# Check if a file has been uploaded before processing it
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    # Ensure the DataFrame contains the necessary column
    if 'overview' in df.columns:
        # Generate embeddings for the "overview" column; missing overviews are
        # embedded as empty strings so every row keeps a vector.
        embeddings = embedder.encode(df['overview'].fillna("").values)
    else:
        st.error("The uploaded CSV does not contain an 'overview' column.")
# Define functions for movie retrieval and summary generation
def retrieve_movies_for_summary(query, top_n=5):
    """Return the rows of the uploaded dataset most similar to ``query``.

    Similarity is the cosine similarity between the embedding of ``query``
    and the module-level ``embeddings`` computed from the dataset's
    "overview" column.

    Args:
        query: Free-text query, embedded with the module-level ``embedder``.
        top_n: Maximum number of rows to return.

    Returns:
        Rows of the module-level ``df`` ordered from most to least similar,
        or an empty DataFrame when no embeddings are available, the dataset
        has no rows, or ``top_n`` is not positive.
    """
    # Nothing to search: no dataset uploaded yet, or it contains no rows.
    if embeddings is None or len(embeddings) == 0:
        return pd.DataFrame()
    # A slice of [-0:] selects *every* element, so a non-positive count must
    # be rejected explicitly rather than passed to the slice below.
    if top_n <= 0:
        return pd.DataFrame()

    query_embedding = embedder.encode([query])
    similarities = cosine_similarity(query_embedding, embeddings)
    # argsort is ascending: keep the last top_n indices, then reverse them so
    # the best match comes first.
    indices = similarities[0].argsort()[-top_n:][::-1]
    return df.iloc[indices]
def generate_summary_response(query):
    """Answer ``query`` using the most relevant movies as context.

    The movies returned by ``retrieve_movies_for_summary`` are formatted into
    a context string, which is sent to the Groq chat completion API as the
    system message, followed by ``query`` as the user message.

    Args:
        query: The user's free-text question.

    Returns:
        The text content of the first completion choice, or a fixed
        "no relevant movies" message when retrieval returns nothing.
    """
    # Retrieve relevant movies for the query
    relevant_movies = retrieve_movies_for_summary(query)
    if relevant_movies.empty:
        return "No relevant movies found for the given query."

    # Only the "overview" column is validated at upload time, so "title" and
    # "genres" may be absent; fall back to a placeholder instead of raising
    # KeyError.
    movie_context = "\n".join(
        f"Title: {row.get('title', 'N/A')}\n"
        f"Overview: {row.get('overview', 'N/A')}\n"
        f"Genres: {row.get('genres', 'N/A')}\n"
        for _, row in relevant_movies.iterrows()
    )

    # The system message carrying the context is sent first so that it frames
    # the user's question.
    # NOTE(review): confirm the model id below is still served by Groq.
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": f"Context Summary: {movie_context}"},
            {"role": "user", "content": query},
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content
# Query box and trigger button. A summary is produced only when the button is
# pressed with both a non-empty query and computed embeddings; otherwise the
# user is told which prerequisite is missing.
query = st.text_input("Enter your query:")
generate_clicked = st.button("Generate Summary")

if generate_clicked:
    has_query = bool(query)
    has_embeddings = embeddings is not None

    if not has_query:
        st.warning("Please enter a query to generate a summary.")
    if not has_embeddings:
        st.warning("Please upload a CSV file first.")

    if has_query and has_embeddings:
        # Show a spinner while retrieval and the API call are in progress.
        with st.spinner("Generating summary..."):
            summary_response = generate_summary_response(query)
        st.write("### Summary Response")
        st.write(summary_response)