import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
# Initialize Groq client
api_key = "YOUR_GROQ_API_KEY"  # Replace with your own Groq API key; avoid hardcoding real keys
client = Groq(api_key=api_key)
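# A safer pattern (sketch, assuming the key is stored as GROQ_API_KEY in Streamlit secrets,
# i.e. in .streamlit/secrets.toml or the Space's secrets settings) would be:
# client = Groq(api_key=st.secrets["GROQ_API_KEY"])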
# Load embeddings model
embedder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
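# Streamlit reruns this script on every interaction, so the model above is reloaded each time.
# A cached loader (a sketch, assuming a Streamlit version that provides st.cache_resource) avoids that:
# @st.cache_resource
# def load_embedder():
#     return SentenceTransformer('paraphrase-MiniLM-L6-v2')
# embedder = load_embedder()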
# Streamlit app layout
st.title("Movie Analysis with RAG and Groq")
st.write("Enter a query to get a summary-based analysis of movies.")
# Add an image (use a direct file URL; the /resolve/ path serves the raw image, unlike /blob/)
image_url = 'https://huggingface.co/spaces/Izza-shahzad-13/Movieanalysis/resolve/main/movie.jpg'  # Update with your image URL
st.image(image_url, caption='Movie Analysis Dashboard', use_column_width=True)
# Load the movie dataset
uploaded_file = st.file_uploader("Upload your movie dataset CSV file", type="csv")

# Initialize the DataFrame and embeddings variables
df = None
embeddings = None
# Check if a file has been uploaded before processing it
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    # Ensure the DataFrame contains the necessary column
    if 'overview' in df.columns:
        # Generate embeddings for the "overview" column
        embeddings = embedder.encode(df['overview'].fillna("").values)
    else:
        st.error("The uploaded CSV does not contain an 'overview' column.")
# Define functions for movie retrieval and summary generation
def retrieve_movies_for_summary(query, top_n=5):
    if embeddings is not None:  # Check if embeddings have been generated
        # Retrieve the movies whose overviews are most similar to the query
        query_embedding = embedder.encode([query])
        similarities = cosine_similarity(query_embedding, embeddings)
        indices = similarities[0].argsort()[-top_n:][::-1]
        return df.iloc[indices]
    else:
        return pd.DataFrame()  # Return an empty DataFrame if embeddings are not available
def generate_summary_response(query):
    # Retrieve relevant movies for the query
    relevant_movies = retrieve_movies_for_summary(query)
    # Check if relevant movies are found
    if relevant_movies.empty:
        return "No relevant movies found for the given query."
    # Compile a context summary for each retrieved movie
    movie_context = "\n".join(
        f"Title: {row['title']}\nOverview: {row['overview']}\nGenres: {row['genres']}\n"
        for _, row in relevant_movies.iterrows()
    )
    # Generate a summary response using the Groq API (system message with the context first, then the user query)
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": f"Context Summary: {movie_context}"},
            {"role": "user", "content": query},
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content
# User input
query = st.text_input("Enter your query:")
if st.button("Generate Summary"):
    if query and embeddings is not None:  # Check if embeddings have been generated
        # Generate the summary response
        with st.spinner("Generating summary..."):
            summary_response = generate_summary_response(query)
        st.write("### Summary Response")
        st.write(summary_response)
    else:
        if not query:
            st.warning("Please enter a query to generate a summary.")
        if embeddings is None:
            st.warning("Please upload a CSV file first.")