import os
import streamlit as st
from PyPDF2 import PdfReader
import numpy as np
from groq import Groq
import faiss
# Groq's chat API returns text, not vectors, so chunk embeddings are computed
# locally with sentence-transformers (an assumed extra dependency).
from sentence_transformers import SentenceTransformer
# Set up the Groq API client; read the key from the GROQ_API_KEY environment
# variable rather than hard-coding a credential in the source.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Local embedding model; all-MiniLM-L6-v2 is a small, common default, but any
# sentence-transformers encoder works here.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Function to extract text from a PDF
def extract_pdf_content(pdf_file):
    reader = PdfReader(pdf_file)
    content = ""
    for page in reader.pages:
        # extract_text() can return None for image-only pages
        content += page.extract_text() or ""
    return content
# Function to split the extracted text into fixed-size word chunks
def chunk_text(text, chunk_size=500):
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
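# e.g. a 1,200-word text yields three chunks (start offsets 0, 500, 1000):
#   len(chunk_text(" ".join(["word"] * 1200)))  # -> 3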
# Function to compute embeddings for the chunks. llama3-70b-8192 is a chat
# model that returns text, not vectors, so a local sentence-transformers
# encoder handles the embedding step instead.
def compute_embeddings(text_chunks):
    embeddings = embedder.encode(text_chunks)  # one vector per chunk
    return np.asarray(embeddings, dtype="float32")  # FAISS requires float32
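# With the all-MiniLM-L6-v2 encoder assumed above, each vector has 384
# dimensions, so embeddings.shape == (len(text_chunks), 384).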
# Function to build a FAISS index over the chunk embeddings
def build_faiss_index(embeddings):
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)  # exact search under L2 distance
    index.add(embeddings)
    return index
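# Usage sketch with synthetic data:
#   build_faiss_index(np.random.rand(10, 384).astype("float32")).ntotal  # -> 10
# IndexFlatL2 scans every stored vector, which is fine at this scale; a larger
# corpus would call for an approximate index such as faiss.IndexIVFFlat.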
# Function to search the FAISS index for the chunks closest to the query
def search_faiss_index(index, query_embedding, text_chunks, top_k=3):
    distances, indices = index.search(query_embedding, top_k)
    # FAISS pads the result with -1 when fewer than top_k vectors are stored
    return [(text_chunks[idx], distances[0][i])
            for i, idx in enumerate(indices[0]) if idx != -1]
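# IndexFlatL2 reports squared L2 distances in ascending order, so the first
# result is the closest chunk and 0.0 indicates an exact vector match.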
# Function to generate professional content using Groq's llama3-70b-8192 model
def generate_professional_content_groq(topic):
    response = groq_client.chat.completions.create(
        messages=[{
            "role": "user",
            "content": f"Explain '{topic}' in bullet points, highlighting key concepts, examples, and applications for electrical engineering students.",
        }],
        model="llama3-70b-8192",
    )
    # The Groq SDK returns response objects, not dicts
    return response.choices[0].message.content.strip()
# Function to embed the query with the same local encoder used for the chunks
def compute_query_embedding(query):
    # encode([query]) returns shape (1, dim), the 2-D batch faiss expects
    embedding = embedder.encode([query])
    return np.asarray(embedding, dtype="float32")
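# The query must be embedded with the same model as the chunks; mixing encoders
# (or vector dimensions) would make the FAISS distances meaningless.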
# Streamlit app
st.title("Generative AI for Electrical Engineering Education with FAISS and Groq")
st.sidebar.header("AI-Based Tutor with Vector Search")
# File upload section
uploaded_file = st.sidebar.file_uploader("Upload Study Material (PDF)", type=["pdf"])
topic = st.sidebar.text_input("Enter a topic (e.g., Newton's Third Law)")
if uploaded_file:
    # Extract and process the file content
    content = extract_pdf_content(uploaded_file)
    st.sidebar.success(f"{uploaded_file.name} uploaded successfully!")

    # Chunk the text and compute embeddings
    chunks = chunk_text(content)
    embeddings = compute_embeddings(chunks)

    # Build the FAISS index
    index = build_faiss_index(embeddings)
    st.write("**File Processed and Indexed for Search**")
    st.write(f"Total chunks created: {len(chunks)}")
# Generate study material
if st.button("Generate Study Material"):
    if topic:
        st.header(f"Study Material: {topic}")

        if uploaded_file:
            # Embed the topic and search the FAISS index
            query_embedding = compute_query_embedding(topic)
            results = search_faiss_index(index, query_embedding, chunks, top_k=3)
            st.write("**Relevant Content from Uploaded File:**")
            for result, distance in results:
                st.write(f"- {result} (L2 distance: {distance:.2f}; lower is more similar)")
        else:
            st.warning("No file uploaded. Generating AI-based content instead.")

        # Generate content using Groq's llama3-70b-8192 model
        ai_content = generate_professional_content_groq(topic)
        st.write("**AI-Generated Content (Groq - Llama3-70B-8192):**")
        st.write(ai_content)
    else:
        st.warning("Please enter a topic!")
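# Run locally with: streamlit run app.py
# (requires streamlit, PyPDF2, numpy, groq, faiss-cpu, and sentence-transformers,
# plus GROQ_API_KEY set in the environment)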