# NOTE: scraped from a Hugging Face Space whose status page reported "Runtime error".
import os
from io import BytesIO

import faiss
import fitz  # PyMuPDF
import numpy as np
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader  # NOTE(review): unused in this file — kept per import policy; confirm before removing
def get_groq_client():
    """Build a Groq API client from the ``groq_api`` environment variable.

    Raises:
        ValueError: if the environment variable is unset or empty.
    """
    key = os.getenv("groq_api")
    if key:
        return Groq(api_key=key)
    raise ValueError("Groq API key not found in environment variables.")
groq_client = get_groq_client() | |
# Function to extract text from PDF
def extract_pdf_content(uploaded_file):
    """Return the concatenated text of every page in an uploaded PDF.

    Args:
        uploaded_file: a file-like object (e.g. a Streamlit UploadedFile)
            whose ``read()`` yields the raw PDF bytes.

    Returns:
        str: all page text in document order.
    """
    pdf_stream = BytesIO(uploaded_file.read())  # fitz needs a seekable stream
    doc = fitz.open(stream=pdf_stream, filetype="pdf")
    try:
        # join is O(n) vs. repeated string += which is quadratic
        return "".join(page.get_text() for page in doc)
    finally:
        # The original leaked the Document handle; always release it.
        doc.close()
# Function to split content into chunks
def chunk_text(text, chunk_size=500):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Returns a list of strings; an empty/whitespace-only input yields [].
    """
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(" ".join(words[start:start + chunk_size]))
    return chunks
# Function to compute embeddings using Groq's Llama3-70B-8192 model
def compute_embeddings(text_chunks):
    """Compute one vector per text chunk by parsing the model's reply as CSV numbers.

    NOTE(review): llama3-70b-8192 is a *chat* model, not an embedding model —
    its reply is free text, so the parsed vectors are unlikely to be meaningful
    embeddings and may differ in length per chunk (which would also make the
    resulting array ragged). Consider a real embedding endpoint; confirm intent.

    Args:
        text_chunks: list of strings to embed.

    Returns:
        np.ndarray of the parsed per-chunk vectors.
    """
    embeddings = []
    for chunk in text_chunks:
        response = groq_client.chat.completions.create(
            messages=[{"role": "user", "content": chunk}],
            model="llama3-70b-8192"
        )
        # Access the embedding content from the response
        embedding_text = response.choices[0].message.content
        # np.fromstring(text, sep=",") is deprecated and removed in NumPy 2.0;
        # parse the comma-separated values explicitly instead.
        values = []
        for token in embedding_text.split(","):
            try:
                values.append(float(token))
            except ValueError:
                continue  # skip any non-numeric fragments in the model's reply
        embeddings.append(np.asarray(values, dtype=np.float64))
    return np.array(embeddings)
# Function to build FAISS index
def build_faiss_index(embeddings):
    """Create a flat L2-distance FAISS index and load *embeddings* into it.

    Args:
        embeddings: 2-D array of shape (n_vectors, dimension).

    Returns:
        A populated ``faiss.IndexFlatL2`` instance.
    """
    n_dims = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(n_dims)  # exact search, L2 metric
    faiss_index.add(embeddings)
    return faiss_index
# Function to search in FAISS index
def search_faiss_index(index, query_embedding, text_chunks, top_k=3):
    """Return the *top_k* chunks nearest to the query, with their L2 distances.

    Args:
        index: a FAISS index (anything exposing ``search(query, k)``).
        query_embedding: query vector of shape (1, dimension).
        text_chunks: the chunk texts, aligned with the indexed vectors.
        top_k: number of neighbours to return.

    Returns:
        List of ``(chunk_text, distance)`` tuples, nearest first.
    """
    distances, indices = index.search(query_embedding, top_k)
    matches = []
    for rank, chunk_id in enumerate(indices[0]):
        matches.append((text_chunks[chunk_id], distances[0][rank]))
    return matches
# Function to generate professional content using Groq's Llama3-70B-8192 model
def generate_professional_content_groq(topic):
    """Ask the Llama3-70B chat model for bullet-point study notes on *topic*."""
    prompt = (
        f"Explain '{topic}' in bullet points, highlighting key concepts, "
        "examples, and applications for electrical engineering students."
    )
    reply = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-70b-8192"
    )
    # Strip leading/trailing whitespace from the model's answer.
    return reply.choices[0].message.content.strip()
# Function to compute query embedding using Groq's Llama3-70B-8192 model
def compute_query_embedding(query):
    """Return a (1, d) vector parsed from the chat model's reply to *query*.

    NOTE(review): as with compute_embeddings, a chat model does not return
    real embeddings — the parsed vector's length depends on the reply text,
    so it may not match the indexed dimension. Confirm the intended API.

    Returns:
        np.ndarray of shape (1, n_parsed_values), dtype float64.
    """
    response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": query}],
        model="llama3-70b-8192"
    )
    # Access embedding content and convert it to a NumPy array
    embedding_text = response.choices[0].message.content
    # np.fromstring(text, sep=",") is deprecated and removed in NumPy 2.0;
    # parse the comma-separated values explicitly instead.
    values = []
    for token in embedding_text.split(","):
        try:
            values.append(float(token))
        except ValueError:
            continue  # ignore non-numeric fragments
    return np.asarray(values, dtype=np.float64).reshape(1, -1)
# ----------------------------- Streamlit app -----------------------------
st.title("Generative AI for Electrical Engineering Education with FAISS and Groq")
st.sidebar.header("AI-Based Tutor with Vector Search")

# File upload section
uploaded_file = st.sidebar.file_uploader("Upload Study Material (PDF)", type=["pdf"])
topic = st.sidebar.text_input("Enter a topic (e.g., Newton's Third Law)")

# BUG FIX: `index` and `chunks` were previously bound only inside the try
# block below, so a failed PDF (or none at all) made the search branch raise
# NameError. Initialize them up front and gate retrieval on `index`.
index = None
chunks = []

if uploaded_file:
    try:
        # Extract and process file content
        content = extract_pdf_content(uploaded_file)
        st.sidebar.success(f"{uploaded_file.name} uploaded successfully!")
        # Chunk and compute embeddings
        chunks = chunk_text(content)
        embeddings = compute_embeddings(chunks)
        # Build FAISS index
        index = build_faiss_index(embeddings)
        st.write("**File Processed and Indexed for Search**")
        st.write(f"Total chunks created: {len(chunks)}")
    except Exception as e:
        # Surface the failure in the UI; `index` stays None so the search
        # branch below safely falls back to AI-only content.
        st.error(f"Error processing file: {e}")

# Generate study material
if st.button("Generate Study Material"):
    if topic:
        try:
            st.header(f"Study Material: {topic}")
            # Compute query embedding
            query_embedding = compute_query_embedding(topic)
            # Search FAISS index only if the upload was indexed successfully
            if index is not None:
                results = search_faiss_index(index, query_embedding, chunks, top_k=3)
                st.write("**Relevant Content from Uploaded File:**")
                for result, distance in results:
                    st.write(f"- {result} (Similarity: {distance:.2f})")
            else:
                st.warning("No file uploaded. Generating AI-based content instead.")
            # Generate content using Groq's Llama3-70B-8192 model
            ai_content = generate_professional_content_groq(topic)
            st.write("**AI-Generated Content (Groq - Llama3-70B-8192):**")
            st.write(ai_content)
        except Exception as e:
            st.error(f"Error generating content: {e}")
    else:
        st.warning("Please enter a topic!")