import os
from io import BytesIO

import faiss
import fitz  # PyMuPDF
import numpy as np
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer


# Function to set up Groq API client
def get_groq_client():
    api_key = os.getenv("groq_api")
    if not api_key:
        raise ValueError("Groq API key not found in environment variables.")
    return Groq(api_key=api_key)


groq_client = get_groq_client()


# Groq's chat endpoint returns prose, not vectors, so the original attempt to
# parse chat-completion output with np.fromstring cannot yield embeddings.
# A local sentence-transformers model is swapped in for the embedding step
# instead (an assumption; any embedding model would work). Cached with
# st.cache_resource so Streamlit reruns do not reload it.
@st.cache_resource
def get_embedding_model():
    return SentenceTransformer("all-MiniLM-L6-v2")


# Function to extract text from PDF
def extract_pdf_content(uploaded_file):
    pdf_stream = BytesIO(uploaded_file.read())  # Convert to file-like object
    doc = fitz.open(stream=pdf_stream, filetype="pdf")
    content = ""
    for page in doc:
        content += page.get_text()
    return content


# Function to split content into fixed-size word chunks
def chunk_text(text, chunk_size=500):
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]


# Function to compute chunk embeddings (FAISS requires float32 input)
def compute_embeddings(text_chunks):
    model = get_embedding_model()
    return np.asarray(model.encode(text_chunks), dtype="float32")


# Function to build a FAISS index over the chunk embeddings
def build_faiss_index(embeddings):
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)  # L2 distance; lower means more similar
    index.add(embeddings)
    return index


# Function to search the FAISS index for the chunks closest to the query
def search_faiss_index(index, query_embedding, text_chunks, top_k=3):
    distances, indices = index.search(query_embedding, top_k)
    return [(text_chunks[idx], distances[0][i]) for i, idx in enumerate(indices[0])]


# Function to generate study content using Groq's llama3-70b-8192 model
def generate_professional_content_groq(topic):
    response = groq_client.chat.completions.create(
        messages=[{
            "role": "user",
            "content": f"Explain '{topic}' in bullet points, highlighting key "
                       f"concepts, examples, and applications for electrical "
                       f"engineering students."
        }],
        model="llama3-70b-8192",
    )
    return response.choices[0].message.content.strip()


# Function to embed the query with the same model used for the chunks,
# shaped (1, dim) as FAISS expects for a single-query search
def compute_query_embedding(query):
    model = get_embedding_model()
    return np.asarray(model.encode([query]), dtype="float32").reshape(1, -1)
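
# A minimal smoke test of the retrieval round trip, not part of the original
# app: it exercises chunk_text -> compute_embeddings -> build_faiss_index ->
# search_faiss_index without the Streamlit UI. The sample text and query are
# illustrative; call it manually from a REPL (st.cache_resource only warns
# when used outside `streamlit run`).
def _smoke_test():
    sample = ("Ohm's law states that the current through a conductor is "
              "proportional to the voltage across it. ") * 40
    chunks = chunk_text(sample, chunk_size=20)
    embeddings = compute_embeddings(chunks)
    index = build_faiss_index(embeddings)
    query_embedding = compute_query_embedding("What does Ohm's law state?")
    for text, dist in search_faiss_index(index, query_embedding, chunks, top_k=2):
        print(f"{dist:.3f}: {text[:60]}...")
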
# Streamlit app
st.title("Generative AI for Electrical Engineering Education with FAISS and Groq")
st.sidebar.header("AI-Based Tutor with Vector Search")

# File upload section
uploaded_file = st.sidebar.file_uploader("Upload Study Material (PDF)", type=["pdf"])
topic = st.sidebar.text_input("Enter a topic (e.g., Newton's Third Law)")

if uploaded_file:
    try:
        # Extract and process file content
        content = extract_pdf_content(uploaded_file)
        st.sidebar.success(f"{uploaded_file.name} uploaded successfully!")

        # Chunk, embed, and index; keep the results in session_state so the
        # button handler below can still reach them after Streamlit reruns
        chunks = chunk_text(content)
        embeddings = compute_embeddings(chunks)
        st.session_state["chunks"] = chunks
        st.session_state["index"] = build_faiss_index(embeddings)

        st.write("**File Processed and Indexed for Search**")
        st.write(f"Total chunks created: {len(chunks)}")
    except Exception as e:
        st.error(f"Error processing file: {e}")

# Generate study material
if st.button("Generate Study Material"):
    if topic:
        try:
            st.header(f"Study Material: {topic}")

            # Compute query embedding
            query_embedding = compute_query_embedding(topic)

            # Search the FAISS index if a file has been processed
            if "index" in st.session_state:
                results = search_faiss_index(
                    st.session_state["index"], query_embedding,
                    st.session_state["chunks"], top_k=3,
                )
                st.write("**Relevant Content from Uploaded File:**")
                for result, distance in results:
                    st.write(f"- {result} (L2 distance: {distance:.2f})")
            else:
                st.warning("No file uploaded. Generating AI-based content instead.")

            # Generate content using Groq's llama3-70b-8192 model
            ai_content = generate_professional_content_groq(topic)
            st.write("**AI-Generated Content (Groq - Llama3-70B-8192):**")
            st.write(ai_content)
        except Exception as e:
            st.error(f"Error generating content: {e}")
    else:
        st.warning("Please enter a topic!")
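
# How to run, as a sketch (assumes this file is saved as app.py; the env var
# name "groq_api" is the one the code above reads):
#   export groq_api="<your Groq API key>"
#   pip install streamlit pymupdf faiss-cpu sentence-transformers groq numpy
#   streamlit run app.py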