import os
import streamlit as st
from sentence_transformers import SentenceTransformer
import numpy as np
from groq import Groq
import faiss
import fitz
from io import BytesIO
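# Assumed dependencies: streamlit, pymupdf (imported as fitz), numpy, faiss-cpu,
# groq, sentence-transformers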

# Function to set up Groq API client
def get_groq_client():
    api_key = os.getenv("groq_api")
    if not api_key:
        raise ValueError("Groq API key not found in environment variables.")
    return Groq(api_key=api_key)
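# The client reads the key from the `groq_api` environment variable, e.g.:
#   export groq_api=<your-groq-api-key>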

groq_client = get_groq_client()

# Function to extract text from PDF
def extract_pdf_content(uploaded_file):
    pdf_stream = BytesIO(uploaded_file.read())  # Convert to file-like object
    content = ""
    with fitz.open(stream=pdf_stream, filetype="pdf") as doc:  # close the document when done
        for page in doc:
            content += page.get_text()
    return content

# Function to split content into chunks
def chunk_text(text, chunk_size=500):
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

# Function to compute embeddings for the text chunks.
# Groq's chat completions API returns text rather than embedding vectors, so a
# local sentence-transformers model (an assumed choice; any embedding model
# works) produces the vectors here, while Groq handles text generation below.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def compute_embeddings(text_chunks):
    # encode() returns a float32 NumPy array of shape (n_chunks, dim),
    # which is the layout the FAISS index expects
    return embedding_model.encode(text_chunks)
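# all-MiniLM-L6-v2 yields 384-dimensional vectors; any other sentence-transformers
# model can be substituted if higher retrieval quality is needed.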

# Function to build FAISS index
def build_faiss_index(embeddings):
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)  # L2 distance for similarity
    index.add(embeddings)
    return index
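# IndexFlatL2 performs exact (brute-force) nearest-neighbour search, which is
# fine for a single PDF; for large corpora an approximate index such as
# faiss.IndexIVFFlat scales better.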

# Function to search in FAISS index
def search_faiss_index(index, query_embedding, text_chunks, top_k=3):
    distances, indices = index.search(query_embedding, top_k)
    return [(text_chunks[idx], distances[0][i]) for i, idx in enumerate(indices[0])]
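# index.search takes a (n_queries, dim) array and returns distances and indices,
# each of shape (n_queries, top_k); row 0 holds the matches for our single query.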

# Function to generate professional content using Groq's Llama3-70B-8192 model
def generate_professional_content_groq(topic):
    response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": f"Explain '{topic}' in bullet points, highlighting key concepts, examples, and applications for electrical engineering students."}],
        model="llama3-70b-8192"
    )
    # Access content from the response
    return response.choices[0].message.content.strip()
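# Generation uses Groq's default sampling settings; OpenAI-compatible parameters
# such as temperature and max_tokens can also be passed to
# chat.completions.create if more control is needed.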

# Function to compute the query embedding with the same model used for the chunks
def compute_query_embedding(query):
    # encode([...]) returns a 2-D float32 array of shape (1, dim),
    # which is what FAISS expects for a single query
    return embedding_model.encode([query])

# Streamlit app
st.title("Generative AI for Electrical Engineering Education with FAISS and Groq")
st.sidebar.header("AI-Based Tutor with Vector Search")

# File upload section
uploaded_file = st.sidebar.file_uploader("Upload Study Material (PDF)", type=["pdf"])
topic = st.sidebar.text_input("Enter a topic (e.g., Ohm's Law)")

if uploaded_file:
    try:
        # Extract and process file content
        content = extract_pdf_content(uploaded_file)
        st.sidebar.success(f"{uploaded_file.name} uploaded successfully!")

        # Chunk and compute embeddings
        chunks = chunk_text(content)
        embeddings = compute_embeddings(chunks)

        # Build FAISS index and keep it (with the chunks) in session state so
        # the button handler below can use it even if this block fails or reruns
        st.session_state["chunks"] = chunks
        st.session_state["index"] = build_faiss_index(embeddings)

        st.write("**File Processed and Indexed for Search**")
        st.write(f"Total chunks created: {len(chunks)}")
    except Exception as e:
        st.error(f"Error processing file: {e}")

# Generate study material
if st.button("Generate Study Material"):
    if topic:
        try:
            st.header(f"Study Material: {topic}")

            # Search the FAISS index if a file has been processed and indexed
            if uploaded_file and "index" in st.session_state:
                query_embedding = compute_query_embedding(topic)
                results = search_faiss_index(
                    st.session_state["index"],
                    query_embedding,
                    st.session_state["chunks"],
                    top_k=3,
                )
                st.write("**Relevant Content from Uploaded File:**")
                for result, distance in results:
                    # IndexFlatL2 returns L2 distances: smaller means more similar
                    st.write(f"- {result} (L2 distance: {distance:.2f})")
            else:
                st.warning("No indexed file available. Generating AI-based content instead.")

            # Generate content using Groq's Llama3-70B-8192 model
            ai_content = generate_professional_content_groq(topic)
            st.write("**AI-Generated Content (Groq - Llama3-70B-8192):**")
            st.write(ai_content)
        except Exception as e:
            st.error(f"Error generating content: {e}")
    else:
        st.warning("Please enter a topic!")