import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import pipeline, AutoTokenizer, AutoModel
import faiss
import numpy as np

# Load the Hugging Face model for text generation
@st.cache_resource
def load_text_generator():
    return pipeline("text2text-generation", model="google/flan-t5-base")

# Load the Hugging Face model for embeddings
@st.cache_resource
def load_embedding_model():
    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
    model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
    return tokenizer, model
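
# Note: per its model card, all-MiniLM-L6-v2 yields 384-dimensional sentence
# embeddings when its token outputs are mean-pooled (as done below).
# st.cache_resource keeps both models loaded across Streamlit reruns.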

text_generator = load_text_generator()
embedding_tokenizer, embedding_model = load_embedding_model()

# Function to extract text from PDF
def extract_pdf_content(pdf_file):
    reader = PdfReader(pdf_file)
    content = ""
    for page in reader.pages:
        # extract_text() can return None for image-only pages; guard against it
        content += page.extract_text() or ""
    return content

# Function to split content into chunks
def chunk_text(text, chunk_size=500):
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
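
# Example: chunk_text("one two three four five", chunk_size=2)
#   -> ["one two", "three four", "five"]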

# Function to compute embeddings (mean pooling, as recommended for
# sentence-transformers models; pooler_output is not trained for similarity)
def compute_embeddings(text_chunks):
    embeddings = []
    for chunk in text_chunks:
        inputs = embedding_tokenizer(chunk, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            outputs = embedding_model(**inputs)
        # Masked mean over token embeddings
        mask = inputs["attention_mask"].unsqueeze(-1).float()
        pooled = (outputs.last_hidden_state * mask).sum(1) / mask.sum(1)
        embeddings.append(pooled[0].numpy())
    return np.array(embeddings, dtype="float32")
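
# The resulting matrix has shape (num_chunks, 384) for this model; FAISS
# requires float32 input, hence the explicit dtype above.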

# Function to build FAISS index
def build_faiss_index(embeddings):
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)  # L2 distance for similarity
    index.add(embeddings)
    return index
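
# IndexFlatL2 performs exact brute-force search, which is fine for the few
# hundred chunks a single PDF produces; a larger corpus would typically call
# for an approximate index (e.g., faiss.IndexIVFFlat or faiss.IndexHNSWFlat).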

# Function to search in FAISS index
def search_faiss_index(index, query_embedding, text_chunks, top_k=3):
    top_k = min(top_k, index.ntotal)  # FAISS pads with -1 when k exceeds the index size
    distances, indices = index.search(query_embedding, top_k)
    return [(text_chunks[idx], distances[0][i]) for i, idx in enumerate(indices[0])]
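
# Returns a list of (chunk, distance) pairs ordered nearest-first, e.g.
#   [("...text of the closest chunk...", 0.42), ...]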

# Function to generate structured content
def generate_professional_content(topic):
    prompt = f"Explain '{topic}' in bullet points, highlighting key concepts, examples, and applications."
    response = text_generator(prompt, max_new_tokens=300, num_return_sequences=1)
    return response[0]['generated_text']
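
# Note: flan-t5-base is a small instruction-tuned model, so answers are short
# and generic; max_new_tokens bounds the generated answer, not the prompt.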

# Function to compute query embedding (same mean pooling as for the chunks)
def compute_query_embedding(query):
    inputs = embedding_tokenizer(query, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = embedding_model(**inputs)
    mask = inputs["attention_mask"].unsqueeze(-1).float()
    pooled = (outputs.last_hidden_state * mask).sum(1) / mask.sum(1)
    return pooled.numpy().astype("float32")  # shape (1, dim), as index.search expects

# Streamlit app
st.title("Generative AI for Electrical Engineering Education with FAISS")
st.sidebar.header("AI-Based Tutor with Vector Search")

# File upload section
uploaded_file = st.sidebar.file_uploader("Upload Study Material (PDF)", type=["pdf"])
topic = st.sidebar.text_input("Enter a topic (e.g., Ohm's Law)")

if uploaded_file:
    # Extract and process file content
    content = extract_pdf_content(uploaded_file)
    st.sidebar.success(f"{uploaded_file.name} uploaded successfully!")

    # Chunk and compute embeddings
    chunks = chunk_text(content)
    embeddings = compute_embeddings(chunks)

    # Build FAISS index
    index = build_faiss_index(embeddings)

    st.write("**File Processed and Indexed for Search**")
    st.write(f"Total chunks created: {len(chunks)}")
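
    # Caveat: Streamlit reruns this script on every interaction, so the chunks
    # are re-embedded each time. Wrapping the embedding step in st.cache_data
    # keyed on the uploaded file's bytes would avoid the repeated work.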

# Generate study material
if st.button("Generate Study Material"):
    if topic:
        st.header(f"Study Material: {topic}")
        
        # Compute query embedding
        query_embedding = compute_query_embedding(topic)

        # Search FAISS index
        if uploaded_file:
            results = search_faiss_index(index, query_embedding, chunks, top_k=3)
            st.write("**Relevant Content from Uploaded File:**")
            for result, distance in results:
                st.write(f"- {result} (L2 distance: {distance:.2f}, lower = more similar)")
        else:
            st.warning("No file uploaded. Generating AI-based content instead.")
        
        # Generate AI content
        ai_content = generate_professional_content(topic)
        st.write("**AI-Generated Content:**")
        st.write(ai_content)
    else:
        st.warning("Please enter a topic!")