engrphoenix committed on
Commit 16a6662 · verified · 1 Parent(s): 25e9a97

Update app.py

Files changed (1):
app.py +28 -35
app.py CHANGED
```diff
@@ -1,24 +1,12 @@
+import os
 import streamlit as st
 from PyPDF2 import PdfReader
-from transformers import pipeline, AutoTokenizer, AutoModel
-from sklearn.feature_extraction.text import TfidfVectorizer
-import faiss
 import numpy as np
+from groq import Groq
+import faiss
 
-# Load the Hugging Face model for text generation
-@st.cache_resource
-def load_text_generator():
-    return pipeline("text2text-generation", model="google/flan-t5-base")
-
-# Load the Hugging Face model for embeddings
-@st.cache_resource
-def load_embedding_model():
-    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
-    model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
-    return tokenizer, model
-
-text_generator = load_text_generator()
-embedding_tokenizer, embedding_model = load_embedding_model()
+# Set up Groq API client (read the key from the environment rather than hardcoding it)
+groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
 
 # Function to extract text from PDF
 def extract_pdf_content(pdf_file):
```
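The hunk above initializes a Groq client and the rest of the commit calls its chat completions API; a minimal, self-contained sketch of that call pattern may help when reading the later hunks. The `GROQ_API_KEY` environment variable is an assumption:

```python
import os
from groq import Groq

# Assumes GROQ_API_KEY is set in the environment
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Chat completion with the Llama3-70B-8192 model hosted on Groq
response = client.chat.completions.create(
    model="llama3-70b-8192",
    messages=[{"role": "user", "content": "Explain Ohm's law in one sentence."}],
)

# The SDK returns response objects, so use attribute access rather than dict indexing
print(response.choices[0].message.content)
```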
```diff
@@ -33,13 +21,15 @@ def chunk_text(text, chunk_size=500):
     words = text.split()
     return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
 
-# Function to compute embeddings
+# Function to compute embeddings using Groq's Llama3-70B-8192 model
 def compute_embeddings(text_chunks):
     embeddings = []
     for chunk in text_chunks:
-        inputs = embedding_tokenizer(chunk, return_tensors="pt", truncation=True, padding=True)
-        outputs = embedding_model(**inputs)
-        embeddings.append(outputs.pooler_output.detach().numpy()[0])
+        response = groq_client.chat.completions.create(
+            messages=[{"role": "user", "content": chunk}],
+            model="llama3-70b-8192"
+        )
+        embeddings.append(np.array(response.choices[0].message.content))
     return np.array(embeddings)
 
 # Function to build FAISS index
```
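One caveat the new `compute_embeddings` glosses over: a chat completion returns prose, not a fixed-length numeric vector, so `np.array` over the reply text cannot feed a FAISS index. A dedicated embedding model is still needed for the vector-search half; a minimal sketch, assuming the same MiniLM model the previous revision loaded:

```python
import numpy as np
from sentence_transformers import SentenceTransformer

# Assumed fallback: the MiniLM model the previous revision used via transformers
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

def compute_embeddings(text_chunks):
    # encode() returns one fixed-size float32 vector per chunk, which FAISS can index
    return np.asarray(embedder.encode(text_chunks), dtype="float32")
```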
```diff
@@ -54,20 +44,24 @@ def search_faiss_index(index, query_embedding, text_chunks, top_k=3):
     distances, indices = index.search(query_embedding, top_k)
     return [(text_chunks[idx], distances[0][i]) for i, idx in enumerate(indices[0])]
 
-# Function to generate structured content
-def generate_professional_content(topic):
-    prompt = f"Explain '{topic}' in bullet points, highlighting key concepts, examples, and applications."
-    response = text_generator(prompt, max_length=300, num_return_sequences=1)
-    return response[0]['generated_text']
+# Function to generate professional content using Groq's Llama3-70B-8192 model
+def generate_professional_content_groq(topic):
+    response = groq_client.chat.completions.create(
+        messages=[{"role": "user", "content": f"Explain '{topic}' in bullet points, highlighting key concepts, examples, and applications for electrical engineering students."}],
+        model="llama3-70b-8192"
+    )
+    return response.choices[0].message.content.strip()
 
-# Function to compute query embedding
+# Function to compute query embedding using Groq's Llama3-70B-8192 model
 def compute_query_embedding(query):
-    inputs = embedding_tokenizer(query, return_tensors="pt", truncation=True, padding=True)
-    outputs = embedding_model(**inputs)
-    return outputs.pooler_output.detach().numpy()
+    response = groq_client.chat.completions.create(
+        messages=[{"role": "user", "content": query}],
+        model="llama3-70b-8192"
+    )
+    return np.array(response.choices[0].message.content).reshape(1, -1)
 
 # Streamlit app
-st.title("Generative AI for Electrical Engineering Education with FAISS")
+st.title("Generative AI for Electrical Engineering Education with FAISS and Groq")
 st.sidebar.header("AI-Based Tutor with Vector Search")
 
 # File upload section
```
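For the retrieval path as a whole (embed the chunks, build the index, embed the query, search), here is a small end-to-end sketch under the same assumption of numeric embeddings:

```python
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Assumed embedding model, as in the previous sketch
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

chunks = ["Ohm's law relates voltage, current, and resistance.",
          "Kirchhoff's current law concerns currents at a node."]
embeddings = np.asarray(embedder.encode(chunks), dtype="float32")

# Build an L2 index over the chunk vectors, then search it with the query vector
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

query_vec = np.asarray(embedder.encode(["What is Ohm's law?"]), dtype="float32")
distances, indices = index.search(query_vec, 1)
print(chunks[indices[0][0]])  # expected: the Ohm's law chunk
```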
```diff
@@ -106,10 +100,9 @@ if st.button("Generate Study Material"):
     else:
         st.warning("No file uploaded. Generating AI-based content instead.")
 
-    # Generate AI content
-    ai_content = generate_professional_content(topic)
-    st.write("**AI-Generated Content:**")
+    # Generate content using Groq's Llama3-70B-8192 model
+    ai_content = generate_professional_content_groq(topic)
+    st.write("**AI-Generated Content (Groq - Llama3-70B-8192):**")
     st.write(ai_content)
 else:
     st.warning("Please enter a topic!")
-
```
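Finally, the chunks retrieved from FAISS can be folded into the generation prompt so the answer is grounded in the uploaded PDF. A hypothetical variant of `generate_professional_content_groq` (the `context_chunks` parameter is an assumption, not part of this commit; `groq_client` as initialized above):

```python
def generate_professional_content_groq(topic, context_chunks=None):
    # Hypothetical extension: prepend retrieved PDF chunks so the answer is grounded
    context = "\n".join(context_chunks) if context_chunks else ""
    prompt = (
        f"Using the following course material:\n{context}\n\n"
        f"Explain '{topic}' in bullet points, highlighting key concepts, "
        f"examples, and applications for electrical engineering students."
    )
    response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-70b-8192",
    )
    return response.choices[0].message.content.strip()
```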