varl42 committed on
Commit
b4d1efc
·
verified ·
1 Parent(s): 78137c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -4,29 +4,27 @@ from nltk.tokenize import sent_tokenize
4
  import chromadb
5
  from chromadb.utils import embedding_functions
6
 
 
7
 
8
  # Load the email dataset
9
- # emails = pd.read_csv("/content/drive/MyDrive/Clean/cleaned_data.csv")
10
 
11
 
12
  client = chromadb.Client()
13
- client = chromadb.PersistentClient(path="blob/main/chroma.sqlite3")
14
-
15
- # Load the ChromaDB collection
16
- collection = client.get_collection("enron_emails")
17
 
18
  # Create a ChromaDB client
19
- # client = chromadb.Client()
20
- # collection = client.create_collection("enron_emails")
21
 
22
  # Add documents and IDs to the collection, using ChromaDB's built-in text encoding
23
- # collection.add(
24
- # documents=emails["body"].tolist()[:1000],
25
- # ids=emails["file"].tolist()[:1000],
26
- # metadatas=[{"source": "enron_emails"}] * len(emails[:1000]), # Optional metadata
27
-
28
 
29
 
 
30
  # Load model directly
31
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
32
  # Load the trained model
@@ -35,6 +33,14 @@ model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
35
  # Load the tokenizer
36
  tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
37
 
 
 
 
 
 
 
 
 
38
  def query_collection(query_text):
39
  try:
40
  # Perform the query
@@ -81,6 +87,8 @@ def summarize_from_query(_, query_results):
81
  return query_results, f"An error occurred while summarizing: {e}"
82
 
83
 
 
 
84
  # Setup the Gradio interface
85
  with gr.Blocks() as app:
86
  with gr.Row():
 
4
  import chromadb
5
  from chromadb.utils import embedding_functions
6
 
7
+ #######################################################
8
 
9
  # Load the email dataset
10
+ emails = pd.read_csv("./cleaned_data.csv")
11
 
12
 
13
  client = chromadb.Client()
14
+ client = chromadb.PersistentClient(path="./content")
 
 
 
15
 
16
  # Create a ChromaDB client
17
+ client = chromadb.Client()
18
+ collection = client.create_collection("enron_emails")
19
 
20
  # Add documents and IDs to the collection, using ChromaDB's built-in text encoding
21
+ collection.add(
22
+ documents=emails["body"].tolist()[:1000],
23
+ ids=emails["file"].tolist()[:1000],
24
+ metadatas=[{"source": "enron_emails"}] * len(emails[:1000]), # Optional metadata
 
25
 
26
 
27
+ ####################################################
28
  # Load model directly
29
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
30
  # Load the trained model
 
33
  # Load the tokenizer
34
  tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
35
 
36
+ ##################################################
37
+
38
+ # Load the ChromaDB collection
39
+ client = chromadb.Client()
40
+ collection = client.get_collection("enron_emails")
41
+
42
+ ##################################################
43
+
44
  def query_collection(query_text):
45
  try:
46
  # Perform the query
 
87
  return query_results, f"An error occurred while summarizing: {e}"
88
 
89
 
90
+ ###################################################
91
+
92
  # Setup the Gradio interface
93
  with gr.Blocks() as app:
94
  with gr.Row():