Spaces:

varl42
/

assesment

Runtime error

varl42 committed on Apr 9, 2024

Commit

b4d1efc

verified ·

1 Parent(s): 78137c7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,29 +4,27 @@ from nltk.tokenize import sent_tokenize
 import chromadb
 from chromadb.utils import embedding_functions
 # Load the email dataset
-# emails = pd.read_csv("/content/drive/MyDrive/Clean/cleaned_data.csv")
 client = chromadb.Client()
-client = chromadb.PersistentClient(path="blob/main/chroma.sqlite3")
-# Load the ChromaDB collection
-collection = client.get_collection("enron_emails")
 # Create a ChromaDB client
-# client = chromadb.Client()
-# collection = client.create_collection("enron_emails")
 # Add documents and IDs to the collection, using ChromaDB's built-in text encoding
-# collection.add(
-#    documents=emails["body"].tolist()[:1000],
-#    ids=emails["file"].tolist()[:1000],
-#    metadatas=[{"source": "enron_emails"}] * len(emails[:1000]),  # Optional metadata
 # Load model directly
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 # Load the trained model
@@ -35,6 +33,14 @@ model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
 def query_collection(query_text):
     try:
         # Perform the query
@@ -81,6 +87,8 @@ def summarize_from_query(_, query_results):
         return query_results, f"An error occurred while summarizing: {e}"
 # Setup the Gradio interface
 with gr.Blocks() as app:
     with gr.Row():

 import chromadb
 from chromadb.utils import embedding_functions
+#######################################################
 # Load the email dataset
+emails = pd.read_csv("./cleaned_data.csv")
 client = chromadb.Client()
+client = chromadb.PersistentClient(path="./content")
 # Create a ChromaDB client
+client = chromadb.Client()
+collection = client.create_collection("enron_emails")
 # Add documents and IDs to the collection, using ChromaDB's built-in text encoding
+ collection.add(
+    documents=emails["body"].tolist()[:1000],
+    ids=emails["file"].tolist()[:1000],
+    metadatas=[{"source": "enron_emails"}] * len(emails[:1000]),  # Optional metadata
+####################################################
 # Load model directly
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 # Load the trained model
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
+##################################################
+# Load the ChromaDB collection
+client = chromadb.Client()
+collection = client.get_collection("enron_emails")
+##################################################
 def query_collection(query_text):
     try:
         # Perform the query
         return query_results, f"An error occurred while summarizing: {e}"
+###################################################
 # Setup the Gradio interface
 with gr.Blocks() as app:
     with gr.Row():