Spaces:

Ak28Akhil
/

TextClassification

Sleeping

App Files Files Community

Akhil Koduri committed on Jun 22, 2024

Commit

fc2e364

verified ·

1 Parent(s): 686e78d

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -3

app.py CHANGED Viewed

@@ -1,7 +1,10 @@
 import streamlit as st
 import pandas as pd
 import fitz  # PyMuPDF
 from transformers import pipeline
 # Load pre-trained model and tokenizer from Hugging Face
 model_name = "google-bert/bert-base-uncased"
@@ -25,6 +28,11 @@ def classify_text(text):
     result = pipe(text)[0]
     label = labels.get(result['label'], result['label'])
     score = result['score']
     return label, score
 if st.button("Classify"):
@@ -40,10 +48,11 @@ if st.button("Classify"):
 # File upload section
 st.write("Upload a file for classification:")
-uploaded_file = st.file_uploader("Choose a file", type=["csv", "pdf"])
 if uploaded_file is not None:
-    if uploaded_file.type == "text/csv":
         # Process CSV file
         df = pd.read_csv(uploaded_file)
         if 'text' not in df.columns:
@@ -53,7 +62,7 @@ if uploaded_file is not None:
             df['Confidence'] = df['text'].apply(lambda x: classify_text(x)[1])
             st.write(df)
-    elif uploaded_file.type == "application/pdf":
         # Process PDF file
         with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
             text = ""
@@ -65,3 +74,34 @@ if uploaded_file is not None:
             st.write(f"**Predicted Class for PDF:** {label}")
             st.write(f"**Confidence:** {score:.4f}")

 import streamlit as st
 import pandas as pd
 import fitz  # PyMuPDF
+import docx
 from transformers import pipeline
+from io import StringIO
+import openpyxl
 # Load pre-trained model and tokenizer from Hugging Face
 model_name = "google-bert/bert-base-uncased"
     result = pipe(text)[0]
     label = labels.get(result['label'], result['label'])
     score = result['score']
+    # Adjust classification based on score
+    if score < 0.75:
+        label = "Negative"
     return label, score
 if st.button("Classify"):
 # File upload section
 st.write("Upload a file for classification:")
+uploaded_file = st.file_uploader("Choose a file", type=["csv", "pdf", "txt", "doc", "docx", "xlsx"])
 if uploaded_file is not None:
+    file_type = uploaded_file.type
+    if file_type == "text/csv":
         # Process CSV file
         df = pd.read_csv(uploaded_file)
         if 'text' not in df.columns:
             df['Confidence'] = df['text'].apply(lambda x: classify_text(x)[1])
             st.write(df)
+    elif file_type == "application/pdf":
         # Process PDF file
         with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
             text = ""
             st.write(f"**Predicted Class for PDF:** {label}")
             st.write(f"**Confidence:** {score:.4f}")
+    elif file_type == "text/plain":
+        # Process TXT file
+        text = StringIO(uploaded_file.getvalue().decode("utf-8")).read()
+        # Perform classification
+        label, score = classify_text(text)
+        st.write(f"**Predicted Class for TXT:** {label}")
+        st.write(f"**Confidence:** {score:.4f}")
+    elif file_type in ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/msword"]:
+        # Process DOCX or DOC file
+        doc = docx.Document(uploaded_file)
+        text = "\n".join([para.text for para in doc.paragraphs])
+        # Perform classification
+        label, score = classify_text(text)
+        st.write(f"**Predicted Class for DOC/DOCX:** {label}")
+        st.write(f"**Confidence:** {score:.4f}")
+    elif file_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+        # Process XLSX file
+        df = pd.read_excel(uploaded_file)
+        if 'text' not in df.columns:
+            st.write("The XLSX file must contain a 'text' column.")
+        else:
+            df['Prediction'] = df['text'].apply(lambda x: classify_text(x)[0])
+            df['Confidence'] = df['text'].apply(lambda x: classify_text(x)[1])
+            st.write(df)