Akhil Koduri committed on
Commit
fc2e364
·
verified ·
1 Parent(s): 686e78d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -3
app.py CHANGED
@@ -1,7 +1,10 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import fitz # PyMuPDF
 
4
  from transformers import pipeline
 
 
5
 
6
  # Load pre-trained model and tokenizer from Hugging Face
7
  model_name = "google-bert/bert-base-uncased"
@@ -25,6 +28,11 @@ def classify_text(text):
25
  result = pipe(text)[0]
26
  label = labels.get(result['label'], result['label'])
27
  score = result['score']
 
 
 
 
 
28
  return label, score
29
 
30
  if st.button("Classify"):
@@ -40,10 +48,11 @@ if st.button("Classify"):
40
  # File upload section
41
  st.write("Upload a file for classification:")
42
 
43
- uploaded_file = st.file_uploader("Choose a file", type=["csv", "pdf"])
44
 
45
  if uploaded_file is not None:
46
- if uploaded_file.type == "text/csv":
 
47
  # Process CSV file
48
  df = pd.read_csv(uploaded_file)
49
  if 'text' not in df.columns:
@@ -53,7 +62,7 @@ if uploaded_file is not None:
53
  df['Confidence'] = df['text'].apply(lambda x: classify_text(x)[1])
54
  st.write(df)
55
 
56
- elif uploaded_file.type == "application/pdf":
57
  # Process PDF file
58
  with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
59
  text = ""
@@ -65,3 +74,34 @@ if uploaded_file is not None:
65
 
66
  st.write(f"**Predicted Class for PDF:** {label}")
67
  st.write(f"**Confidence:** {score:.4f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import fitz # PyMuPDF
4
+ import docx
5
  from transformers import pipeline
6
+ from io import StringIO
7
+ import openpyxl
8
 
9
  # Load pre-trained model and tokenizer from Hugging Face
10
  model_name = "google-bert/bert-base-uncased"
 
28
  result = pipe(text)[0]
29
  label = labels.get(result['label'], result['label'])
30
  score = result['score']
31
+
32
+ # Adjust classification based on score
33
+ if score < 0.75:
34
+ label = "Negative"
35
+
36
  return label, score
37
 
38
  if st.button("Classify"):
 
48
  # File upload section
49
  st.write("Upload a file for classification:")
50
 
51
+ uploaded_file = st.file_uploader("Choose a file", type=["csv", "pdf", "txt", "doc", "docx", "xlsx"])
52
 
53
  if uploaded_file is not None:
54
+ file_type = uploaded_file.type
55
+ if file_type == "text/csv":
56
  # Process CSV file
57
  df = pd.read_csv(uploaded_file)
58
  if 'text' not in df.columns:
 
62
  df['Confidence'] = df['text'].apply(lambda x: classify_text(x)[1])
63
  st.write(df)
64
 
65
+ elif file_type == "application/pdf":
66
  # Process PDF file
67
  with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
68
  text = ""
 
74
 
75
  st.write(f"**Predicted Class for PDF:** {label}")
76
  st.write(f"**Confidence:** {score:.4f}")
77
+
78
+ elif file_type == "text/plain":
79
+ # Process TXT file
80
+ text = StringIO(uploaded_file.getvalue().decode("utf-8")).read()
81
+
82
+ # Perform classification
83
+ label, score = classify_text(text)
84
+
85
+ st.write(f"**Predicted Class for TXT:** {label}")
86
+ st.write(f"**Confidence:** {score:.4f}")
87
+
88
+ elif file_type in ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/msword"]:
89
+ # Process DOCX or DOC file
90
+ doc = docx.Document(uploaded_file)
91
+ text = "\n".join([para.text for para in doc.paragraphs])
92
+
93
+ # Perform classification
94
+ label, score = classify_text(text)
95
+
96
+ st.write(f"**Predicted Class for DOC/DOCX:** {label}")
97
+ st.write(f"**Confidence:** {score:.4f}")
98
+
99
+ elif file_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
100
+ # Process XLSX file
101
+ df = pd.read_excel(uploaded_file)
102
+ if 'text' not in df.columns:
103
+ st.write("The XLSX file must contain a 'text' column.")
104
+ else:
105
+ df['Prediction'] = df['text'].apply(lambda x: classify_text(x)[0])
106
+ df['Confidence'] = df['text'].apply(lambda x: classify_text(x)[1])
107
+ st.write(df)