Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import fitz # PyMuPDF | |
from transformers import pipeline | |
# Checkpoint to load from the Hugging Face Hub.
# NOTE(review): this looks like the base BERT checkpoint rather than one
# fine-tuned for sentiment; if so, the classification head is untrained and
# the predictions are not meaningful -- confirm or swap in a fine-tuned model.
model_name = "google-bert/bert-base-uncased"


@st.cache_resource
def _load_pipeline(name):
    """Build the text-classification pipeline for checkpoint *name*.

    Streamlit re-executes the whole script on every widget interaction.
    Caching the pipeline as a shared resource means the model is loaded once
    per server process instead of on every button click or file upload.
    """
    return pipeline("text-classification", model=name)


# Module-level handle used by classify_text().
pipe = _load_pipeline(model_name)

# Map the model's generic output labels to human-readable class names.
labels = {
    "LABEL_0": "Negative",
    "LABEL_1": "Positive",
}

# Page header.
st.title("BERT Text Classification")
st.write("This app uses a pre-trained BERT model to classify text into positive or negative sentiment.")

# Free-text input; classified when the "Classify" button is pressed.
input_text = st.text_area("Enter text to classify")
def classify_text(text, threshold=0.75):
    """Classify *text* and return ``(label, score)``.

    Args:
        text: The string to classify.
        threshold: Minimum score required to keep the model's predicted
            label. Defaults to 0.75.

    Returns:
        A ``(label, score)`` tuple. ``label`` is the model's label mapped
        through ``labels`` (the raw label is kept when it has no mapping),
        and ``score`` is the score the pipeline reported for that prediction.
        When ``score`` is below ``threshold`` the label is reported as
        "Negative" regardless of what the model predicted.
    """
    # truncation=True clips inputs that exceed the model's maximum sequence
    # length; without it, long inputs (e.g. the text of a whole PDF) make the
    # model raise instead of returning a prediction.
    result = pipe(text, truncation=True)[0]
    label = labels.get(result['label'], result['label'])
    score = result['score']
    # Low-confidence predictions are always reported as "Negative".
    if score < threshold:
        label = "Negative"
    return label, score
# Classify the contents of the text area when the button is pressed.
if st.button("Classify"):
    if not input_text:
        # Nothing was typed: prompt the user instead of calling the model.
        st.write("Please enter some text to classify.")
    else:
        label, score = classify_text(input_text)
        st.write(f"**Predicted Class:** {label}")
        st.write(f"**Confidence:** {score:.4f}")
# File upload section: classify every row of a CSV's 'text' column, or the
# full extracted text of a PDF.
st.write("Upload a file for classification:")
uploaded_file = st.file_uploader("Choose a file", type=["csv", "pdf"])
if uploaded_file is not None:
    # Browsers do not report MIME types consistently (a CSV may arrive as
    # e.g. "application/vnd.ms-excel"), so also accept the file extension.
    # Relying on the MIME type alone would silently skip such uploads.
    file_name = uploaded_file.name.lower()
    is_csv = uploaded_file.type == "text/csv" or file_name.endswith(".csv")
    is_pdf = uploaded_file.type == "application/pdf" or file_name.endswith(".pdf")
    try:
        if is_csv:
            # Process CSV file
            df = pd.read_csv(uploaded_file, encoding='utf-8')
            if 'text' not in df.columns:
                st.write("The CSV file must contain a 'text' column.")
            else:
                # Missing cells are read as NaN (a float) and other cells may
                # be numeric; normalise everything to str for the tokenizer.
                texts = df['text'].fillna("").astype(str)
                # Run the model once per row and split the (label, score)
                # pairs afterwards, instead of classifying every row twice.
                results = [classify_text(row_text) for row_text in texts]
                df['Prediction'] = [row_label for row_label, _ in results]
                df['Confidence'] = [row_score for _, row_score in results]
                st.write(df)
        elif is_pdf:
            # Process PDF file: concatenate the text of all pages.
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                text = "".join(page.get_text() for page in doc)
            if not text.strip():
                # No extractable text was found, so there is nothing
                # meaningful to classify.
                st.write("No extractable text was found in the PDF.")
            else:
                # Perform classification
                label, score = classify_text(text)
                st.write(f"**Predicted Class for PDF:** {label}")
                st.write(f"**Confidence:** {score:.4f}")
        else:
            st.error(f"Unsupported file type: {uploaded_file.type}")
    except Exception as e:
        # Top-level UI boundary: surface any parsing/model failure to the
        # user instead of crashing the app.
        st.error(f"Error: {e}")