Spaces:

mkoot007
/

Detector

Runtime error

App Files Files Community

mkoot007 committed on Nov 15, 2023

Commit

be16c65

1 Parent(s): 6b717c4

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -17

app.py CHANGED Viewed

@@ -1,16 +1,14 @@
 import pandas as pd
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import re
 import torch
 # Load the pre-trained model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
-model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
-def analyze_text(text, confidence_threshold=0.6):
     # Preprocess the text
-    text = re.sub(r"[^\w\s]", "", text)
     text = text.lower()
     # Encode the text
@@ -19,14 +17,12 @@ def analyze_text(text, confidence_threshold=0.6):
     # Classify the text
     with torch.no_grad():
         output = model(**encoded_text)
-        logits = output.logits
-        predictions = logits.argmax(-1).item()
-        confidence = torch.softmax(logits, dim=1)[0][predictions].item()
-    if confidence > confidence_threshold:
-        if predictions == 0:
-            return "Job Interview Related"
-    return "Not Job Interview Related"
 def count_job_related_messages(data):
     job_related_count = 0
@@ -34,7 +30,7 @@ def count_job_related_messages(data):
     for message in data["message"]:
         result = analyze_text(message)
-        if result == "Job Interview Related":
             job_related_count += 1
         else:
             not_job_related_count += 1
@@ -42,7 +38,7 @@ def count_job_related_messages(data):
     return job_related_count, not_job_related_count
 # Streamlit application
-st.title("Job Interview Message Analyzer")
 uploaded_file = st.file_uploader("Upload CSV file")
 user_input = st.text_input("Enter text")
@@ -57,14 +53,14 @@ if uploaded_file:
         result = analyze_text(message)
         results.append(result)
-    data["Job Interview Related"] = results
     # Count job-related messages
     job_related_count, not_job_related_count = count_job_related_messages(data)
     st.dataframe(data)
-    st.write(f"Job Interview Related Messages: {job_related_count}")
-    st.write(f"Not Job Interview Related Messages: {not_job_related_count}")
 elif user_input:
     # Analyze user-input text
     result = analyze_text(user_input)

 import pandas as pd
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 # Load the pre-trained model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-imdb")
+model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-imdb")
+def analyze_text(text):
     # Preprocess the text
     text = text.lower()
     # Encode the text
     # Classify the text
     with torch.no_grad():
         output = model(**encoded_text)
+        predictions = output.logits.argmax(-1).item()
+    if predictions == 1:  # For IMDb sentiment analysis, 1 indicates positive sentiment
+        return "Job Related"
+    else:
+        return "Not Job Related"
 def count_job_related_messages(data):
     job_related_count = 0
     for message in data["message"]:
         result = analyze_text(message)
+        if result == "Job Related":
             job_related_count += 1
         else:
             not_job_related_count += 1
     return job_related_count, not_job_related_count
 # Streamlit application
+st.title("Job Related Message Analyzer")
 uploaded_file = st.file_uploader("Upload CSV file")
 user_input = st.text_input("Enter text")
         result = analyze_text(message)
         results.append(result)
+    data["Job Related"] = results
     # Count job-related messages
     job_related_count, not_job_related_count = count_job_related_messages(data)
     st.dataframe(data)
+    st.write(f"Job Related Messages: {job_related_count}")
+    st.write(f"Not Job Related Messages: {not_job_related_count}")
 elif user_input:
     # Analyze user-input text
     result = analyze_text(user_input)