Spaces:

valurank
/

News_Articles_Categorization

Running

abdulmatinomotoso committed on Sep 18, 2023

Commit

4ee9c74

1 Parent(s): 9d55375

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,16 +14,17 @@ model_name = "valurank/finetuned-distilbert-news-article-categorization"
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 #Reading in the text file
 def read_in_text(url):
   with open(url, 'r') as file:
     article = file.read()
     return article
-def clean_text(url):
-  text = url
-  text = text.encode("ascii", errors="ignore").decode(
           "ascii"
     )  # remove non-ascii, Chinese characters
@@ -41,8 +42,8 @@ def clean_text(url):
   return text
 #Defining a function to get the category of the news article
-def get_category(file):
-  text = clean_text(file)
   input_tensor = tokenizer.encode(text, return_tensors="pt", truncation=True)
   logits = model(input_tensor).logits

 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+"""
 #Reading in the text file
 def read_in_text(url):
   with open(url, 'r') as file:
     article = file.read()
     return article
+"""
+def clean_text(raw_text):
+  text = raw_text.encode("ascii", errors="ignore").decode(
           "ascii"
     )  # remove non-ascii, Chinese characters
   return text
 #Defining a function to get the category of the news article
+def get_category(text):
+  text = clean_text(text)
   input_tensor = tokenizer.encode(text, return_tensors="pt", truncation=True)
   logits = model(input_tensor).logits