Spaces:

ssocean
/

Newborn_Article_Impact_Predict

Running on Zero

App Files Files Community

ssocean committed on Dec 10, 2024

Commit

fa3936e

verified ·

1 Parent(s): bb69ff4

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -7

app.py CHANGED Viewed

@@ -15,8 +15,8 @@ tokenizer = None
 @spaces.GPU(duration=60, enable_queue=True)
 def predict(title, abstract):
-    title = title.replace("\n", " ").strip()
-    abstract = abstract.replace("\n", " ").strip()
     global model, tokenizer
     if model is None:
         model = AutoModelForSequenceClassification.from_pretrained(
@@ -54,6 +54,8 @@ examples = [
 ]
 def validate_input(title, abstract):
     non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
     if len(title.strip().split(' '))<3:
         return False, "The title must be at least 3 words long."
@@ -61,11 +63,15 @@ def validate_input(title, abstract):
         return False, "The abstract must be at least 50 words long."
     if len((title + abstract).split(' '))>1024:
         return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
-    if non_latin_pattern.search(title):
-        return False, "The title contains invalid characters. Only English letters and special symbols are allowed."
-    if non_latin_pattern.search(abstract):
-        return False, "The abstract contains invalid characters. Only English letters and special symbols are allowed."
     return True, "Inputs are valid! Good to go!"
 def update_button_status(title, abstract):

 @spaces.GPU(duration=60, enable_queue=True)
 def predict(title, abstract):
+    title = title.replace("\n", " ").strip().replace('’',"'")
+    abstract = abstract.replace("\n", " ").strip().replace('’',"'")
     global model, tokenizer
     if model is None:
         model = AutoModelForSequenceClassification.from_pretrained(
 ]
 def validate_input(title, abstract):
+    title = title.replace("\n", " ").strip().replace('’',"'")
+    abstract = abstract.replace("\n", " ").strip().replace('’',"'")
     non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
     if len(title.strip().split(' '))<3:
         return False, "The title must be at least 3 words long."
         return False, "The abstract must be at least 50 words long."
     if len((title + abstract).split(' '))>1024:
         return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
+    # if non_latin_pattern.search(title):
+    #     return False, "The title contains invalid characters. Only English letters and special symbols are allowed."
+    # if non_latin_pattern.search(abstract):
+    #     return False, "The abstract contains invalid characters. Only English letters and special symbols are allowed."
+    if non_latin_in_title:
+        return False, f"The title contains invalid characters: {', '.join(non_latin_in_title)}. Only English letters and special symbols are allowed."
+    if non_latin_in_abstract:
+        return False, f"The abstract contains invalid characters: {', '.join(non_latin_in_abstract)}. Only English letters and special symbols are allowed."
     return True, "Inputs are valid! Good to go!"
 def update_button_status(title, abstract):