Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -15,8 +15,8 @@ tokenizer = None
|
|
15 |
|
16 |
@spaces.GPU(duration=60, enable_queue=True)
|
17 |
def predict(title, abstract):
|
18 |
-
title = title.replace("\n", " ").strip()
|
19 |
-
abstract = abstract.replace("\n", " ").strip()
|
20 |
global model, tokenizer
|
21 |
if model is None:
|
22 |
model = AutoModelForSequenceClassification.from_pretrained(
|
@@ -54,6 +54,8 @@ examples = [
|
|
54 |
]
|
55 |
|
56 |
def validate_input(title, abstract):
|
|
|
|
|
57 |
non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
|
58 |
if len(title.strip().split(' '))<3:
|
59 |
return False, "The title must be at least 3 words long."
|
@@ -61,11 +63,15 @@ def validate_input(title, abstract):
|
|
61 |
return False, "The abstract must be at least 50 words long."
|
62 |
if len((title + abstract).split(' '))>1024:
|
63 |
return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
|
64 |
-
if non_latin_pattern.search(title):
|
65 |
-
|
66 |
-
if non_latin_pattern.search(abstract):
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
69 |
return True, "Inputs are valid! Good to go!"
|
70 |
|
71 |
def update_button_status(title, abstract):
|
|
|
15 |
|
16 |
@spaces.GPU(duration=60, enable_queue=True)
|
17 |
def predict(title, abstract):
|
18 |
+
title = title.replace("\n", " ").strip().replace('’',"'")
|
19 |
+
abstract = abstract.replace("\n", " ").strip().replace('’',"'")
|
20 |
global model, tokenizer
|
21 |
if model is None:
|
22 |
model = AutoModelForSequenceClassification.from_pretrained(
|
|
|
54 |
]
|
55 |
|
56 |
def validate_input(title, abstract):
    """Validate a title/abstract pair and explain the outcome.

    Both inputs are normalized first: newlines become spaces, surrounding
    whitespace is stripped, and the typographic apostrophe (U+2019) is
    replaced with an ASCII apostrophe. Words are counted by splitting on
    single spaces.

    Args:
        title: Title text.
        abstract: Abstract text.

    Returns:
        A ``(is_valid, message)`` tuple. ``is_valid`` is False when the title
        has fewer than 3 words, the abstract has fewer than 50 words, or
        either text contains characters outside U+0000-U+007F; ``message``
        names the failed check. Otherwise ``is_valid`` is True and
        ``message`` is either a length warning or a confirmation.
    """
    title = title.replace("\n", " ").strip().replace('’', "'")
    abstract = abstract.replace("\n", " ").strip().replace('’', "'")

    non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
    # These lists were referenced below without ever being assigned, which
    # raised NameError for every input that passed the length checks.
    # dict.fromkeys keeps first-seen order while reporting each offending
    # character only once.
    non_latin_in_title = list(dict.fromkeys(non_latin_pattern.findall(title)))
    non_latin_in_abstract = list(dict.fromkeys(non_latin_pattern.findall(abstract)))

    if len(title.split(' ')) < 3:
        return False, "The title must be at least 3 words long."
    # NOTE(review): this condition sits in the gap between two diff hunks and
    # was reconstructed from the title check above and the message below --
    # confirm against the full file.
    if len(abstract.split(' ')) < 50:
        return False, "The abstract must be at least 50 words long."
    if len((title + abstract).split(' ')) > 1024:
        # Returning here means over-length input is never checked for
        # non-ASCII characters; the original ordering is kept as-is.
        return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
    if non_latin_in_title:
        return False, f"The title contains invalid characters: {', '.join(non_latin_in_title)}. Only English letters and special symbols are allowed."
    if non_latin_in_abstract:
        return False, f"The abstract contains invalid characters: {', '.join(non_latin_in_abstract)}. Only English letters and special symbols are allowed."

    return True, "Inputs are valid! Good to go!"
|
76 |
|
77 |
def update_button_status(title, abstract):
|