ssocean committed on
Commit
fa3936e
·
verified ·
1 Parent(s): bb69ff4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -7
app.py CHANGED
@@ -15,8 +15,8 @@ tokenizer = None
15
 
16
  @spaces.GPU(duration=60, enable_queue=True)
17
  def predict(title, abstract):
18
- title = title.replace("\n", " ").strip()
19
- abstract = abstract.replace("\n", " ").strip()
20
  global model, tokenizer
21
  if model is None:
22
  model = AutoModelForSequenceClassification.from_pretrained(
@@ -54,6 +54,8 @@ examples = [
54
  ]
55
 
56
  def validate_input(title, abstract):
 
 
57
  non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
58
  if len(title.strip().split(' '))<3:
59
  return False, "The title must be at least 3 words long."
@@ -61,11 +63,15 @@ def validate_input(title, abstract):
61
  return False, "The abstract must be at least 50 words long."
62
  if len((title + abstract).split(' '))>1024:
63
  return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
64
- if non_latin_pattern.search(title):
65
- return False, "The title contains invalid characters. Only English letters and special symbols are allowed."
66
- if non_latin_pattern.search(abstract):
67
- return False, "The abstract contains invalid characters. Only English letters and special symbols are allowed."
68
-
 
 
 
 
69
  return True, "Inputs are valid! Good to go!"
70
 
71
  def update_button_status(title, abstract):
 
15
 
16
  @spaces.GPU(duration=60, enable_queue=True)
17
  def predict(title, abstract):
18
+ title = title.replace("\n", " ").strip().replace('’',"'")
19
+ abstract = abstract.replace("\n", " ").strip().replace('’',"'")
20
  global model, tokenizer
21
  if model is None:
22
  model = AutoModelForSequenceClassification.from_pretrained(
 
54
  ]
55
 
56
  def validate_input(title, abstract):
57
+ title = title.replace("\n", " ").strip().replace('’',"'")
58
+ abstract = abstract.replace("\n", " ").strip().replace('’',"'")
59
  non_latin_pattern = re.compile(r'[^\u0000-\u007F]')
60
  if len(title.strip().split(' '))<3:
61
  return False, "The title must be at least 3 words long."
 
63
  return False, "The abstract must be at least 50 words long."
64
  if len((title + abstract).split(' '))>1024:
65
  return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
66
+ # if non_latin_pattern.search(title):
67
+ # return False, "The title contains invalid characters. Only English letters and special symbols are allowed."
68
+ # if non_latin_pattern.search(abstract):
69
+ # return False, "The abstract contains invalid characters. Only English letters and special symbols are allowed."
70
+ if non_latin_in_title:
71
+ return False, f"The title contains invalid characters: {', '.join(non_latin_in_title)}. Only English letters and special symbols are allowed."
72
+ if non_latin_in_abstract:
73
+ return False, f"The abstract contains invalid characters: {', '.join(non_latin_in_abstract)}. Only English letters and special symbols are allowed."
74
+
75
  return True, "Inputs are valid! Good to go!"
76
 
77
  def update_button_status(title, abstract):