Spaces:
Paused
Paused
Update src/ASL_gloss_functions.py
Browse files- src/ASL_gloss_functions.py +13 -8
src/ASL_gloss_functions.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
# Define a list of question adverbs
|
| 2 |
opened_question_adverbs = ["how", "when", "where", "why", "how much", "how many", "how often", "how long", "what", "which", "who", "whose", "whom"]
|
| 3 |
|
| 4 |
-
|
| 5 |
-
time_words = ["yesterday", "today", "tomorrow"]
|
| 6 |
|
| 7 |
# ASL glossing rules implemented in functions
|
| 8 |
def gloss_word(word):
|
|
@@ -36,14 +36,13 @@ def add_time_indicator(gloss_sentence_):
|
|
| 36 |
return f"{word.text.upper()} {gloss_sentence_.replace(word.text.upper(), '').strip()}"
|
| 37 |
return gloss_sentence_
|
| 38 |
|
| 39 |
-
|
| 40 |
def skip_stop_words(word):
|
| 41 |
if word.lower() == 'the' or word.lower() == 'a':
|
| 42 |
return ''
|
| 43 |
else:
|
| 44 |
return word
|
| 45 |
|
| 46 |
-
## doc is a list of tokens
|
| 47 |
def question_type(doc):
|
| 48 |
try:
|
| 49 |
if doc[-1].text == '?':
|
|
@@ -52,11 +51,9 @@ def question_type(doc):
|
|
| 52 |
else:
|
| 53 |
return "yes-no-question"
|
| 54 |
return None
|
| 55 |
-
|
| 56 |
except IndexError:
|
| 57 |
return None
|
| 58 |
|
| 59 |
-
# Revised process_sentence function
|
| 60 |
def process_sentence(doc):
|
| 61 |
nms = {
|
| 62 |
"wh-question": "wh-q",
|
|
@@ -66,12 +63,19 @@ def process_sentence(doc):
|
|
| 66 |
"car": "CL:3",
|
| 67 |
"person": "CL:1"
|
| 68 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
glossed_sentence = []
|
| 71 |
for token in doc:
|
| 72 |
word = token.lemma_.lower()
|
| 73 |
|
| 74 |
-
# Added handling for proper nouns
|
| 75 |
if token.pos_ == "PROPN":
|
| 76 |
glossed_word = token.text.upper()
|
| 77 |
elif word in ["i", "me"]:
|
|
@@ -80,13 +84,14 @@ def process_sentence(doc):
|
|
| 80 |
glossed_word = handle_indexing("YOU", 2)
|
| 81 |
elif word in classifiers:
|
| 82 |
glossed_word = classifiers[word]
|
|
|
|
|
|
|
| 83 |
else:
|
| 84 |
glossed_word = gloss_word(word)
|
| 85 |
|
| 86 |
glossed_word = skip_stop_words(glossed_word)
|
| 87 |
glossed_sentence.append(glossed_word)
|
| 88 |
|
| 89 |
-
# Move time words to beginning
|
| 90 |
for gloss in glossed_sentence:
|
| 91 |
if gloss.lower() in time_words:
|
| 92 |
glossed_sentence.insert(0, glossed_sentence.pop(glossed_sentence.index(gloss)))
|
|
|
|
| 1 |
# Define a list of question adverbs
|
| 2 |
opened_question_adverbs = ["how", "when", "where", "why", "how much", "how many", "how often", "how long", "what", "which", "who", "whose", "whom"]
|
| 3 |
|
| 4 |
+
# Time adverbs to be moved to the beginning of ASL gloss sentences
|
| 5 |
+
time_words = ["yesterday", "today", "tomorrow", "now", "before", "after", "morning", "afternoon", "evening", "night"]
|
| 6 |
|
| 7 |
# ASL glossing rules implemented in functions
|
| 8 |
def gloss_word(word):
|
|
|
|
| 36 |
return f"{word.text.upper()} {gloss_sentence_.replace(word.text.upper(), '').strip()}"
|
| 37 |
return gloss_sentence_
|
| 38 |
|
| 39 |
+
# skip stop_words
|
| 40 |
def skip_stop_words(word):
    """Filter English articles out of the gloss.

    Args:
        word: A single word or gloss string, in any letter case.

    Returns:
        An empty string when ``word`` is one of the articles "a", "an" or
        "the" (compared case-insensitively); otherwise ``word`` unchanged.
    """
    # "an" is only the pre-vowel spelling of "a", so it must be dropped
    # together with the other two articles.
    if word.lower() in ("a", "an", "the"):
        return ''
    return word
|
| 45 |
|
|
|
|
| 46 |
def question_type(doc):
|
| 47 |
try:
|
| 48 |
if doc[-1].text == '?':
|
|
|
|
| 51 |
else:
|
| 52 |
return "yes-no-question"
|
| 53 |
return None
|
|
|
|
| 54 |
except IndexError:
|
| 55 |
return None
|
| 56 |
|
|
|
|
| 57 |
def process_sentence(doc):
|
| 58 |
nms = {
|
| 59 |
"wh-question": "wh-q",
|
|
|
|
| 63 |
"car": "CL:3",
|
| 64 |
"person": "CL:1"
|
| 65 |
}
|
| 66 |
+
basic_verbs = {
|
| 67 |
+
"is": "BE",
|
| 68 |
+
"am": "BE",
|
| 69 |
+
"are": "BE",
|
| 70 |
+
"was": "BE",
|
| 71 |
+
"were": "BE",
|
| 72 |
+
"be": "BE"
|
| 73 |
+
}
|
| 74 |
|
| 75 |
glossed_sentence = []
|
| 76 |
for token in doc:
|
| 77 |
word = token.lemma_.lower()
|
| 78 |
|
|
|
|
| 79 |
if token.pos_ == "PROPN":
|
| 80 |
glossed_word = token.text.upper()
|
| 81 |
elif word in ["i", "me"]:
|
|
|
|
| 84 |
glossed_word = handle_indexing("YOU", 2)
|
| 85 |
elif word in classifiers:
|
| 86 |
glossed_word = classifiers[word]
|
| 87 |
+
elif word in basic_verbs:
|
| 88 |
+
glossed_word = basic_verbs[word]
|
| 89 |
else:
|
| 90 |
glossed_word = gloss_word(word)
|
| 91 |
|
| 92 |
glossed_word = skip_stop_words(glossed_word)
|
| 93 |
glossed_sentence.append(glossed_word)
|
| 94 |
|
|
|
|
| 95 |
for gloss in glossed_sentence:
|
| 96 |
if gloss.lower() in time_words:
|
| 97 |
glossed_sentence.insert(0, glossed_sentence.pop(glossed_sentence.index(gloss)))
|