File size: 1,160 Bytes
e9706fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Supported languages: three-letter language code -> English display name.
language_code_to_name = {
    "asm": "Assamese",
    "ben": "Bengali",
    "guj": "Gujarati",
    "hin": "Hindi",
    "kan": "Kannada",
    "mal": "Malayalam",
    "mar": "Marathi",
    "ory": "Odia",
    "pan": "Punjabi",
    "tam": "Tamil",
    "tel": "Telugu",
    "urd": "Urdu",
    "eng": "English",
}

# Reverse lookup (display name -> language code), built in the same order
# as the mapping above.
LANGUAGE_NAME_TO_CODE = {
    display_name: lang_code
    for lang_code, display_name in language_code_to_name.items()
}

# Source languages.
# S2ST / S2TT / ASR do not need a source language;
# T2TT / T2ST use this list.
text_source_language_codes = [
    "asm",
    "ben",
    "guj",
    "hin",
    "kan",
    "mal",
    "mar",
    "ory",
    "pan",
    "tam",
    "tel",
    "urd",
    "eng",
]

# Display names for the codes above, sorted.
TEXT_SOURCE_LANGUAGE_NAMES = sorted(
    language_code_to_name[lang_code] for lang_code in text_source_language_codes
)

# Target languages for the speech-output tasks (S2ST / T2ST).
s2st_target_language_codes = [
    "asm",
    "ben",
    "guj",
    "hin",
    "kan",
    "mal",
    "mar",
    "ory",
    "pan",
    "tam",
    "tel",
    "urd",
    "eng",
]

# Display names for the codes above, sorted.
S2ST_TARGET_LANGUAGE_NAMES = sorted(
    language_code_to_name[lang_code] for lang_code in s2st_target_language_codes
)
# T2ST is bound to the very same list object as S2ST (an alias, not a copy).
T2ST_TARGET_LANGUAGE_NAMES = S2ST_TARGET_LANGUAGE_NAMES

# Target languages for S2TT / T2TT / ASR.
# All three names are aliases of the TEXT_SOURCE_LANGUAGE_NAMES list object
# (no copies are made).
S2TT_TARGET_LANGUAGE_NAMES = T2TT_TARGET_LANGUAGE_NAMES = ASR_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES