|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
All phonemes in the IPA standard are supported. |
|
|
|
zero-width characters are generally not supported, as |
|
well as some other modifiers. Tone, stress and |
|
lengthening are represented with placeholder dimensions, |
|
however they need to be set manually, this conversion |
|
from phonemes to features works on a character by |
|
character basis. In a few cases, the place of |
|
articulation is approximated because only one phoneme |
|
had such a combination, which does not warrant a new |
|
dimension. |
|
""" |
|
|
|
|
|
def _vowel(frontness, openness, roundedness, *, nasal=False):
    """Build the feature description of a (voiced) vowel phoneme."""
    features = {
        'symbol_type'      : 'phoneme',
        'vowel_consonant'  : 'vowel',
        'VUV'              : 'voiced',
        'vowel_frontness'  : frontness,
        'vowel_openness'   : openness,
        'vowel_roundedness': roundedness,
    }
    if nasal:
        # Nasalised vowels reuse the consonant manner dimension 'nasal'.
        features['consonant_manner'] = 'nasal'
    return features


def _consonant(voicing, place, manner):
    """Build the feature description of a consonant phoneme."""
    return {
        'symbol_type'     : 'phoneme',
        'vowel_consonant' : 'consonant',
        'VUV'             : voicing,
        'consonant_place' : place,
        'consonant_manner': manner,
    }


def generate_feature_lookup():
    """
    Return the mapping from each supported symbol to its articulatory features.

    Every key is a symbol (pause/punctuation marker or IPA character) and
    every value is a dict of feature name -> feature value. Non-phoneme
    symbols only carry a 'symbol_type'. Phonemes additionally carry
    'vowel_consonant', 'VUV' (voiced / unvoiced) and either the three vowel
    features or the two consonant features. Nasalised vowels carry the
    vowel features plus 'consonant_manner': 'nasal'.

    A new dict is built on every call, so callers may mutate the result.
    The order of the symbols is kept stable because the feature table
    derived from this lookup is built in this order.
    """
    return {
        # pauses, punctuation and word boundary
        '~': {'symbol_type': 'silence'},
        '#': {'symbol_type': 'end of sentence'},
        '?': {'symbol_type': 'questionmark'},
        '!': {'symbol_type': 'exclamationmark'},
        '.': {'symbol_type': 'fullstop'},
        ' ': {'symbol_type': 'word-boundary'},

        # phonemes
        'ɜ': _vowel('central', 'open-mid', 'unrounded'),
        'ə': _vowel('central', 'mid', 'unrounded'),
        'a': _vowel('front', 'open', 'unrounded'),
        'ð': _consonant('voiced', 'dental', 'fricative'),
        'ɛ': _vowel('front', 'open-mid', 'unrounded'),
        'ɪ': _vowel('front_central', 'close_close-mid', 'unrounded'),
        'ŋ': _consonant('voiced', 'velar', 'nasal'),
        'ɔ': _vowel('back', 'open-mid', 'rounded'),
        'ɒ': _vowel('back', 'open', 'rounded'),
        'ɾ': _consonant('voiced', 'alveolar', 'flap'),
        'ʃ': _consonant('unvoiced', 'postalveolar', 'fricative'),
        'θ': _consonant('unvoiced', 'dental', 'fricative'),
        # Fixed: the near-close near-back vowel is rounded in the IPA chart
        # (it was listed as unrounded, unlike its front counterpart 'ʏ').
        'ʊ': _vowel('central_back', 'close_close-mid', 'rounded'),
        'ʌ': _vowel('back', 'open-mid', 'unrounded'),
        'ʒ': _consonant('voiced', 'postalveolar', 'fricative'),
        'æ': _vowel('front', 'open-mid_open', 'unrounded'),
        'b': _consonant('voiced', 'bilabial', 'plosive'),
        'ʔ': _consonant('unvoiced', 'glottal', 'plosive'),
        'd': _consonant('voiced', 'alveolar', 'plosive'),
        'e': _vowel('front', 'close-mid', 'unrounded'),
        'f': _consonant('unvoiced', 'labiodental', 'fricative'),
        'ɡ': _consonant('voiced', 'velar', 'plosive'),
        'h': _consonant('unvoiced', 'glottal', 'fricative'),
        'i': _vowel('front', 'close', 'unrounded'),
        'j': _consonant('voiced', 'palatal', 'approximant'),
        'k': _consonant('unvoiced', 'velar', 'plosive'),
        'l': _consonant('voiced', 'alveolar', 'lateral-approximant'),
        'm': _consonant('voiced', 'bilabial', 'nasal'),
        'n': _consonant('voiced', 'alveolar', 'nasal'),
        'ɳ': _consonant('voiced', 'retroflex', 'nasal'),
        'o': _vowel('back', 'close-mid', 'rounded'),
        'p': _consonant('unvoiced', 'bilabial', 'plosive'),
        'ɹ': _consonant('voiced', 'alveolar', 'approximant'),
        'r': _consonant('voiced', 'alveolar', 'trill'),
        's': _consonant('unvoiced', 'alveolar', 'fricative'),
        't': _consonant('unvoiced', 'alveolar', 'plosive'),
        'u': _vowel('back', 'close', 'rounded'),
        'v': _consonant('voiced', 'labiodental', 'fricative'),
        'w': _consonant('voiced', 'labial-velar', 'approximant'),
        'x': _consonant('unvoiced', 'velar', 'fricative'),
        'z': _consonant('voiced', 'alveolar', 'fricative'),
        'ʀ': _consonant('voiced', 'uvular', 'trill'),
        'ø': _vowel('front', 'close-mid', 'rounded'),
        'ç': _consonant('unvoiced', 'palatal', 'fricative'),
        'ɐ': _vowel('central', 'open', 'unrounded'),
        'œ': _vowel('front', 'open-mid', 'rounded'),
        'y': _vowel('front', 'close', 'rounded'),
        'ʏ': _vowel('front_central', 'close_close-mid', 'rounded'),
        'ɑ': _vowel('back', 'open', 'unrounded'),
        'c': _consonant('unvoiced', 'palatal', 'plosive'),
        'ɲ': _consonant('voiced', 'palatal', 'nasal'),
        'ɣ': _consonant('voiced', 'velar', 'fricative'),
        'ʎ': _consonant('voiced', 'palatal', 'lateral-approximant'),
        'β': _consonant('voiced', 'bilabial', 'fricative'),
        'ʝ': _consonant('voiced', 'palatal', 'fricative'),
        'ɟ': _consonant('voiced', 'palatal', 'plosive'),
        'q': _consonant('unvoiced', 'uvular', 'plosive'),
        'ɕ': _consonant('unvoiced', 'alveolopalatal', 'fricative'),
        'ɭ': _consonant('voiced', 'retroflex', 'lateral-approximant'),
        'ɵ': _vowel('central', 'close-mid', 'rounded'),
        'ʑ': _consonant('voiced', 'alveolopalatal', 'fricative'),
        'ʋ': _consonant('voiced', 'labiodental', 'approximant'),
        'ʁ': _consonant('voiced', 'uvular', 'fricative'),
        'ɨ': _vowel('central', 'close', 'unrounded'),
        'ʂ': _consonant('unvoiced', 'retroflex', 'fricative'),
        'ɓ': _consonant('voiced', 'bilabial', 'implosive'),
        'ʙ': _consonant('voiced', 'bilabial', 'vibrant'),
        'ɗ': _consonant('voiced', 'dental', 'implosive'),
        'ɖ': _consonant('voiced', 'retroflex', 'plosive'),
        'χ': _consonant('unvoiced', 'uvular', 'fricative'),
        'ʛ': _consonant('voiced', 'uvular', 'implosive'),
        'ʟ': _consonant('voiced', 'velar', 'lateral-approximant'),
        'ɽ': _consonant('voiced', 'retroflex', 'flap'),
        'ɢ': _consonant('voiced', 'uvular', 'plosive'),
        'ɠ': _consonant('voiced', 'velar', 'implosive'),
        'ǂ': _consonant('unvoiced', 'alveolopalatal', 'click'),
        'ɦ': _consonant('voiced', 'glottal', 'fricative'),
        'ǁ': _consonant('unvoiced', 'alveolar', 'click'),
        'ĩ': _vowel('front', 'close', 'unrounded', nasal=True),
        'ʍ': _consonant('unvoiced', 'labial-velar', 'fricative'),
        'ʕ': _consonant('voiced', 'pharyngal', 'fricative'),
        # Fixed: the retroflex approximant is voiced in the IPA chart
        # (it was listed as unvoiced, unlike the alveolar approximant 'ɹ').
        'ɻ': _consonant('voiced', 'retroflex', 'approximant'),
        'ʄ': _consonant('voiced', 'palatal', 'implosive'),
        'ũ': _vowel('back', 'close', 'rounded', nasal=True),
        'ɤ': _vowel('back', 'close-mid', 'unrounded'),
        'ɶ': _vowel('front', 'open', 'rounded'),
        'õ': _vowel('back', 'close-mid', 'rounded', nasal=True),
        'ʡ': _consonant('unvoiced', 'epiglottal', 'plosive'),
        'ʈ': _consonant('unvoiced', 'retroflex', 'plosive'),
        'ʜ': _consonant('unvoiced', 'epiglottal', 'fricative'),
        'ɱ': _consonant('voiced', 'labiodental', 'nasal'),
        'ɯ': _vowel('back', 'close', 'unrounded'),
        'ǀ': _consonant('unvoiced', 'dental', 'click'),
        'ɸ': _consonant('unvoiced', 'bilabial', 'fricative'),
        'ʘ': _consonant('unvoiced', 'bilabial', 'click'),
        'ʐ': _consonant('voiced', 'retroflex', 'fricative'),
        'ɰ': _consonant('voiced', 'velar', 'approximant'),
        'ɘ': _vowel('central', 'close-mid', 'unrounded'),
        'ħ': _consonant('unvoiced', 'pharyngal', 'fricative'),
        'ɞ': _vowel('central', 'open-mid', 'rounded'),
        'ʉ': _vowel('central', 'close', 'rounded'),
        'ɴ': _consonant('voiced', 'uvular', 'nasal'),
        'ʢ': _consonant('voiced', 'epiglottal', 'fricative'),
        # NOTE(review): this key looks like Cyrillic izhitsa rather than the
        # IPA labiodental flap letter; kept unchanged -- confirm with callers.
        'ѵ': _consonant('voiced', 'labiodental', 'flap'),
        'ǃ': _consonant('unvoiced', 'postalveolar', 'click'),
    }
|
|
|
|
|
def get_phone_to_id():
    """
    Return the fixed symbol -> integer ID mapping.

    The IDs serve as the states of the CTC loss and of the Dijkstra/MAS
    search in the aligner. They are spelled out as one ordered string
    instead of being derived from the feature lookup, because sets are
    unordered and the IDs need to be consistent.
    """
    symbol_inventory = "~#?!ǃ.ɜəaðɛɪŋɔɒɾʃθʊʌʒæbʔdefghijklmnɳopɡɹrstuvwxzʀøçɐœyʏɑcɲɣʎβʝɟqɕɭɵʑʋʁɨʂɓʙɗɖχʛʟɽɢɠǂɦǁĩʍʕɻʄũɤɶõʡʈʜɱɯǀɸʘʐɰɘħɞʉɴʢѵ"
    # A symbol's ID is simply its position in the inventory string.
    return {symbol: position for position, symbol in enumerate(symbol_inventory)}
|
|
|
|
|
def get_feature_to_index_lookup():
    """
    Return the mapping from each feature value to its vector dimension.

    The dimensions are numbered consecutively from 0 in the order in which
    the values are listed below, so the order of this tuple must not change.
    """
    ordered_values = (
        # stress
        "stressed",

        # tone
        "very-high-tone",
        "high-tone",
        "mid-tone",
        "low-tone",
        "very-low-tone",
        "rising-tone",
        "falling-tone",
        "peaking-tone",
        "dipping-tone",

        # length
        "lengthened",
        "half-length",
        "shortened",

        # phoneme categories
        "consonant",
        "vowel",
        "phoneme",

        # non-phoneme symbol types
        "silence",
        "end of sentence",
        "questionmark",
        "exclamationmark",
        "fullstop",
        "word-boundary",

        # consonant place
        "dental",
        "postalveolar",
        "velar",
        "palatal",
        "glottal",
        "uvular",
        "labiodental",
        "labial-velar",
        "alveolar",
        "bilabial",
        "alveolopalatal",
        "retroflex",
        "pharyngal",
        "epiglottal",

        # vowel frontness
        "central",
        "back",
        "front_central",
        "front",
        "central_back",

        # vowel openness
        "mid",
        "close-mid",
        "close",
        "open-mid",
        "close_close-mid",
        "open-mid_open",
        "open",

        # vowel roundedness
        "rounded",
        "unrounded",

        # consonant manner
        "plosive",
        "nasal",
        "approximant",
        "trill",
        "flap",
        "fricative",
        "lateral-approximant",
        "implosive",
        "vibrant",
        "click",

        # voicing
        "unvoiced",
        "voiced",
    )
    return {value: dimension for dimension, value in enumerate(ordered_values)}
|
|
|
|
|
def generate_feature_table():
    """
    Build the symbol -> binary feature vector table.

    Every single-character symbol of generate_feature_lookup() is turned
    into a list of 0/1 integers. The dimension of each feature value is
    taken from get_feature_to_index_lookup(). Symbols that are not
    phonemes additionally get the "silence" dimension set.

    Feature values that have no dimension assigned are skipped when the
    vectors are filled and are reported on stdout afterwards.

    Returns:
        dict mapping each single-character symbol to its feature vector
        (a list of ints), in the order of the feature lookup.
    """
    ipa_to_phonemefeats = generate_feature_lookup()

    # The conversion works character by character, so only symbols that
    # consist of exactly one character can be part of the table.
    single_char_feats = {ipa: feats for ipa, feats in ipa_to_phonemefeats.items() if len(ipa) == 1}

    # For every feature type, collect the set of values that actually occur.
    feat_to_val_set = dict()
    for feats in single_char_feats.values():
        for feat, value in feats.items():
            feat_to_val_set.setdefault(feat, set()).add(value)

    value_to_index = get_feature_to_index_lookup()

    # The index lookup lists 13 values (1 stress, 9 tone, 3 length) that no
    # symbol carries by itself; they are extra dimensions on top of the
    # values found above. The length is the same for every symbol, so it is
    # computed once.
    vector_length = 13 + sum(len(values) for values in feat_to_val_set.values())

    phone_to_vector = dict()
    for ipa, feats in single_char_feats.items():
        vector = [0] * vector_length
        for value in feats.values():
            if value in value_to_index:
                vector[value_to_index[value]] = 1
        if vector[value_to_index["phoneme"]] != 1:
            # every symbol that is not a phoneme is also marked as silence
            vector[value_to_index["silence"]] = 1
        phone_to_vector[ipa] = vector

    # Report values that occur in the lookup but have no dimension assigned.
    for values in feat_to_val_set.values():
        for value in values:
            if value not in value_to_index:
                print(f"Unknown feature value in featureset! {value}")

    return phone_to_vector
|
|
|
|
|
# Script entry point: dump the complete symbol -> feature vector table.
if __name__ == "__main__":
    print(generate_feature_table())
|
|