Dionyssos committed on
Commit
c63ff96
·
1 Parent(s): 697d069

del Expressio

Files changed (2)
  1. app.py +79 -988
  2. textual.py +536 -0
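This commit moves the text-normalization helpers (script transliteration, number spelling, per-language symbol replacement) out of app.py into the new textual.py module and drops the Expression/age-gender analysis code. A minimal sketch of the new import surface, mirroring the line added to app.py in the diff below and assuming both files sit side by side in the Space root:

# Sketch of the import introduced by this commit (see the app.py diff below).
from textual import only_greek_or_only_latin, transliterate_number, fix_vocals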
app.py CHANGED
@@ -1,25 +1,16 @@
1
  # -*- coding: utf-8 -*-
2
  import typing
3
- import types # fusion of forward() of Wav2Vec2
4
  import gradio as gr
5
- import matplotlib.pyplot as plt
6
  import numpy as np
7
  import os
8
  import torch
9
  import torch.nn as nn
10
- from transformers import Wav2Vec2Processor
11
- from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2Model
12
- from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2PreTrainedModel
13
  import audiofile
14
  from tts import StyleTTS2
 
15
  import audresample
16
- import json
17
- import re
18
- import unicodedata
19
  import textwrap
20
  import nltk
21
- from num2words import num2words
22
- from num2word_greek.numbers2words import convert_numbers
23
  from audionar import VitsModel, VitsTokenizer
24
  from audiocraft import AudioGen
25
 
@@ -31,411 +22,9 @@ nltk.download('punkt', download_dir='./')
31
  nltk.download('punkt_tab', download_dir='./')
32
  nltk.data.path.append('.')
33
 
34
- device = 'cpu'
35
 
36
 
37
- def fix_vocals(text, lang='ron'):
38
 
39
- # Longer phrases should come before shorter ones to prevent partial matches.
40
-
41
- ron_replacements = {
42
- 'ţ': 'ț',
43
- 'ț': 'ts',
44
- 'î': 'u',
45
- 'â': 'a',
46
- 'ş': 's',
47
- 'w': 'oui',
48
- 'k': 'c',
49
- 'l': 'll',
50
- # Math symbols
51
- 'sqrt': ' rădăcina pătrată din ',
52
- '^': ' la puterea ',
53
- '+': ' plus ',
54
- ' - ': ' minus ', # only replace if standalone so to not say minus if is a-b-c
55
- '*': ' ori ', # times
56
- '/': ' împărțit la ', # divided by
57
- '=': ' egal cu ', # equals
58
- 'pi': ' pi ',
59
- '<': ' mai mic decât ',
60
- '>': ' mai mare decât',
61
- '%': ' la sută ', # percent (from previous)
62
- '(': ' paranteză deschisă ',
63
- ')': ' paranteză închisă ',
64
- '[': ' paranteză pătrată deschisă ',
65
- ']': ' paranteză pătrată închisă ',
66
- '{': ' acoladă deschisă ',
67
- '}': ' acoladă închisă ',
68
- '≠': ' nu este egal cu ',
69
- '≤': ' mai mic sau egal cu ',
70
- '≥': ' mai mare sau egal cu ',
71
- '≈': ' aproximativ ',
72
- '∞': ' infinit ',
73
- '€': ' euro ',
74
- '$': ' dolar ',
75
- '£': ' liră ',
76
- '&': ' și ', # and
77
- '@': ' la ', # at
78
- '#': ' diez ', # hash
79
- '∑': ' sumă ',
80
- '∫': ' integrală ',
81
- '√': ' rădăcina pătrată a ', # more generic square root
82
- }
83
-
84
- eng_replacements = {
85
- 'wik': 'weaky',
86
- 'sh': 'ss',
87
- 'ch': 'ttss',
88
- 'oo': 'oeo',
89
- # Math symbols for English
90
- 'sqrt': ' square root of ',
91
- '^': ' to the power of ',
92
- '+': ' plus ',
93
- ' - ': ' minus ',
94
- '*': ' times ',
95
- ' / ': ' divided by ',
96
- '=': ' equals ',
97
- 'pi': ' pi ',
98
- '<': ' less than ',
99
- '>': ' greater than ',
100
- # Additional common math symbols from previous list
101
- '%': ' percent ',
102
- '(': ' open parenthesis ',
103
- ')': ' close parenthesis ',
104
- '[': ' open bracket ',
105
- ']': ' close bracket ',
106
- '{': ' open curly brace ',
107
- '}': ' close curly brace ',
108
- '∑': ' sum ',
109
- '∫': ' integral ',
110
- '√': ' square root of ',
111
- '≠': ' not equals ',
112
- '≤': ' less than or equals ',
113
- '≥': ' greater than or equals ',
114
- '≈': ' approximately ',
115
- '∞': ' infinity ',
116
- '€': ' euro ',
117
- '$': ' dollar ',
118
- '£': ' pound ',
119
- '&': ' and ',
120
- '@': ' at ',
121
- '#': ' hash ',
122
- }
123
-
124
- serbian_replacements = {
125
- 'rn': 'rrn',
126
- 'ć': 'č',
127
- 'c': 'č',
128
- 'đ': 'd',
129
- 'j': 'i',
130
- 'l': 'lll',
131
- 'w': 'v',
132
- # https://huggingface.co/facebook/mms-tts-rmc-script_latin
133
- 'sqrt': 'kvadratni koren iz',
134
- '^': ' na stepen ',
135
- '+': ' plus ',
136
- ' - ': ' minus ',
137
- '*': ' puta ',
138
- ' / ': ' podeljeno sa ',
139
- '=': ' jednako ',
140
- 'pi': ' pi ',
141
- '<': ' manje od ',
142
- '>': ' veće od ',
143
- '%': ' procenat ',
144
- '(': ' otvorena zagrada ',
145
- ')': ' zatvorena zagrada ',
146
- '[': ' otvorena uglasta zagrada ',
147
- ']': ' zatvorena uglasta zagrada ',
148
- '{': ' otvorena vitičasta zagrada ',
149
- '}': ' zatvorena vitičasta zagrada ',
150
- '∑': ' suma ',
151
- '∫': ' integral ',
152
- '√': ' kvadratni koren ',
153
- '≠': ' nije jednako ',
154
- '≤': ' manje ili jednako od ',
155
- '≥': ' veće ili jednako od ',
156
- '≈': ' približno ',
157
- '∞': ' beskonačnost ',
158
- '€': ' evro ',
159
- '$': ' dolar ',
160
- '£': ' funta ',
161
- '&': ' i ',
162
- '@': ' et ',
163
- '#': ' taraba ',
164
- # Others
165
- # 'rn': 'rrn',
166
- # 'ć': 'č',
167
- # 'c': 'č',
168
- # 'đ': 'd',
169
- # 'l': 'le',
170
- # 'ij': 'i',
171
- # 'ji': 'i',
172
- # 'j': 'i',
173
- # 'služ': 'sloooozz', # 'službeno'
174
- # 'suver': 'siuveeerra', # 'suverena'
175
- # 'država': 'dirrezav', # 'država'
176
- # 'iči': 'ici', # 'Graniči'
177
- # 's ': 'se', # a s with space
178
- # 'q': 'ku',
179
- # 'w': 'aou',
180
- # 'z': 's',
181
- # "š": "s",
182
- # 'th': 'ta',
183
- # 'v': 'vv',
184
- # "ć": "č",
185
- # "đ": "ď",
186
- # "lj": "ľ",
187
- # "nj": "ň",
188
- # "ž": "z",
189
- # "c": "č"
190
- }
191
-
192
- deu_replacements = {
193
- 'sch': 'sh',
194
- 'ch': 'kh',
195
- 'ie': 'ee',
196
- 'ei': 'ai',
197
- 'ä': 'ae',
198
- 'ö': 'oe',
199
- 'ü': 'ue',
200
- 'ß': 'ss',
201
- # Math symbols for German
202
- 'sqrt': ' Quadratwurzel aus ',
203
- '^': ' hoch ',
204
- '+': ' plus ',
205
- ' - ': ' minus ',
206
- '*': ' mal ',
207
- ' / ': ' geteilt durch ',
208
- '=': ' gleich ',
209
- 'pi': ' pi ',
210
- '<': ' kleiner als ',
211
- '>': ' größer als',
212
- # Additional common math symbols from previous list
213
- '%': ' prozent ',
214
- '(': ' Klammer auf ',
215
- ')': ' Klammer zu ',
216
- '[': ' eckige Klammer auf ',
217
- ']': ' eckige Klammer zu ',
218
- '{': ' geschweifte Klammer auf ',
219
- '}': ' geschweifte Klammer zu ',
220
- '∑': ' Summe ',
221
- '∫': ' Integral ',
222
- '√': ' Quadratwurzel ',
223
- '≠': ' ungleich ',
224
- '≤': ' kleiner oder gleich ',
225
- '≥': ' größer oder gleich ',
226
- '≈': ' ungefähr ',
227
- '∞': ' unendlich ',
228
- '€': ' euro ',
229
- '$': ' dollar ',
230
- '£': ' pfund ',
231
- '&': ' und ',
232
- '@': ' at ', # 'Klammeraffe' is also common but 'at' is simpler
233
- '#': ' raute ',
234
- }
235
-
236
- fra_replacements = {
237
- # French specific phonetic replacements (add as needed)
238
- # e.g., 'ç': 's', 'é': 'e', etc.
239
- 'w': 'v',
240
- # Math symbols for French
241
- 'sqrt': ' racine carrée de ',
242
- '^': ' à la puissance ',
243
- '+': ' plus ',
244
- ' - ': ' moins ', # tiré ;
245
- '*': ' fois ',
246
- ' / ': ' divisé par ',
247
- '=': ' égale ',
248
- 'pi': ' pi ',
249
- '<': ' inférieur à ',
250
- '>': ' supérieur à ',
251
- # Add more common math symbols as needed for French
252
- '%': ' pour cent ',
253
- '(': ' parenthèse ouverte ',
254
- ')': ' parenthèse fermée ',
255
- '[': ' crochet ouvert ',
256
- ']': ' crochet fermé ',
257
- '{': ' accolade ouverte ',
258
- '}': ' accolade fermée ',
259
- '∑': ' somme ',
260
- '∫': ' intégrale ',
261
- '√': ' racine carrée ',
262
- '≠': ' n\'égale pas ',
263
- '≤': ' inférieur ou égal à ',
264
- '≥': ' supérieur ou égal à ',
265
- '≈': ' approximativement ',
266
- '∞': ' infini ',
267
- '€': ' euro ',
268
- '$': ' dollar ',
269
- '£': ' livre ',
270
- '&': ' et ',
271
- '@': ' arobase ',
272
- '#': ' dièse ',
273
- }
274
-
275
- hun_replacements = {
276
- # Hungarian specific phonetic replacements (add as needed)
277
- # e.g., 'á': 'a', 'é': 'e', etc.
278
- 'ch': 'ts',
279
- 'cs': 'tz',
280
- 'g': 'gk',
281
- 'w': 'v',
282
- 'z': 'zz',
283
- # Math symbols for Hungarian
284
- 'sqrt': ' négyzetgyök ',
285
- '^': ' hatvány ',
286
- '+': ' plusz ',
287
- ' - ': ' mínusz ',
288
- '*': ' szorozva ',
289
- ' / ': ' osztva ',
290
- '=': ' egyenlő ',
291
- 'pi': ' pi ',
292
- '<': ' kisebb mint ',
293
- '>': ' nagyobb mint ',
294
- # Add more common math symbols as needed for Hungarian
295
- '%': ' százalék ',
296
- '(': ' nyitó zárójel ',
297
- ')': ' záró zárójel ',
298
- '[': ' nyitó szögletes zárójel ',
299
- ']': ' záró szögletes zárójel ',
300
- '{': ' nyitó kapcsos zárójel ',
301
- '}': ' záró kapcsos zárójel ',
302
- '∑': ' szumma ',
303
- '∫': ' integrál ',
304
- '√': ' négyzetgyök ',
305
- '≠': ' nem egyenlő ',
306
- '≤': ' kisebb vagy egyenlő ',
307
- '≥': ' nagyobb vagy egyenlő ',
308
- '≈': ' körülbelül ',
309
- '∞': ' végtelen ',
310
- '€': ' euró ',
311
- '$': ' dollár ',
312
- '£': ' font ',
313
- '&': ' és ',
314
- '@': ' kukac ',
315
- '#': ' kettőskereszt ',
316
- }
317
-
318
- grc_replacements = {
319
- # Ancient Greek specific phonetic replacements (add as needed)
320
- # These are more about transliterating Greek letters if they are in the input text.
321
- # Math symbols for Ancient Greek (literal translations)
322
- 'sqrt': ' τετραγωνικὴ ῥίζα ',
323
- '^': ' εἰς τὴν δύναμιν ',
324
- '+': ' σὺν ',
325
- ' - ': ' χωρὶς ',
326
- '*': ' πολλάκις ',
327
- ' / ': ' διαιρέω ',
328
- '=': ' ἴσον ',
329
- 'pi': ' πῖ ',
330
- '<': ' ἔλαττον ',
331
- '>': ' μεῖζον ',
332
- # Add more common math symbols as needed for Ancient Greek
333
- '%': ' τοῖς ἑκατόν ', # tois hekaton - 'of the hundred'
334
- '(': ' ἀνοικτὴ παρένθεσις ',
335
- ')': ' κλειστὴ παρένθεσις ',
336
- '[': ' ἀνοικτὴ ἀγκύλη ',
337
- ']': ' κλειστὴ ἀγκύλη ',
338
- '{': ' ἀνοικτὴ σγουρὴ ἀγκύλη ',
339
- '}': ' κλειστὴ σγουρὴ ἀγκύλη ',
340
- '∑': ' ἄθροισμα ',
341
- '∫': ' ὁλοκλήρωμα ',
342
- '√': ' τετραγωνικὴ ῥίζα ',
343
- '≠': ' οὐκ ἴσον ',
344
- '≤': ' ἔλαττον ἢ ἴσον ',
345
- '≥': ' μεῖζον ἢ ἴσον ',
346
- '≈': ' περίπου ',
347
- '∞': ' ἄπειρον ',
348
- '€': ' εὐρώ ',
349
- '$': ' δολάριον ',
350
- '£': ' λίρα ',
351
- '&': ' καὶ ',
352
- '@': ' ἀτ ', # at
353
- '#': ' δίεση ', # hash
354
- }
355
-
356
-
357
- # Select the appropriate replacement dictionary based on the language
358
- replacements_map = {
359
- 'grc': grc_replacements,
360
- 'ron': ron_replacements,
361
- 'eng': eng_replacements,
362
- 'deu': deu_replacements,
363
- 'fra': fra_replacements,
364
- 'hun': hun_replacements,
365
- 'rmc-script_latin': serbian_replacements,
366
- }
367
-
368
- current_replacements = replacements_map.get(lang)
369
- if current_replacements:
370
- # Sort replacements by length of the key in descending order.
371
- # This is crucial for correctly replacing multi-character strings (like 'sqrt', 'sch')
372
- # before their shorter substrings ('s', 'ch', 'q', 'r', 't').
373
- sorted_replacements = sorted(current_replacements.items(), key=lambda item: len(item[0]), reverse=True)
374
- for old, new in sorted_replacements:
375
- text = text.replace(old, new)
376
- return text
377
- else:
378
- # If the language is not supported, return the original text
379
- print(f"Warning: Language '{lang}' not supported for text replacement. Returning original text.")
380
- return text
381
-
382
-
383
- def _num2words(text='01234', lang=None):
384
- if lang == 'grc':
385
- return convert_numbers(text)
386
- return num2words(text, lang=lang) # HAS TO BE kwarg lang=lang
387
-
388
-
389
- def transliterate_number(number_string,
390
- lang=None):
391
- if lang == 'rmc-script_latin':
392
- lang = 'sr'
393
- exponential_pronoun = ' puta deset na stepen od '
394
- comma = ' tačka '
395
- elif lang == 'ron':
396
- lang = 'ro'
397
- exponential_pronoun = ' tízszer a erejéig '
398
- comma = ' virgulă '
399
- elif lang == 'hun':
400
- lang = 'hu'
401
- exponential_pronoun = ' tízszer a erejéig '
402
- comma = ' virgula '
403
- elif lang == 'deu':
404
- exponential_pronoun = ' mal zehn hoch '
405
- comma = ' komma '
406
- elif lang == 'fra':
407
- lang = 'fr'
408
- exponential_pronoun = ' puissance '
409
- comma = 'virgule'
410
- elif lang == 'grc':
411
- exponential_pronoun = ' εις την δυναμην του '
412
- comma = 'κομμα'
413
- else:
414
- lang = lang[:2]
415
- exponential_pronoun = ' times ten to the power of '
416
- comma = ' point '
417
-
418
- def replace_number(match):
419
- prefix = match.group(1) or ""
420
- number_part = match.group(2)
421
- suffix = match.group(5) or ""
422
-
423
- try:
424
- if 'e' in number_part.lower():
425
- base, exponent = number_part.lower().split('e')
426
- words = _num2words(base, lang=lang) + exponential_pronoun + _num2words(exponent, lang=lang)
427
- elif '.' in number_part:
428
- integer_part, decimal_part = number_part.split('.')
429
- words = _num2words(integer_part, lang=lang) + comma + " ".join(
430
- [_num2words(digit, lang=lang) for digit in decimal_part])
431
- else:
432
- words = _num2words(number_part, lang=lang)
433
- return prefix + words + suffix
434
- except ValueError:
435
- return match.group(0) # Return original if conversion fails
436
-
437
- pattern = r'([^\d]*)(\d+(\.\d+)?([Ee][+-]?\d+)?)([^\d]*)'
438
- return re.sub(pattern, replace_number, number_string)
439
 
440
 
441
  language_names = ['Ancient greek',
@@ -448,7 +37,7 @@ language_names = ['Ancient greek',
448
 
449
 
450
  def audionar_tts(text=None,
451
- lang='romanian',
452
  soundscape='',
453
  cache_lim=24):
454
 
@@ -464,404 +53,115 @@ def audionar_tts(text=None,
464
  'romanian': 'ron',
465
  'serbian (approx.)': 'rmc-script_latin',
466
  }
467
-
468
- if text and text.strip():
469
-
470
- if lang not in language_names:
471
-
472
- speech_audio = _styletts2(text=text, # Eng.
473
- ref_s='wav/' + lang + '.wav')
474
-
475
- else: # VITS
476
-
477
- lang_code = lang_map.get(lang.lower(), lang.lower().split()[0].strip())
478
-
479
- global cached_lang_code, cached_net_g, cached_tokenizer
480
-
481
- if 'cached_lang_code' not in globals() or cached_lang_code != lang_code:
482
- cached_lang_code = lang_code
483
- cached_net_g = VitsModel.from_pretrained(f'facebook/mms-tts-{lang_code}').eval()
484
- cached_tokenizer = VitsTokenizer.from_pretrained(f'facebook/mms-tts-{lang_code}')
485
 
486
- net_g = cached_net_g
487
- tokenizer = cached_tokenizer
488
- text = only_greek_or_only_latin(text, lang=lang_code)
489
- text = transliterate_number(text, lang=lang_code)
490
- text = fix_vocals(text, lang=lang_code)
491
 
 
492
 
493
- sentences = textwrap.wrap(text, width=439)
494
 
495
- total_audio_parts = []
496
- for sentence in sentences:
497
- inputs = cached_tokenizer(sentence, return_tensors="pt")
498
- with torch.no_grad():
499
- audio_part = cached_net_g(
500
- input_ids=inputs.input_ids.to(device),
501
- attention_mask=inputs.attention_mask.to(device),
502
- lang_code=lang_code,
503
- )[0, :]
504
- total_audio_parts.append(audio_part)
505
 
506
- speech_audio = torch.cat(total_audio_parts).cpu().numpy()
507
 
508
- # AudioGen
509
- if soundscape and soundscape.strip():
510
 
 
511
 
512
- speech_duration_secs = len(speech_audio) / 16000 if speech_audio is not None else 0
513
- target_duration = max(speech_duration_secs + 0.74, 2.0)
514
-
515
-
516
- background_audio = audiogen.generate(
517
- soundscape,
518
- duration=target_duration,
519
- cache_lim=max(4, int(cache_lim)) # at least allow 10 A/R stEps
520
- ).numpy()
521
-
522
- if speech_audio is not None:
523
-
524
- len_speech = len(speech_audio)
525
- len_background = len(background_audio)
526
-
527
- if len_background > len_speech:
528
- padding = np.zeros(len_background - len_speech,
529
- dtype=np.float32)
530
- speech_audio = np.concatenate([speech_audio, padding])
531
- elif len_speech > len_background:
532
- padding = np.zeros(len_speech - len_background,
533
- dtype=np.float32)
534
- background_audio = np.concatenate([background_audio, padding])
535
-
536
-
537
- speech_audio_stereo = speech_audio[None, :]
538
- background_audio_stereo = background_audio[None, :]
539
-
540
 
541
- final_audio = np.concatenate([
542
- 0.49 * speech_audio_stereo + 0.51 * background_audio_stereo,
543
- 0.51 * background_audio_stereo + 0.49 * speech_audio_stereo
544
- ], 0)
545
- else:
546
- final_audio = background_audio
547
 
548
- # If no soundscape, use the speech audio as is.
549
- elif speech_audio is not None:
550
- final_audio = speech_audio
551
 
552
- # If both inputs are empty, create a 2s silent audio file.
553
- if final_audio is None:
554
- final_audio = np.zeros(16000 * 2, dtype=np.float32)
555
-
556
- wavfile = '_vits_.wav'
557
- audiofile.write(wavfile, final_audio, 16000)
558
-
559
- return wavfile, wavfile # 2x file for [audio out & state to pass to the Emotion reco tAB]
560
-
561
-
562
- # -- EXPRESSIO
563
-
564
-
565
- device = 0 if torch.cuda.is_available() else "cpu"
566
- duration = 2 # limit processing of audio
567
- age_gender_model_name = "audeering/wav2vec2-large-robust-6-ft-age-gender"
568
- expression_model_name = "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim"
569
-
570
-
571
- class AgeGenderHead(nn.Module):
572
- r"""Age-gender model head."""
573
-
574
- def __init__(self, config, num_labels):
575
-
576
- super().__init__()
577
-
578
- self.dense = nn.Linear(config.hidden_size, config.hidden_size)
579
- self.dropout = nn.Dropout(config.final_dropout)
580
- self.out_proj = nn.Linear(config.hidden_size, num_labels)
581
-
582
- def forward(self, features, **kwargs):
583
-
584
- x = features
585
- x = self.dropout(x)
586
- x = self.dense(x)
587
- x = torch.tanh(x)
588
- x = self.dropout(x)
589
- x = self.out_proj(x)
590
-
591
- return x
592
-
593
-
594
- class AgeGenderModel(Wav2Vec2PreTrainedModel):
595
- r"""Age-gender recognition model."""
596
-
597
- def __init__(self, config):
598
-
599
- super().__init__(config)
600
-
601
- self.config = config
602
- self.wav2vec2 = Wav2Vec2Model(config)
603
- self.age = AgeGenderHead(config, 1)
604
- self.gender = AgeGenderHead(config, 3)
605
- self.init_weights()
606
-
607
- def forward(
608
- self,
609
- frozen_cnn7,
610
- ):
611
-
612
- hidden_states = self.wav2vec2(frozen_cnn7=frozen_cnn7) # runs only Transformer layers
613
-
614
- hidden_states = torch.mean(hidden_states, dim=1)
615
- logits_age = self.age(hidden_states)
616
- logits_gender = torch.softmax(self.gender(hidden_states), dim=1)
617
-
618
- return hidden_states, logits_age, logits_gender
619
-
620
- # AgeGenderModel.forward() is switched to accept computed frozen CNN7 features from ExpressioNmodel
621
-
622
- def _forward(
623
- self,
624
- frozen_cnn7=None, # CNN7 fetures of wav2vec2 calc. from CNN7 feature extractor (once)
625
- attention_mask=None):
626
-
627
-
628
- if attention_mask is not None:
629
- # compute reduced attention_mask corresponding to feature vectors
630
- attention_mask = self._get_feature_vector_attention_mask(
631
- frozen_cnn7.shape[1], attention_mask, add_adapter=False
632
- )
633
-
634
- hidden_states, _ = self.wav2vec2.feature_projection(frozen_cnn7)
635
-
636
- hidden_states = self.wav2vec2.encoder(
637
- hidden_states,
638
- attention_mask=attention_mask,
639
- output_attentions=None,
640
- output_hidden_states=None,
641
- return_dict=None,
642
- )[0]
643
-
644
- return hidden_states
645
-
646
-
647
- def _forward_and_cnn7(
648
- self,
649
- input_values,
650
- attention_mask=None):
651
-
652
- frozen_cnn7 = self.wav2vec2.feature_extractor(input_values)
653
- frozen_cnn7 = frozen_cnn7.transpose(1, 2)
654
-
655
- if attention_mask is not None:
656
- # compute reduced attention_mask corresponding to feature vectors
657
- attention_mask = self.wav2vec2._get_feature_vector_attention_mask(
658
- frozen_cnn7.shape[1], attention_mask, add_adapter=False
659
- )
660
-
661
- hidden_states, _ = self.wav2vec2.feature_projection(frozen_cnn7) # grad=True non frozen
662
-
663
- hidden_states = self.wav2vec2.encoder(
664
- hidden_states,
665
- attention_mask=attention_mask,
666
- output_attentions=None,
667
- output_hidden_states=None,
668
- return_dict=None,
669
- )[0]
670
-
671
- return hidden_states, frozen_cnn7 #feature_proj is trainable thus we have to access the frozen_cnn7 before projection layer
672
-
673
-
674
- class ExpressionHead(nn.Module):
675
- r"""Expression model head."""
676
-
677
- def __init__(self, config):
678
-
679
- super().__init__()
680
-
681
- self.dense = nn.Linear(config.hidden_size, config.hidden_size)
682
- self.dropout = nn.Dropout(config.final_dropout)
683
- self.out_proj = nn.Linear(config.hidden_size, config.num_labels)
684
-
685
- def forward(self, features, **kwargs):
686
-
687
- x = features
688
- x = self.dropout(x)
689
- x = self.dense(x)
690
- x = torch.tanh(x)
691
- x = self.dropout(x)
692
- x = self.out_proj(x)
693
-
694
- return x
695
 
 
696
 
697
- class ExpressionModel(Wav2Vec2PreTrainedModel):
698
- r"""speech expression model."""
 
 
699
 
700
- def __init__(self, config):
 
 
 
 
701
 
702
- super().__init__(config)
703
 
704
- self.config = config
705
- self.wav2vec2 = Wav2Vec2Model(config)
706
- self.classifier = ExpressionHead(config)
707
- self.init_weights()
708
 
709
- def forward(self, input_values):
710
- hidden_states, frozen_cnn7 = self.wav2vec2(input_values)
711
- hidden_states = torch.mean(hidden_states, dim=1)
712
- logits = self.classifier(hidden_states)
 
 
 
 
 
 
713
 
714
- return hidden_states, logits, frozen_cnn7
715
 
716
 
717
- # Load models from hub
718
 
719
- age_gender_model = AgeGenderModel.from_pretrained(age_gender_model_name)
720
- expression_processor = Wav2Vec2Processor.from_pretrained(expression_model_name)
721
- expression_model = ExpressionModel.from_pretrained(expression_model_name)
722
 
723
- # Emotion Calc. CNN features
724
 
725
- age_gender_model.wav2vec2.forward = types.MethodType(_forward, age_gender_model)
726
- expression_model.wav2vec2.forward = types.MethodType(_forward_and_cnn7, expression_model)
 
 
 
727
 
728
- def process_func(x: np.ndarray, sampling_rate: int) -> typing.Tuple[str, dict, str]:
 
 
 
 
729
 
730
- # batch audio
731
- y = expression_processor(x, sampling_rate=sampling_rate)
732
- y = y['input_values'][0]
733
- y = y.reshape(1, -1)
734
- y = torch.from_numpy(y).to(device)
735
 
736
- # run through expression model
737
- with torch.no_grad():
738
- _, logits_expression, frozen_cnn7 = expression_model(y)
739
 
740
- _, logits_age, logits_gender = age_gender_model(frozen_cnn7=frozen_cnn7)
741
 
742
- # Plot A/D/V values
743
- plot_expression(logits_expression[0, 0].item(), # implicit detach().cpu().numpy()
744
- logits_expression[0, 1].item(),
745
- logits_expression[0, 2].item())
746
- expression_file = "expression.png"
747
- plt.savefig(expression_file)
748
- return (
749
- f"{round(100 * logits_age[0, 0].item())} years", # age
750
- {
751
- "female": logits_gender[0, 0].item(),
752
- "male": logits_gender[0, 1].item(),
753
- "child": logits_gender[0, 2].item(),
754
- },
755
- expression_file,
756
- )
757
 
 
 
 
758
 
759
- def recognize(input_file):
760
- if input_file is None:
761
- raise gr.Error(
762
- "No audio file submitted! "
763
- "Please upload or record an audio file "
764
- "before submitting your request."
765
- )
766
-
767
- signal, sampling_rate = audiofile.read(input_file, duration=duration)
768
- # Resample to sampling rate supported byu the models
769
- target_rate = 16000
770
- signal = audresample.resample(signal, sampling_rate, target_rate)
771
-
772
- return process_func(signal, target_rate)
773
-
774
-
775
- def explode(data):
776
- """
777
- Expands a 3D array by creating gaps between voxels.
778
- This function is used to create the visual separation between the voxels.
779
- """
780
- shape_orig = np.array(data.shape)
781
- shape_new = shape_orig * 2 - 1
782
- retval = np.zeros(shape_new, dtype=data.dtype)
783
- retval[::2, ::2, ::2] = data
784
- return retval
785
-
786
-
787
- def explode(data):
788
- """
789
- Expands a 3D array by adding new voxels between existing ones.
790
- This is used to create the gaps in the 3D plot.
791
- """
792
- shape = data.shape
793
- new_shape = (2 * shape[0] - 1, 2 * shape[1] - 1, 2 * shape[2] - 1)
794
- new_data = np.zeros(new_shape, dtype=data.dtype)
795
- new_data[::2, ::2, ::2] = data
796
- return new_data
797
-
798
- def plot_expression(arousal, dominance, valence):
799
- '''_h = cuda tensor (N_PIX, N_PIX, N_PIX)'''
800
-
801
- N_PIX = 5
802
- _h = np.random.rand(N_PIX, N_PIX, N_PIX) * 1e-3
803
- adv = np.array([arousal, .994 - dominance, valence]).clip(0, .99)
804
- arousal, dominance, valence = (adv * N_PIX).astype(np.int64) # find voxel
805
- _h[arousal, dominance, valence] = .22
806
-
807
- filled = np.ones((N_PIX, N_PIX, N_PIX), dtype=bool)
808
-
809
- # upscale the above voxel image, leaving gaps
810
- filled_2 = explode(filled)
811
-
812
- # Shrink the gaps
813
- x, y, z = np.indices(np.array(filled_2.shape) + 1).astype(float) // 2
814
- x[1::2, :, :] += 1
815
- y[:, 1::2, :] += 1
816
- z[:, :, 1::2] += 1
817
-
818
- fig = plt.figure()
819
- ax = fig.add_subplot(projection='3d')
820
-
821
- f_2 = np.ones([2 * N_PIX - 1,
822
- 2 * N_PIX - 1,
823
- 2 * N_PIX - 1, 4], dtype=np.float64)
824
- f_2[:, :, :, 3] = explode(_h)
825
- cm = plt.get_cmap('cool')
826
- f_2[:, :, :, :3] = cm(f_2[:, :, :, 3])[..., :3]
827
 
828
- f_2[:, :, :, 3] = f_2[:, :, :, 3].clip(.01, .74)
 
 
829
 
830
- ecolors_2 = f_2
831
 
832
- ax.voxels(x, y, z, filled_2, facecolors=f_2, edgecolors=.006 * ecolors_2)
833
- ax.set_aspect('equal')
834
- ax.set_zticks([0, N_PIX])
835
- ax.set_xticks([0, N_PIX])
836
- ax.set_yticks([0, N_PIX])
837
 
838
- ax.set_zticklabels([f'{n/N_PIX:.2f}'[0:] for n in ax.get_zticks()])
839
- ax.set_zlabel('valence', fontsize=10, labelpad=0)
840
- ax.set_xticklabels([f'{n/N_PIX:.2f}' for n in ax.get_xticks()])
841
- ax.set_xlabel('arousal', fontsize=10, labelpad=7)
842
- # The y-axis rotation is corrected here from 275 to 90 degrees
843
- ax.set_yticklabels([f'{1-n/N_PIX:.2f}' for n in ax.get_yticks()], rotation=90)
844
- ax.set_ylabel('dominance', fontsize=10, labelpad=10)
845
- ax.grid(False)
846
 
847
- ax.plot([N_PIX, N_PIX], [0, N_PIX + .2], [N_PIX, N_PIX], 'g', linewidth=1)
848
- ax.plot([0, N_PIX], [N_PIX, N_PIX + .24], [N_PIX, N_PIX], 'k', linewidth=1)
849
-
850
- # Missing lines on the top face
851
- ax.plot([0, 0], [0, N_PIX], [N_PIX, N_PIX], 'darkred', linewidth=1)
852
- ax.plot([0, N_PIX], [0, 0], [N_PIX, N_PIX], 'darkblue', linewidth=1)
853
 
854
- # Set pane colors after plotting the lines
855
- # UPDATED: Replaced `w_xaxis` with `xaxis` and `w_yaxis` with `yaxis`.
856
- ax.xaxis.set_pane_color((0.8, 0.8, 0.8, 0.5))
857
- ax.yaxis.set_pane_color((0.8, 0.8, 0.8, 0.5))
858
- ax.zaxis.set_pane_color((0.8, 0.8, 0.8, 0.0))
859
 
860
- # Restore the limits to prevent the plot from expanding
861
- ax.set_xlim(0, N_PIX)
862
- ax.set_ylim(0, N_PIX)
863
- ax.set_zlim(0, N_PIX)
864
- # plt.show()
865
 
866
  # TTS
867
  # VOICES = [f'wav/{vox}' for vox in os.listdir('wav')]
@@ -1195,179 +495,8 @@ VOICES = [t[:-4] for t in VOICES] # crop .wav for visuals in gr.DropDown
1195
 
1196
  _tts = StyleTTS2().to('cpu')
1197
 
1198
- def only_greek_or_only_latin(text, lang='grc'):
1199
- '''
1200
- str: The converted string in the specified target script.
1201
- Characters not found in any mapping are preserved as is.
1202
- Latin accented characters in the input (e.g., 'É', 'ü') will
1203
- be preserved in their lowercase form (e.g., 'é', 'ü') if
1204
- converting to Latin.
1205
- '''
1206
-
1207
- # --- Mapping Dictionaries ---
1208
- # Keys are in lowercase as input text is case-folded.
1209
- # If the output needs to maintain original casing, additional logic is required.
1210
-
1211
- latin_to_greek_map = {
1212
- 'a': 'α', 'b': 'β', 'g': 'γ', 'd': 'δ', 'e': 'ε',
1213
- 'ch': 'τσο', # Example of a multi-character Latin sequence
1214
- 'z': 'ζ', 'h': 'χ', 'i': 'ι', 'k': 'κ', 'l': 'λ',
1215
- 'm': 'μ', 'n': 'ν', 'x': 'ξ', 'o': 'ο', 'p': 'π',
1216
- 'v': 'β', 'sc': 'σκ', 'r': 'ρ', 's': 'σ', 't': 'τ',
1217
- 'u': 'ου', 'f': 'φ', 'c': 'σ', 'w': 'β', 'y': 'γ',
1218
- }
1219
-
1220
- greek_to_latin_map = {
1221
- 'ου': 'ou', # Prioritize common diphthongs/digraphs
1222
- 'α': 'a', 'β': 'v', 'γ': 'g', 'δ': 'd', 'ε': 'e',
1223
- 'ζ': 'z', 'η': 'i', 'θ': 'th', 'ι': 'i', 'κ': 'k',
1224
- 'λ': 'l', 'μ': 'm', 'ν': 'n', 'ξ': 'x', 'ο': 'o',
1225
- 'π': 'p', 'ρ': 'r', 'σ': 's', 'τ': 't', 'υ': 'y', # 'y' is a common transliteration for upsilon
1226
- 'φ': 'f', 'χ': 'ch', 'ψ': 'ps', 'ω': 'o',
1227
- 'ς': 's', # Final sigma
1228
- }
1229
-
1230
- cyrillic_to_latin_map = {
1231
- 'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo', 'ж': 'zh',
1232
- 'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n', 'о': 'o',
1233
- 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'kh', 'ц': 'ts',
1234
- 'ч': 'ch', 'ш': 'sh', 'щ': 'shch', 'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu',
1235
- 'я': 'ya',
1236
- }
1237
-
1238
- # Direct Cyrillic to Greek mapping based on phonetic similarity.
1239
- # These are approximations and may not be universally accepted transliterations.
1240
- cyrillic_to_greek_map = {
1241
- 'а': 'α', 'б': 'β', 'в': 'β', 'г': 'γ', 'д': 'δ', 'е': 'ε', 'ё': 'ιο', 'ж': 'ζ',
1242
- 'з': 'ζ', 'и': 'ι', 'й': 'ι', 'κ': 'κ', 'λ': 'λ', 'м': 'μ', 'н': 'ν', 'о': 'ο',
1243
- 'π': 'π', 'ρ': 'ρ', 'σ': 'σ', 'τ': 'τ', 'у': 'ου', 'ф': 'φ', 'х': 'χ', 'ц': 'τσ',
1244
- 'ч': 'τσ', # or τζ depending on desired sound
1245
- 'ш': 'σ', 'щ': 'σ', # approximations
1246
- 'ъ': '', 'ы': 'ι', 'ь': '', 'э': 'ε', 'ю': 'ιου',
1247
- 'я': 'ια',
1248
- }
1249
-
1250
- # Convert the input text to lowercase, preserving accents for Latin characters.
1251
- # casefold() is used for more robust caseless matching across Unicode characters.
1252
- lowercased_text = text.lower() #casefold()
1253
- output_chars = []
1254
- current_index = 0
1255
-
1256
- if lang == 'grc':
1257
- # Combine all relevant maps for direct lookup to Greek
1258
- conversion_map = {**latin_to_greek_map, **cyrillic_to_greek_map}
1259
-
1260
- # Sort keys by length in reverse order to handle multi-character sequences first
1261
- sorted_source_keys = sorted(
1262
- list(latin_to_greek_map.keys()) + list(cyrillic_to_greek_map.keys()),
1263
- key=len,
1264
- reverse=True
1265
- )
1266
-
1267
- while current_index < len(lowercased_text):
1268
- found_conversion = False
1269
- for key in sorted_source_keys:
1270
- if lowercased_text.startswith(key, current_index):
1271
- output_chars.append(conversion_map[key])
1272
- current_index += len(key)
1273
- found_conversion = True
1274
- break
1275
- if not found_conversion:
1276
- # If no specific mapping found, append the character as is.
1277
- # This handles unmapped characters and already Greek characters.
1278
- output_chars.append(lowercased_text[current_index])
1279
- current_index += 1
1280
- return ''.join(output_chars)
1281
-
1282
- else: # Default to 'lat' conversion
1283
- # Combine Greek to Latin and Cyrillic to Latin maps.
1284
- # Cyrillic map keys will take precedence in case of overlap if defined after Greek.
1285
- combined_to_latin_map = {**greek_to_latin_map, **cyrillic_to_latin_map}
1286
-
1287
- # Sort all relevant source keys by length in reverse for replacement
1288
- sorted_source_keys = sorted(
1289
- list(greek_to_latin_map.keys()) + list(cyrillic_to_latin_map.keys()),
1290
- key=len,
1291
- reverse=True
1292
- )
1293
-
1294
- while current_index < len(lowercased_text):
1295
- found_conversion = False
1296
- for key in sorted_source_keys:
1297
- if lowercased_text.startswith(key, current_index):
1298
- latin_equivalent = combined_to_latin_map[key]
1299
-
1300
- # Strip accents ONLY if the source character was from the Greek map.
1301
- # This preserves accents on original Latin characters (like 'é')
1302
- # and allows for intentional accent stripping from Greek transliterations.
1303
- if key in greek_to_latin_map:
1304
- normalized_latin = unicodedata.normalize('NFD', latin_equivalent)
1305
- stripped_latin = ''.join(c for c in normalized_latin if not unicodedata.combining(c))
1306
- output_chars.append(stripped_latin)
1307
- else:
1308
- output_chars.append(latin_equivalent)
1309
-
1310
- current_index += len(key)
1311
- found_conversion = True
1312
- break
1313
-
1314
- if not found_conversion:
1315
- # If no conversion happened from Greek or Cyrillic, append the character as is.
1316
- # This preserves existing Latin characters (including accented ones from input),
1317
- # numbers, punctuation, and other symbols.
1318
- output_chars.append(lowercased_text[current_index])
1319
- current_index += 1
1320
-
1321
- return ''.join(output_chars)
1322
-
1323
-
1324
- def _stylett2(text='Hallov worlds Far over the',
1325
- ref_s='wav/af_ZA_google-nwu_0184.wav'):
1326
-
1327
- if text and text.strip():
1328
-
1329
- text = only_greek_or_only_latin(text, lang='eng')
1330
-
1331
- speech_audio = _tts.inference(text,
1332
- ref_s=re_s)[0, 0, :].numpy() # 24 Khz
1333
-
1334
- if speech_audio.shape[0] > 10:
1335
-
1336
- speech_audio = audresample.resample(signal=speech_audio.astype(np.float32),
1337
- original_rate=24000,
1338
- target_rate=16000)[0, :] # 16 KHz
1339
-
1340
- return speech_audio
1341
-
1342
-
1343
-
1344
-
1345
- import gradio as gr
1346
-
1347
- # Dummy functions to make the code runnable for demonstration
1348
- def audionar_tts(text, choice, soundscape, kv):
1349
- # This function would generate an audio file and return its path
1350
- return "dummy_audio.wav"
1351
-
1352
- def recognize(audio_input_path):
1353
- # This function would analyze the audio and return results
1354
- return "30", "Male", {"Angry": 0.9}
1355
-
1356
- # Assuming these are defined elsewhere in the user's code
1357
- language_names = ["English", "Spanish"]
1358
- VOICES = ["Voice 1", "Voice 2"]
1359
 
1360
  with gr.Blocks(theme='huggingface') as demo:
1361
- tts_file = gr.State(value=None)
1362
- audio_examples_state = gr.State(
1363
- value=[
1364
- ["wav/female-46-neutral.wav"],
1365
- ["wav/female-20-happy.wav"],
1366
- ["wav/male-60-angry.wav"],
1367
- ["wav/male-27-sad.wav"],
1368
- ]
1369
- )
1370
-
1371
  with gr.Tab(label="TTS"):
1372
  with gr.Row():
1373
  text_input = gr.Textbox(
@@ -1394,56 +523,18 @@ with gr.Blocks(theme='huggingface') as demo:
1394
 
1395
  output_audio = gr.Audio(label="TTS Output")
1396
 
1397
- def generate_and_update_state(text, choice, soundscape, kv, current_examples):
1398
- audio_path = audionar_tts(text, choice, soundscape, kv)
1399
- updated_examples = current_examples + [[audio_path]]
1400
- return audio_path, updated_examples
1401
-
1402
  generate_button.click(
1403
- fn=generate_and_update_state,
1404
- inputs=[text_input, choice_dropdown, soundscape_input, kv_input, audio_examples_state],
1405
- outputs=[output_audio, audio_examples_state]
1406
  )
1407
 
1408
- with gr.Tab(label="Speech Analysis"):
1409
  with gr.Row():
1410
  with gr.Column():
1411
- input_audio_analysis = gr.Audio(
1412
- sources=["upload", "microphone"],
1413
- type="filepath",
1414
- label="Audio input",
1415
- min_length=0.025,
1416
- )
1417
-
1418
- audio_examples = gr.Examples(
1419
- examples=[], # Initialize with an empty list
1420
- inputs=[input_audio_analysis],
1421
- label="Examples from CREMA-D, ODbL v1.0 license",
1422
- )
1423
 
1424
  gr.Markdown("Only the first two seconds of the audio will be processed.")
1425
-
1426
- submit_btn = gr.Button(value="Submit", variant="primary")
1427
- with gr.Column():
1428
- output_age = gr.Textbox(label="Age")
1429
- output_gender = gr.Label(label="Gender")
1430
- output_expression = gr.Image(label="Expression")
1431
-
1432
- outputs = [output_age, output_gender, output_expression]
1433
-
1434
- # Fix: This function should not update gr.Examples directly.
1435
- # Instead, it should just return the updated examples list.
1436
- # The `demo.load` event will handle the update.
1437
- def load_examples_from_state(examples_list):
1438
- return gr.Examples.update(examples=examples_list)
1439
-
1440
- demo.load(
1441
- fn=load_examples_from_state,
1442
- inputs=[audio_examples_state],
1443
- outputs=[audio_examples],
1444
- queue=False,
1445
- )
1446
 
1447
- submit_btn.click(recognize, input_audio_analysis, outputs)
1448
 
1449
  demo.launch(debug=True)
 
1
  # -*- coding: utf-8 -*-
2
  import typing
 
3
  import gradio as gr
 
4
  import numpy as np
5
  import os
6
  import torch
7
  import torch.nn as nn
 
 
 
8
  import audiofile
9
  from tts import StyleTTS2
10
+ from textual import only_greek_or_only_latin, transliterate_number, fix_vocals
11
  import audresample
 
 
 
12
  import textwrap
13
  import nltk
 
 
14
  from audionar import VitsModel, VitsTokenizer
15
  from audiocraft import AudioGen
16
 
 
22
  nltk.download('punkt_tab', download_dir='./')
23
  nltk.data.path.append('.')
24
 
 
25
 
26
 
 
27
 
 
 
 
 
 
 
28
 
29
 
30
  language_names = ['Ancient greek',
 
37
 
38
 
39
  def audionar_tts(text=None,
40
+ lang='Romanian',
41
  soundscape='',
42
  cache_lim=24):
43
 
 
53
  'romanian': 'ron',
54
  'serbian (approx.)': 'rmc-script_latin',
55
  }
 
 
 
 
 
56
 
 
 
 
 
 
57
 
58
+ final_audio = None
59
 
 
60
 
61
+ if text is None or text.strip() == '':
62
+ text = 'No Audio or Txt Input'
 
 
 
 
 
 
 
 
63
 
 
64
 
65
+ print(lang, lang in language_names)
 
66
 
67
+ if lang not in language_names: # StyleTTS2
68
 
69
+ text = only_greek_or_only_latin(text, lang='eng')
 
 
 
 
70
 
71
+ x = _tts.inference(text,
72
+ ref_s='wav/' + lang + '.wav')[0, 0, :].numpy() # 24 Khz
73
+
74
+ if x.shape[0] > 10:
 
 
75
 
76
+ x = audresample.resample(signal=x.astype(np.float32),
77
+ original_rate=24000,
78
+ target_rate=16000)[0, :] # 16 KHz
79
 
80
+ else: # VITS
81
+
82
+ lang_code = lang_map.get(lang.lower(), lang.lower().split()[0].strip())
 
 
 
 
 
 
83
 
84
+ global cached_lang_code, cached_net_g, cached_tokenizer
85
 
86
+ if 'cached_lang_code' not in globals() or cached_lang_code != lang_code:
87
+ cached_lang_code = lang_code
88
+ cached_net_g = VitsModel.from_pretrained(f'facebook/mms-tts-{lang_code}').eval()
89
+ cached_tokenizer = VitsTokenizer.from_pretrained(f'facebook/mms-tts-{lang_code}')
90
 
91
+ net_g = cached_net_g
92
+ tokenizer = cached_tokenizer
93
+ text = only_greek_or_only_latin(text, lang=lang_code)
94
+ text = transliterate_number(text, lang=lang_code)
95
+ text = fix_vocals(text, lang=lang_code)
96
 
 
97
 
98
+ sentences = textwrap.wrap(text, width=439)
 
 
 
99
 
100
+ total_audio_parts = []
101
+ for sentence in sentences:
102
+ inputs = cached_tokenizer(sentence, return_tensors="pt")
103
+ with torch.no_grad():
104
+ audio_part = cached_net_g(
105
+ input_ids=inputs.input_ids,
106
+ attention_mask=inputs.attention_mask,
107
+ lang_code=lang_code,
108
+ )[0, :]
109
+ total_audio_parts.append(audio_part)
110
 
111
+ x = torch.cat(total_audio_parts).cpu().numpy()
112
 
113
 
114
+ if soundscape and soundscape.strip():
115
 
116
+
117
+ speech_duration_secs = len(x) / 16000
118
+ target_duration = max(speech_duration_secs + 0.74, 2.0)
119
 
 
120
 
121
+ background_audio = audiogen.generate(
122
+ soundscape,
123
+ duration=target_duration,
124
+ cache_lim=max(4, int(cache_lim)) # at least allow 10 A/R stEps
125
+ ).numpy()
126
 
127
+ # PAD
128
+
129
+ len_speech = len(speech_audio)
130
+ len_background = len(background_audio)
131
+
132
+ if len_background > len_speech:
133
+ padding = np.zeros(len_background - len_speech,
134
+ dtype=np.float32)
135
+ speech_audio = np.concatenate([speech_audio, padding])
136
+ elif len_speech > len_background:
137
+ padding = np.zeros(len_speech - len_background,
138
+ dtype=np.float32)
139
+ background_audio = np.concatenate([background_audio, padding])
140
 
 
 
 
 
 
141
 
142
+ speech_audio = speech_audio[None, :]
143
+ background_audio = background_audio[None, :]
 
144
 
 
145
 
146
+ final_audio = np.concatenate([
147
+ 0.49 * speech_audio + 0.51 * background_audio,
148
+ 0.51 * background_audio + 0.49 * speech_audio
149
+ ], 0)
 
 
 
 
150
 
151
+ else:
152
+
153
+ final_audio = x
154
 
 
 
 
 
 
155
 
156
+ wavfile = '_vits_.wav'
157
+ audiofile.write(wavfile, final_audio, 16000)
158
+ return wavfile, wavfile # 2x file for [audio out & state to pass to the Emotion reco tAB]
159
 
 
160
 
165
 
166
  # TTS
167
  # VOICES = [f'wav/{vox}' for vox in os.listdir('wav')]
 
495
 
496
  _tts = StyleTTS2().to('cpu')
497
 
 
 
 
 
 
498
 
499
  with gr.Blocks(theme='huggingface') as demo:
 
 
 
 
500
  with gr.Tab(label="TTS"):
501
  with gr.Row():
502
  text_input = gr.Textbox(
 
523
 
524
  output_audio = gr.Audio(label="TTS Output")
525
 
 
 
 
 
 
526
  generate_button.click(
527
+ fn=audionar_tts,
528
+ inputs=[text_input, choice_dropdown, soundscape_input, kv_input],
529
+ outputs=[output_audio]
530
  )
531
 
532
+ with gr.Tab(label="API"):
533
  with gr.Row():
534
  with gr.Column():
535
+
 
 
 
 
536
 
537
  gr.Markdown("Only the first two seconds of the audio will be processed.")
 
 
 
 
538
 
 
539
 
540
  demo.launch(debug=True)
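One note on the wiring above: audionar_tts returns the wav path twice (the audio output plus the state that used to feed the removed Emotion recognition tab), while generate_button.click now lists a single output component. A hedged sketch of a thin wrapper, hypothetical and not part of the commit, in case Gradio rejects the extra return value:

# Hypothetical wrapper (not in the commit): drop the duplicated state value so the
# number of returned values matches the single gr.Audio output wired above.
def audionar_tts_single(text, lang, soundscape, cache_lim):
    wav_path, _state = audionar_tts(text, lang, soundscape, cache_lim)
    return wav_path

# generate_button.click(fn=audionar_tts_single,
#                       inputs=[text_input, choice_dropdown, soundscape_input, kv_input],
#                       outputs=[output_audio])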
textual.py ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
1
+ import re
2
+ import unicodedata
3
+ from num2words import num2words
4
+ from num2word_greek.numbers2words import convert_numbers
5
+
6
+ def only_greek_or_only_latin(text, lang='grc'):
7
+ '''
8
+ str: The converted string in the specified target script.
9
+ Characters not found in any mapping are preserved as is.
10
+ Latin accented characters in the input (e.g., 'É', 'ü') will
11
+ be preserved in their lowercase form (e.g., 'é', 'ü') if
12
+ converting to Latin.
13
+ '''
14
+
15
+ # --- Mapping Dictionaries ---
16
+ # Keys are in lowercase as input text is case-folded.
17
+ # If the output needs to maintain original casing, additional logic is required.
18
+
19
+ latin_to_greek_map = {
20
+ 'a': 'α', 'b': 'β', 'g': 'γ', 'd': 'δ', 'e': 'ε',
21
+ 'ch': 'τσο', # Example of a multi-character Latin sequence
22
+ 'z': 'ζ', 'h': 'χ', 'i': 'ι', 'k': 'κ', 'l': 'λ',
23
+ 'm': 'μ', 'n': 'ν', 'x': 'ξ', 'o': 'ο', 'p': 'π',
24
+ 'v': 'β', 'sc': 'σκ', 'r': 'ρ', 's': 'σ', 't': 'τ',
25
+ 'u': 'ου', 'f': 'φ', 'c': 'σ', 'w': 'β', 'y': 'γ',
26
+ }
27
+
28
+ greek_to_latin_map = {
29
+ 'ου': 'ou', # Prioritize common diphthongs/digraphs
30
+ 'α': 'a', 'β': 'v', 'γ': 'g', 'δ': 'd', 'ε': 'e',
31
+ 'ζ': 'z', 'η': 'i', 'θ': 'th', 'ι': 'i', 'κ': 'k',
32
+ 'λ': 'l', 'μ': 'm', 'ν': 'n', 'ξ': 'x', 'ο': 'o',
33
+ 'π': 'p', 'ρ': 'r', 'σ': 's', 'τ': 't', 'υ': 'y', # 'y' is a common transliteration for upsilon
34
+ 'φ': 'f', 'χ': 'ch', 'ψ': 'ps', 'ω': 'o',
35
+ 'ς': 's', # Final sigma
36
+ }
37
+
38
+ cyrillic_to_latin_map = {
39
+ 'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo', 'ж': 'zh',
40
+ 'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n', 'о': 'o',
41
+ 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'kh', 'ц': 'ts',
42
+ 'ч': 'ch', 'ш': 'sh', 'щ': 'shch', 'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu',
43
+ 'я': 'ya',
44
+ }
45
+
46
+ # Direct Cyrillic to Greek mapping based on phonetic similarity.
47
+ # These are approximations and may not be universally accepted transliterations.
48
+ cyrillic_to_greek_map = {
49
+ 'а': 'α', 'б': 'β', 'в': 'β', 'г': 'γ', 'д': 'δ', 'е': 'ε', 'ё': 'ιο', 'ж': 'ζ',
50
+ 'з': 'ζ', 'и': 'ι', 'й': 'ι', 'κ': 'κ', 'λ': 'λ', 'м': 'μ', 'н': 'ν', 'о': 'ο',
51
+ 'π': 'π', 'ρ': 'ρ', 'σ': 'σ', 'τ': 'τ', 'у': 'ου', 'ф': 'φ', 'х': 'χ', 'ц': 'τσ',
52
+ 'ч': 'τσ', # or τζ depending on desired sound
53
+ 'ш': 'σ', 'щ': 'σ', # approximations
54
+ 'ъ': '', 'ы': 'ι', 'ь': '', 'э': 'ε', 'ю': 'ιου',
55
+ 'я': 'ια',
56
+ }
57
+
58
+ # Convert the input text to lowercase, preserving accents for Latin characters.
59
+ # casefold() is used for more robust caseless matching across Unicode characters.
60
+ lowercased_text = text.lower() #casefold()
61
+ output_chars = []
62
+ current_index = 0
63
+
64
+ if lang == 'grc':
65
+ # Combine all relevant maps for direct lookup to Greek
66
+ conversion_map = {**latin_to_greek_map, **cyrillic_to_greek_map}
67
+
68
+ # Sort keys by length in reverse order to handle multi-character sequences first
69
+ sorted_source_keys = sorted(
70
+ list(latin_to_greek_map.keys()) + list(cyrillic_to_greek_map.keys()),
71
+ key=len,
72
+ reverse=True
73
+ )
74
+
75
+ while current_index < len(lowercased_text):
76
+ found_conversion = False
77
+ for key in sorted_source_keys:
78
+ if lowercased_text.startswith(key, current_index):
79
+ output_chars.append(conversion_map[key])
80
+ current_index += len(key)
81
+ found_conversion = True
82
+ break
83
+ if not found_conversion:
84
+ # If no specific mapping found, append the character as is.
85
+ # This handles unmapped characters and already Greek characters.
86
+ output_chars.append(lowercased_text[current_index])
87
+ current_index += 1
88
+ return ''.join(output_chars)
89
+
90
+ else: # Default to 'lat' conversion
91
+ # Combine Greek to Latin and Cyrillic to Latin maps.
92
+ # Cyrillic map keys will take precedence in case of overlap if defined after Greek.
93
+ combined_to_latin_map = {**greek_to_latin_map, **cyrillic_to_latin_map}
94
+
95
+ # Sort all relevant source keys by length in reverse for replacement
96
+ sorted_source_keys = sorted(
97
+ list(greek_to_latin_map.keys()) + list(cyrillic_to_latin_map.keys()),
98
+ key=len,
99
+ reverse=True
100
+ )
101
+
102
+ while current_index < len(lowercased_text):
103
+ found_conversion = False
104
+ for key in sorted_source_keys:
105
+ if lowercased_text.startswith(key, current_index):
106
+ latin_equivalent = combined_to_latin_map[key]
107
+
108
+ # Strip accents ONLY if the source character was from the Greek map.
109
+ # This preserves accents on original Latin characters (like 'é')
110
+ # and allows for intentional accent stripping from Greek transliterations.
111
+ if key in greek_to_latin_map:
112
+ normalized_latin = unicodedata.normalize('NFD', latin_equivalent)
113
+ stripped_latin = ''.join(c for c in normalized_latin if not unicodedata.combining(c))
114
+ output_chars.append(stripped_latin)
115
+ else:
116
+ output_chars.append(latin_equivalent)
117
+
118
+ current_index += len(key)
119
+ found_conversion = True
120
+ break
121
+
122
+ if not found_conversion:
123
+ # If no conversion happened from Greek or Cyrillic, append the character as is.
124
+ # This preserves existing Latin characters (including accented ones from input),
125
+ # numbers, punctuation, and other symbols.
126
+ output_chars.append(lowercased_text[current_index])
127
+ current_index += 1
128
+
129
+ return ''.join(output_chars)
130
+
131
+
132
+ # =====================================================
133
+ #
134
+
135
+ def fix_vocals(text, lang='ron'):
136
+
137
+ # Longer phrases should come before shorter ones to prevent partial matches.
138
+
139
+ ron_replacements = {
140
+ 'ţ': 'ț',
141
+ 'ț': 'ts',
142
+ 'î': 'u',
143
+ 'â': 'a',
144
+ 'ş': 's',
145
+ 'w': 'oui',
146
+ 'k': 'c',
147
+ 'l': 'll',
148
+ # Math symbols
149
+ 'sqrt': ' rădăcina pătrată din ',
150
+ '^': ' la puterea ',
151
+ '+': ' plus ',
152
+ ' - ': ' minus ', # only replace if standalone so to not say minus if is a-b-c
153
+ '*': ' ori ', # times
154
+ '/': ' împărțit la ', # divided by
155
+ '=': ' egal cu ', # equals
156
+ 'pi': ' pi ',
157
+ '<': ' mai mic decât ',
158
+ '>': ' mai mare decât',
159
+ '%': ' la sută ', # percent (from previous)
160
+ '(': ' paranteză deschisă ',
161
+ ')': ' paranteză închisă ',
162
+ '[': ' paranteză pătrată deschisă ',
163
+ ']': ' paranteză pătrată închisă ',
164
+ '{': ' acoladă deschisă ',
165
+ '}': ' acoladă închisă ',
166
+ '≠': ' nu este egal cu ',
167
+ '≤': ' mai mic sau egal cu ',
168
+ '≥': ' mai mare sau egal cu ',
169
+ '≈': ' aproximativ ',
170
+ '∞': ' infinit ',
171
+ '€': ' euro ',
172
+ '$': ' dolar ',
173
+ '£': ' liră ',
174
+ '&': ' și ', # and
175
+ '@': ' la ', # at
176
+ '#': ' diez ', # hash
177
+ '∑': ' sumă ',
178
+ '∫': ' integrală ',
179
+ '√': ' rădăcina pătrată a ', # more generic square root
180
+ }
181
+
182
+ eng_replacements = {
183
+ 'wik': 'weaky',
184
+ 'sh': 'ss',
185
+ 'ch': 'ttss',
186
+ 'oo': 'oeo',
187
+ # Math symbols for English
188
+ 'sqrt': ' square root of ',
189
+ '^': ' to the power of ',
190
+ '+': ' plus ',
191
+ ' - ': ' minus ',
192
+ '*': ' times ',
193
+ ' / ': ' divided by ',
194
+ '=': ' equals ',
195
+ 'pi': ' pi ',
196
+ '<': ' less than ',
197
+ '>': ' greater than ',
198
+ # Additional common math symbols from previous list
199
+ '%': ' percent ',
200
+ '(': ' open parenthesis ',
201
+ ')': ' close parenthesis ',
202
+ '[': ' open bracket ',
203
+ ']': ' close bracket ',
204
+ '{': ' open curly brace ',
205
+ '}': ' close curly brace ',
206
+ '∑': ' sum ',
207
+ '∫': ' integral ',
208
+ '√': ' square root of ',
209
+ '≠': ' not equals ',
210
+ '≤': ' less than or equals ',
211
+ '≥': ' greater than or equals ',
212
+ '≈': ' approximately ',
213
+ '∞': ' infinity ',
214
+ '€': ' euro ',
215
+ '$': ' dollar ',
216
+ '£': ' pound ',
217
+ '&': ' and ',
218
+ '@': ' at ',
219
+ '#': ' hash ',
220
+ }
221
+
222
+ serbian_replacements = {
223
+ 'rn': 'rrn',
224
+ 'ć': 'č',
225
+ 'c': 'č',
226
+ 'đ': 'd',
227
+ 'j': 'i',
228
+ 'l': 'lll',
229
+ 'w': 'v',
230
+ # https://huggingface.co/facebook/mms-tts-rmc-script_latin
231
+ 'sqrt': 'kvadratni koren iz',
232
+ '^': ' na stepen ',
233
+ '+': ' plus ',
234
+ ' - ': ' minus ',
235
+ '*': ' puta ',
236
+ ' / ': ' podeljeno sa ',
237
+ '=': ' jednako ',
238
+ 'pi': ' pi ',
239
+ '<': ' manje od ',
240
+ '>': ' veće od ',
241
+ '%': ' procenat ',
242
+ '(': ' otvorena zagrada ',
243
+ ')': ' zatvorena zagrada ',
244
+ '[': ' otvorena uglasta zagrada ',
245
+ ']': ' zatvorena uglasta zagrada ',
246
+ '{': ' otvorena vitičasta zagrada ',
247
+ '}': ' zatvorena vitičasta zagrada ',
248
+ '∑': ' suma ',
249
+ '∫': ' integral ',
250
+ '√': ' kvadratni koren ',
251
+ '≠': ' nije jednako ',
252
+ '≤': ' manje ili jednako od ',
253
+ '≥': ' veće ili jednako od ',
254
+ '≈': ' približno ',
255
+ '∞': ' beskonačnost ',
256
+ '€': ' evro ',
257
+ '$': ' dolar ',
258
+ '£': ' funta ',
259
+ '&': ' i ',
260
+ '@': ' et ',
261
+ '#': ' taraba ',
262
+ # Others
263
+ # 'rn': 'rrn',
264
+ # 'ć': 'č',
265
+ # 'c': 'č',
266
+ # 'đ': 'd',
267
+ # 'l': 'le',
268
+ # 'ij': 'i',
269
+ # 'ji': 'i',
270
+ # 'j': 'i',
271
+ # 'služ': 'sloooozz', # 'službeno'
272
+ # 'suver': 'siuveeerra', # 'suverena'
273
+ # 'država': 'dirrezav', # 'država'
274
+ # 'iči': 'ici', # 'Graniči'
275
+ # 's ': 'se', # a s with space
276
+ # 'q': 'ku',
277
+ # 'w': 'aou',
278
+ # 'z': 's',
279
+ # "š": "s",
280
+ # 'th': 'ta',
281
+ # 'v': 'vv',
282
+ # "ć": "č",
283
+ # "đ": "ď",
284
+ # "lj": "ľ",
285
+ # "nj": "ň",
286
+ # "ž": "z",
287
+ # "c": "č"
288
+ }
289
+
290
+ deu_replacements = {
291
+ 'sch': 'sh',
292
+ 'ch': 'kh',
293
+ 'ie': 'ee',
294
+ 'ei': 'ai',
295
+ 'ä': 'ae',
296
+ 'ö': 'oe',
297
+ 'ü': 'ue',
298
+ 'ß': 'ss',
299
+ # Math symbols for German
300
+ 'sqrt': ' Quadratwurzel aus ',
301
+ '^': ' hoch ',
302
+ '+': ' plus ',
303
+ ' - ': ' minus ',
304
+ '*': ' mal ',
305
+ ' / ': ' geteilt durch ',
306
+ '=': ' gleich ',
307
+ 'pi': ' pi ',
308
+ '<': ' kleiner als ',
309
+ '>': ' größer als',
310
+ # Additional common math symbols from previous list
311
+ '%': ' prozent ',
312
+ '(': ' Klammer auf ',
313
+ ')': ' Klammer zu ',
314
+ '[': ' eckige Klammer auf ',
315
+ ']': ' eckige Klammer zu ',
316
+ '{': ' geschweifte Klammer auf ',
317
+ '}': ' geschweifte Klammer zu ',
318
+ '∑': ' Summe ',
319
+ '∫': ' Integral ',
320
+ '√': ' Quadratwurzel ',
321
+ '≠': ' ungleich ',
322
+ '≤': ' kleiner oder gleich ',
323
+ '≥': ' größer oder gleich ',
324
+ '≈': ' ungefähr ',
325
+ '∞': ' unendlich ',
326
+ '€': ' euro ',
327
+ '$': ' dollar ',
328
+ '£': ' pfund ',
329
+ '&': ' und ',
330
+ '@': ' at ', # 'Klammeraffe' is also common but 'at' is simpler
331
+ '#': ' raute ',
332
+ }
333
+
334
+ fra_replacements = {
335
+ # French specific phonetic replacements (add as needed)
336
+ # e.g., 'ç': 's', 'é': 'e', etc.
337
+ 'w': 'v',
338
+ # Math symbols for French
339
+ 'sqrt': ' racine carrée de ',
340
+ '^': ' à la puissance ',
341
+ '+': ' plus ',
342
+ ' - ': ' moins ', # tiré ;
343
+ '*': ' fois ',
344
+ ' / ': ' divisé par ',
345
+ '=': ' égale ',
346
+ 'pi': ' pi ',
347
+ '<': ' inférieur à ',
348
+ '>': ' supérieur à ',
349
+ # Add more common math symbols as needed for French
350
+ '%': ' pour cent ',
351
+ '(': ' parenthèse ouverte ',
352
+ ')': ' parenthèse fermée ',
353
+ '[': ' crochet ouvert ',
354
+ ']': ' crochet fermé ',
355
+ '{': ' accolade ouverte ',
356
+ '}': ' accolade fermée ',
357
+ '∑': ' somme ',
358
+ '∫': ' intégrale ',
359
+ '√': ' racine carrée ',
360
+ '≠': ' n\'égale pas ',
361
+ '≤': ' inférieur ou égal à ',
362
+ '≥': ' supérieur ou égal à ',
363
+ '≈': ' approximativement ',
364
+ '∞': ' infini ',
365
+ '€': ' euro ',
366
+ '$': ' dollar ',
367
+ '£': ' livre ',
368
+ '&': ' et ',
369
+ '@': ' arobase ',
370
+ '#': ' dièse ',
371
+ }
372
+
373
+ hun_replacements = {
374
+ # Hungarian specific phonetic replacements (add as needed)
375
+ # e.g., 'á': 'a', 'é': 'e', etc.
376
+ 'ch': 'ts',
377
+ 'cs': 'tz',
378
+ 'g': 'gk',
379
+ 'w': 'v',
380
+ 'z': 'zz',
381
+ # Math symbols for Hungarian
382
+ 'sqrt': ' négyzetgyök ',
383
+ '^': ' hatvány ',
384
+ '+': ' plusz ',
385
+ ' - ': ' mínusz ',
386
+ '*': ' szorozva ',
387
+ ' / ': ' osztva ',
388
+ '=': ' egyenlő ',
389
+ 'pi': ' pi ',
390
+ '<': ' kisebb mint ',
391
+ '>': ' nagyobb mint ',
392
+ # Add more common math symbols as needed for Hungarian
393
+ '%': ' százalék ',
394
+ '(': ' nyitó zárójel ',
395
+ ')': ' záró zárójel ',
396
+ '[': ' nyitó szögletes zárójel ',
397
+ ']': ' záró szögletes zárójel ',
398
+ '{': ' nyitó kapcsos zárójel ',
399
+ '}': ' záró kapcsos zárójel ',
400
+ '∑': ' szumma ',
401
+ '∫': ' integrál ',
402
+ '√': ' négyzetgyök ',
403
+ '≠': ' nem egyenlő ',
404
+ '≤': ' kisebb vagy egyenlő ',
405
+ '≥': ' nagyobb vagy egyenlő ',
406
+ '≈': ' körülbelül ',
407
+ '∞': ' végtelen ',
408
+ '€': ' euró ',
409
+ '$': ' dollár ',
410
+ '£': ' font ',
411
+ '&': ' és ',
412
+ '@': ' kukac ',
413
+ '#': ' kettőskereszt ',
414
+ }
415
+
416
+ grc_replacements = {
417
+ # Ancient Greek specific phonetic replacements (add as needed)
418
+ # These are more about transliterating Greek letters if they are in the input text.
419
+ # Math symbols for Ancient Greek (literal translations)
420
+ 'sqrt': ' τετραγωνικὴ ῥίζα ',
421
+ '^': ' εἰς τὴν δύναμιν ',
422
+ '+': ' σὺν ',
423
+ ' - ': ' χωρὶς ',
424
+ '*': ' πολλάκις ',
425
+ ' / ': ' διαιρέω ',
426
+ '=': ' ἴσον ',
427
+ 'pi': ' πῖ ',
428
+ '<': ' ἔλαττον ',
429
+ '>': ' μεῖζον ',
430
+ # Add more common math symbols as needed for Ancient Greek
431
+ '%': ' τοῖς ἑκατόν ', # tois hekaton - 'of the hundred'
432
+ '(': ' ἀνοικτὴ παρένθεσις ',
433
+ ')': ' κλειστὴ παρένθεσις ',
434
+ '[': ' ἀνοικτὴ ἀγκύλη ',
435
+ ']': ' κλειστὴ ἀγκύλη ',
436
+ '{': ' ἀνοικτὴ σγουρὴ ἀγκύλη ',
437
+ '}': ' κλειστὴ σγουρὴ ἀγκύλη ',
438
+ '∑': ' ἄθροισμα ',
439
+ '∫': ' ὁλοκλήρωμα ',
440
+ '√': ' τετραγωνικὴ ῥίζα ',
441
+ '≠': ' οὐκ ἴσον ',
442
+ '≤': ' ἔλαττον ἢ ἴσον ',
443
+ '≥': ' μεῖζον ἢ ἴσον ',
444
+ '≈': ' περίπου ',
445
+ '∞': ' ἄπειρον ',
446
+ '€': ' εὐρώ ',
447
+ '$': ' δολάριον ',
448
+ '£': ' λίρα ',
449
+ '&': ' καὶ ',
450
+ '@': ' ἀτ ', # at
451
+ '#': ' δίεση ', # hash
452
+ }
453
+
454
+
455
+ # Select the appropriate replacement dictionary based on the language
456
+ replacements_map = {
457
+ 'grc': grc_replacements,
458
+ 'ron': ron_replacements,
459
+ 'eng': eng_replacements,
460
+ 'deu': deu_replacements,
461
+ 'fra': fra_replacements,
462
+ 'hun': hun_replacements,
463
+ 'rmc-script_latin': serbian_replacements,
464
+ }
465
+
466
+ current_replacements = replacements_map.get(lang)
467
+ if current_replacements:
468
+ # Sort replacements by length of the key in descending order.
469
+ # This is crucial for correctly replacing multi-character strings (like 'sqrt', 'sch')
470
+ # before their shorter substrings ('s', 'ch', 'q', 'r', 't').
471
+ sorted_replacements = sorted(current_replacements.items(), key=lambda item: len(item[0]), reverse=True)
472
+ for old, new in sorted_replacements:
473
+ text = text.replace(old, new)
474
+ return text
475
+ else:
476
+ # If the language is not supported, return the original text
477
+ print(f"Warning: Language '{lang}' not supported for text replacement. Returning original text.")
478
+ return text
479
+
480
+
481
+ def _num2words(text='01234', lang=None):
482
+ if lang == 'grc':
483
+ return convert_numbers(text)
484
+ return num2words(text, lang=lang) # HAS TO BE kwarg lang=lang
485
+
486
+
487
+ def transliterate_number(number_string,
488
+ lang=None):
489
+ if lang == 'rmc-script_latin':
490
+ lang = 'sr'
491
+ exponential_pronoun = ' puta deset na stepen od '
492
+ comma = ' tačka '
493
+ elif lang == 'ron':
494
+ lang = 'ro'
495
+ exponential_pronoun = ' tízszer a erejéig '
496
+ comma = ' virgulă '
497
+ elif lang == 'hun':
498
+ lang = 'hu'
499
+ exponential_pronoun = ' tízszer a erejéig '
500
+ comma = ' virgula '
501
+ elif lang == 'deu':
502
+ exponential_pronoun = ' mal zehn hoch '
503
+ comma = ' komma '
504
+ elif lang == 'fra':
505
+ lang = 'fr'
506
+ exponential_pronoun = ' puissance '
507
+ comma = 'virgule'
508
+ elif lang == 'grc':
509
+ exponential_pronoun = ' εις την δυναμην του '
510
+ comma = 'κομμα'
511
+ else:
512
+ lang = lang[:2]
513
+ exponential_pronoun = ' times ten to the power of '
514
+ comma = ' point '
515
+
516
+ def replace_number(match):
517
+ prefix = match.group(1) or ""
518
+ number_part = match.group(2)
519
+ suffix = match.group(5) or ""
520
+
521
+ try:
522
+ if 'e' in number_part.lower():
523
+ base, exponent = number_part.lower().split('e')
524
+ words = _num2words(base, lang=lang) + exponential_pronoun + _num2words(exponent, lang=lang)
525
+ elif '.' in number_part:
526
+ integer_part, decimal_part = number_part.split('.')
527
+ words = _num2words(integer_part, lang=lang) + comma + " ".join(
528
+ [_num2words(digit, lang=lang) for digit in decimal_part])
529
+ else:
530
+ words = _num2words(number_part, lang=lang)
531
+ return prefix + words + suffix
532
+ except ValueError:
533
+ return match.group(0) # Return original if conversion fails
534
+
535
+ pattern = r'([^\d]*)(\d+(\.\d+)?([Ee][+-]?\d+)?)([^\d]*)'
536
+ return re.sub(pattern, replace_number, number_string)
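A minimal usage sketch for the new textual.py module, applying the helpers in the same order app.py uses them before MMS-TTS tokenization; it assumes textual.py and its dependencies (num2words, num2word_greek) are importable, and the sample text is hypothetical:

from textual import only_greek_or_only_latin, transliterate_number, fix_vocals

lang_code = 'ron'                      # any key of replacements_map, e.g. 'eng', 'deu', 'grc'
text = 'sqrt(4) = 2, adica 50% din 4'  # hypothetical input

text = only_greek_or_only_latin(text, lang=lang_code)  # collapse to a single script
text = transliterate_number(text, lang=lang_code)      # spell out digits via num2words
text = fix_vocals(text, lang=lang_code)                # expand math symbols, adjust vowels
print(text)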