Spaces:
Paused
Paused
Update pipeline.py
Browse files- pipeline.py +21 -7
pipeline.py
CHANGED
@@ -30,14 +30,14 @@ translation_hdr = {
|
|
30 |
# Request headers
|
31 |
"Content-Type": "application/json",
|
32 |
"Cache-Control": "no-cache",
|
33 |
-
"Ocp-Apim-Subscription-Key": KHAYA_TOKEN,
|
34 |
}
|
35 |
|
36 |
transcription_hdr = {
|
37 |
# Request headers
|
38 |
'Content-Type': 'audio/mpeg',
|
39 |
"Cache-Control": "no-cache",
|
40 |
-
"Ocp-Apim-Subscription-Key": KHAYA_TOKEN,
|
41 |
}
|
42 |
|
43 |
tts_header = {
|
@@ -75,6 +75,21 @@ async def fetch(session, url, headers, data, semaphore, index):
|
|
75 |
print(f"Unexpected error: {e}")
|
76 |
return index, str(e)
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
async def translation_main(sentences, url, headers, lang, out_lang):
|
80 |
khaya_translations = [None] * len(sentences)
|
@@ -113,17 +128,16 @@ async def transcription_main(output_audio_path, url, headers, lang):
|
|
113 |
|
114 |
url_with_lang = url+"?language="+lang
|
115 |
|
116 |
-
tasks.append(
|
117 |
|
118 |
for f in tqdm(
|
119 |
asyncio.as_completed(tasks), total=len(tasks), desc="Transcribing Audio"
|
120 |
):
|
121 |
index, result = await f
|
122 |
# TODO: handle error response
|
123 |
-
khaya_output = result.
|
124 |
|
125 |
# preprocess the output before machine translation
|
126 |
-
khaya_output = khaya_output.strip("\"")
|
127 |
paragraph = eval(khaya_output)['text']
|
128 |
paragraph = paragraph.strip()
|
129 |
sentences = re.split('((?<=[.?!]")|((?<=[.?!])(?!")))\s*', paragraph)
|
@@ -132,8 +146,8 @@ async def transcription_main(output_audio_path, url, headers, lang):
|
|
132 |
# replace numbers with words
|
133 |
# sentences = [replace_numbers_with_words(sent) for sent in sentences]
|
134 |
|
135 |
-
|
136 |
-
return khaya_output
|
137 |
|
138 |
|
139 |
async def convert_text_to_speech(
|
|
|
30 |
# Request headers
|
31 |
"Content-Type": "application/json",
|
32 |
"Cache-Control": "no-cache",
|
33 |
+
"Ocp-Apim-Subscription-Key": f"{KHAYA_TOKEN}",
|
34 |
}
|
35 |
|
36 |
transcription_hdr = {
|
37 |
# Request headers
|
38 |
'Content-Type': 'audio/mpeg',
|
39 |
"Cache-Control": "no-cache",
|
40 |
+
"Ocp-Apim-Subscription-Key": f"{KHAYA_TOKEN}",
|
41 |
}
|
42 |
|
43 |
tts_header = {
|
|
|
75 |
print(f"Unexpected error: {e}")
|
76 |
return index, str(e)
|
77 |
|
78 |
+
async def fetch_data(session, url, headers, data, semaphore, index):
    """POST ``data`` to ``url`` and return the response body decoded as UTF-8.

    Args:
        session: Object exposing ``post(url, headers=..., data=..., timeout=...)``
            usable as an async context manager (presumably an
            ``aiohttp.ClientSession`` -- confirm against the caller).
        url: Endpoint to post to.
        headers: Request headers passed through unchanged.
        data: Request payload passed through unchanged.
        semaphore: Async context manager entered for the whole request, so
            the caller can bound how many requests run at once.
        index: Opaque value echoed back so the caller can match results to
            the tasks it scheduled.

    Returns:
        ``(index, text)`` on success. On any failure the exception is printed
        and ``(index, str(error))`` is returned instead, so callers cannot
        tell an error message from a real body by type alone.
    """
    async with semaphore:
        try:
            async with session.post(
                url, headers=headers, data=data, timeout=10
            ) as response:
                response.raise_for_status()
                # ``response.content`` is a stream reader and has no
                # ``decode`` method; read the full body as bytes first and
                # decode those bytes instead.
                body = await response.read()
                return index, body.decode("utf-8")
        except aiohttp.ClientError as e:
            print(f"Request error: {e}")
            return index, str(e)
        except Exception as e:
            # Best-effort by design: report the failure and hand the message
            # back rather than cancelling the sibling tasks.
            print(f"Unexpected error: {e}")
            return index, str(e)
|
92 |
+
|
93 |
|
94 |
async def translation_main(sentences, url, headers, lang, out_lang):
|
95 |
khaya_translations = [None] * len(sentences)
|
|
|
128 |
|
129 |
url_with_lang = url+"?language="+lang
|
130 |
|
131 |
+
tasks.append(fetch_data(session, url_with_lang, headers, data, semaphore, 0))
|
132 |
|
133 |
for f in tqdm(
|
134 |
asyncio.as_completed(tasks), total=len(tasks), desc="Transcribing Audio"
|
135 |
):
|
136 |
index, result = await f
|
137 |
# TODO: handle error response
|
138 |
+
khaya_output = result.strip("\"")
|
139 |
|
140 |
# preprocess the output before machine translation
|
|
|
141 |
paragraph = eval(khaya_output)['text']
|
142 |
paragraph = paragraph.strip()
|
143 |
sentences = re.split('((?<=[.?!]")|((?<=[.?!])(?!")))\s*', paragraph)
|
|
|
146 |
# replace numbers with words
|
147 |
# sentences = [replace_numbers_with_words(sent) for sent in sentences]
|
148 |
|
149 |
+
return sentences
|
150 |
+
#return khaya_output
|
151 |
|
152 |
|
153 |
async def convert_text_to_speech(
|