Spaces:

Ghana-NLP
/

demo-dubbing

Paused

App Files Community

azunre committed 1 day ago

Commit

46089f2

verified ·

1 Parent(s): 52370ec

Update pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +21 -7

pipeline.py CHANGED Viewed

@@ -30,14 +30,14 @@ translation_hdr = {
     # Request headers
     "Content-Type": "application/json",
     "Cache-Control": "no-cache",
-    "Ocp-Apim-Subscription-Key": KHAYA_TOKEN,
 }
 transcription_hdr = {
     # Request headers
     'Content-Type': 'audio/mpeg',
     "Cache-Control": "no-cache",
-    "Ocp-Apim-Subscription-Key": KHAYA_TOKEN,
 }
 tts_header = {
@@ -75,6 +75,21 @@ async def fetch(session, url, headers, data, semaphore, index):
             print(f"Unexpected error: {e}")
             return index, str(e)
 async def translation_main(sentences, url, headers, lang, out_lang):
     khaya_translations = [None] * len(sentences)
@@ -113,17 +128,16 @@ async def transcription_main(output_audio_path, url, headers, lang):
         url_with_lang = url+"?language="+lang
-        tasks.append(fetch(session, url_with_lang, headers, data, semaphore, 0))
         for f in tqdm(
             asyncio.as_completed(tasks), total=len(tasks), desc="Transcribing Audio"
         ):
             index, result = await f
             # TODO: handle error response
-            khaya_output = result.content.decode("utf-8")
         # preprocess the output before machine translation
-        khaya_output = khaya_output.strip("\"")
         paragraph = eval(khaya_output)['text']
         paragraph = paragraph.strip()
         sentences = re.split('((?<=[.?!]")|((?<=[.?!])(?!")))\s*', paragraph)
@@ -132,8 +146,8 @@ async def transcription_main(output_audio_path, url, headers, lang):
         # replace numbers with words
         # sentences = [replace_numbers_with_words(sent) for sent in sentences]
-    #return sentences
-    return khaya_output
 async def convert_text_to_speech(

     # Request headers
     "Content-Type": "application/json",
     "Cache-Control": "no-cache",
+    "Ocp-Apim-Subscription-Key": f"{KHAYA_TOKEN}",
 }
 transcription_hdr = {
     # Request headers
     'Content-Type': 'audio/mpeg',
     "Cache-Control": "no-cache",
+    "Ocp-Apim-Subscription-Key": f"{KHAYA_TOKEN}",
 }
 tts_header = {
             print(f"Unexpected error: {e}")
             return index, str(e)
+async def fetch_data(session, url, headers, data, semaphore, index):
+    """POST ``data`` to ``url`` and return the response body as text.
+
+    Args:
+        session: Object whose ``post(...)`` is used as an async context
+            manager (presumably an ``aiohttp.ClientSession`` -- confirm
+            against the caller).
+        url: Endpoint to post to.
+        headers: Request headers passed through to ``session.post``.
+        data: Request payload passed through to ``session.post``.
+        semaphore: Async context manager held for the duration of the
+            request (presumably an ``asyncio.Semaphore`` limiting
+            concurrency -- confirm against the caller).
+        index: Opaque value echoed back so the caller can match the result
+            to its request.
+
+    Returns:
+        ``(index, body_text)`` on success, or ``(index, error_message)`` if
+        the request raised. Errors are printed and returned as strings, not
+        re-raised, so callers must tell the two cases apart themselves.
+    """
+    async with semaphore:
+        try:
+            async with session.post(
+                url, headers=headers, data=data, timeout=10
+            ) as response:
+                response.raise_for_status()
+                # ``response.content`` is a stream reader with no ``decode``
+                # method; calling it raised AttributeError, which the broad
+                # handler below turned into a bogus "result". ``text()``
+                # reads the full body and decodes it.
+                return index, await response.text(encoding="utf-8")
+        except aiohttp.ClientError as e:
+            print(f"Request error: {e}")
+            return index, str(e)
+        except Exception as e:
+            # Deliberate best-effort: any other failure (e.g. a timeout) is
+            # reported to the caller as a string rather than raised.
+            print(f"Unexpected error: {e}")
+            return index, str(e)
 async def translation_main(sentences, url, headers, lang, out_lang):
     khaya_translations = [None] * len(sentences)
         url_with_lang = url+"?language="+lang
+        tasks.append(fetch_data(session, url_with_lang, headers, data, semaphore, 0))
         for f in tqdm(
             asyncio.as_completed(tasks), total=len(tasks), desc="Transcribing Audio"
         ):
             index, result = await f
             # TODO: handle error response
+            khaya_output = result.strip("\"")
         # preprocess the output before machine translation
         paragraph = eval(khaya_output)['text']
         paragraph = paragraph.strip()
         sentences = re.split('((?<=[.?!]")|((?<=[.?!])(?!")))\s*', paragraph)
         # replace numbers with words
         # sentences = [replace_numbers_with_words(sent) for sent in sentences]
+    return sentences
+    #return khaya_output
 async def convert_text_to_speech(