azunre committed on
Commit
46089f2
·
verified ·
1 Parent(s): 52370ec

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +21 -7
pipeline.py CHANGED
@@ -30,14 +30,14 @@ translation_hdr = {
30
  # Request headers
31
  "Content-Type": "application/json",
32
  "Cache-Control": "no-cache",
33
- "Ocp-Apim-Subscription-Key": KHAYA_TOKEN,
34
  }
35
 
36
  transcription_hdr = {
37
  # Request headers
38
  'Content-Type': 'audio/mpeg',
39
  "Cache-Control": "no-cache",
40
- "Ocp-Apim-Subscription-Key": KHAYA_TOKEN,
41
  }
42
 
43
  tts_header = {
@@ -75,6 +75,21 @@ async def fetch(session, url, headers, data, semaphore, index):
75
  print(f"Unexpected error: {e}")
76
  return index, str(e)
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  async def translation_main(sentences, url, headers, lang, out_lang):
80
  khaya_translations = [None] * len(sentences)
@@ -113,17 +128,16 @@ async def transcription_main(output_audio_path, url, headers, lang):
113
 
114
  url_with_lang = url+"?language="+lang
115
 
116
- tasks.append(fetch(session, url_with_lang, headers, data, semaphore, 0))
117
 
118
  for f in tqdm(
119
  asyncio.as_completed(tasks), total=len(tasks), desc="Transcribing Audio"
120
  ):
121
  index, result = await f
122
  # TODO: handle error response
123
- khaya_output = result.content.decode("utf-8")
124
 
125
  # preprocess the output before machine translation
126
- khaya_output = khaya_output.strip("\"")
127
  paragraph = eval(khaya_output)['text']
128
  paragraph = paragraph.strip()
129
  sentences = re.split('((?<=[.?!]")|((?<=[.?!])(?!")))\s*', paragraph)
@@ -132,8 +146,8 @@ async def transcription_main(output_audio_path, url, headers, lang):
132
  # replace numbers with words
133
  # sentences = [replace_numbers_with_words(sent) for sent in sentences]
134
 
135
- #return sentences
136
- return khaya_output
137
 
138
 
139
  async def convert_text_to_speech(
 
30
  # Request headers
31
  "Content-Type": "application/json",
32
  "Cache-Control": "no-cache",
33
+ "Ocp-Apim-Subscription-Key": f"{KHAYA_TOKEN}",
34
  }
35
 
36
  transcription_hdr = {
37
  # Request headers
38
  'Content-Type': 'audio/mpeg',
39
  "Cache-Control": "no-cache",
40
+ "Ocp-Apim-Subscription-Key": f"{KHAYA_TOKEN}",
41
  }
42
 
43
  tts_header = {
 
75
  print(f"Unexpected error: {e}")
76
  return index, str(e)
77
 
78
async def fetch_data(session, url, headers, data, semaphore, index):
    """POST ``data`` to ``url`` and return the response body as UTF-8 text.

    Args:
        session: HTTP client session exposing an aiohttp-style ``post``
            async context manager.
        url: Endpoint to post to.
        headers: Request headers sent with the call.
        data: Request payload, passed through unchanged.
        semaphore: Async context manager held for the duration of the
            request, so the caller can bound concurrency.
        index: Caller-supplied position, echoed back so results that
            complete out of order can be matched to their inputs.

    Returns:
        ``(index, text)`` on success. On any failure the error is printed
        and ``(index, str(error))`` is returned instead of raising, so
        callers must be prepared for an error message in place of a body.
    """
    async with semaphore:
        try:
            async with session.post(
                url, headers=headers, data=data, timeout=10
            ) as response:
                response.raise_for_status()
                # ``response.content`` is a stream reader and has no
                # ``decode`` method; read the raw bytes first, then decode.
                body = await response.read()
                return index, body.decode("utf-8")
        except aiohttp.ClientError as e:
            print(f"Request error: {e}")
            return index, str(e)
        except Exception as e:
            # Deliberate best-effort: report the failure to the caller as
            # text rather than aborting the whole batch of requests.
            print(f"Unexpected error: {e}")
            return index, str(e)
92
+
93
 
94
  async def translation_main(sentences, url, headers, lang, out_lang):
95
  khaya_translations = [None] * len(sentences)
 
128
 
129
  url_with_lang = url+"?language="+lang
130
 
131
+ tasks.append(fetch_data(session, url_with_lang, headers, data, semaphore, 0))
132
 
133
  for f in tqdm(
134
  asyncio.as_completed(tasks), total=len(tasks), desc="Transcribing Audio"
135
  ):
136
  index, result = await f
137
  # TODO: handle error response
138
+ khaya_output = result.strip("\"")
139
 
140
  # preprocess the output before machine translation
 
141
  paragraph = eval(khaya_output)['text']
142
  paragraph = paragraph.strip()
143
  sentences = re.split('((?<=[.?!]")|((?<=[.?!])(?!")))\s*', paragraph)
 
146
  # replace numbers with words
147
  # sentences = [replace_numbers_with_words(sent) for sent in sentences]
148
 
149
+ return sentences
150
+ #return khaya_output
151
 
152
 
153
  async def convert_text_to_speech(