chenoi committed
Commit 7ca00b5 · 1 Parent(s): cd1cab2

Update app.py

Files changed (1)
  1. app.py +150 -168
app.py CHANGED
@@ -1,21 +1,4 @@
  #!/usr/bin/env python3
- #
- # Copyright 2022 Xiaomi Corp. (authors: Fangjun Kuang)
- #
- # See LICENSE for clarification regarding multiple authors
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
  # References:
  # https://gradio.app/docs/#dropdown

@@ -37,7 +20,7 @@ from model import decode, get_pretrained_model, language_to_models, sample_rate

  languages = list(language_to_models.keys())

-
  def convert_to_wav(in_filename: str) -> str:
      """Convert the input audio file to a wave file"""
      out_filename = in_filename + ".wav"
@@ -54,7 +37,7 @@ def convert_to_wav(in_filename: str) -> str:

      return out_filename

-
  def build_html_output(s: str, style: str = "result_item_success"):
      return f"""
      <div class='result'>
@@ -64,57 +47,7 @@ def build_html_output(s: str, style: str = "result_item_success"):
      </div>
      """

- def process_url(
-     language: str,
-     repo_id: str,
-     decoding_method: str,
-     num_active_paths: int,
-     url: str,
- ):
-     logging.info(f"Processing URL: {url}")
-     with tempfile.NamedTemporaryFile() as f:
-         try:
-             urllib.request.urlretrieve(url, f.name)
-
-             return process(
-                 in_filename=f.name,
-                 language=language,
-                 repo_id=repo_id,
-                 decoding_method=decoding_method,
-                 num_active_paths=num_active_paths,
-             )
-         except Exception as e:
-             logging.info(str(e))
-             return "", build_html_output(str(e), "result_item_error")
-
- def process_uploaded_file(
-     language: str,
-     repo_id: str,
-     decoding_method: str,
-     num_active_paths: int,
-     in_filename: str,
- ):
-     if in_filename is None or in_filename == "":
-         return "", build_html_output(
-             "Please first upload a file and then click "
-             'the button "submit for recognition"',
-             "result_item_error",
-         )
-
-     logging.info(f"Processing uploaded file: {in_filename}")
-     try:
-         return process(
-             in_filename=in_filename,
-             language=language,
-             repo_id=repo_id,
-             decoding_method=decoding_method,
-             num_active_paths=num_active_paths,
-         )
-     except Exception as e:
-         logging.info(str(e))
-         return "", build_html_output(str(e), "result_item_error")
-
-
  def process_microphone(
      language: str,
      repo_id: str,
@@ -143,7 +76,7 @@ def process_microphone(
          logging.info(str(e))
          return "", build_html_output(str(e), "result_item_error")

-
  @torch.no_grad()
  def process(
      language: str,
@@ -200,26 +133,8 @@ def process(
      return text, build_html_output(info)


- title = "# Automatic Speech Recognition with Next-gen Kaldi"
- description = """
- This space shows how to do automatic speech recognition with Next-gen Kaldi.
-
- Please visit
- <https://huggingface.co/spaces/k2-fsa/streaming-automatic-speech-recognition>
- for streaming speech recognition with **Next-gen Kaldi**.
-
- It is running on CPU within a docker container provided by Hugging Face.
-
- See more information by visiting the following links:
-
- - <https://github.com/k2-fsa/icefall>
- - <https://github.com/k2-fsa/sherpa>
- - <https://github.com/k2-fsa/k2>
- - <https://github.com/lhotse-speech/lhotse>
-
- If you want to deploy it locally, please see
- <https://k2-fsa.github.io/sherpa/>
- """

  # css style is copied from
  # https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L113
@@ -239,35 +154,135 @@ def update_model_dropdown(language: str):
      raise ValueError(f"Unsupported language: {language}")


- demo = gr.Blocks(css=css)
-

  with demo:
      gr.Markdown(title)
-     language_choices = list(language_to_models.keys())

      language_radio = gr.Radio(
          label="Language",
          choices=language_choices,
          value=language_choices[0],
      )
      model_dropdown = gr.Dropdown(
          choices=language_to_models[language_choices[0]],
          label="Select a model",
          value=language_to_models[language_choices[0]][0],
      )

      language_radio.change(
          update_model_dropdown,
          inputs=language_radio,
          outputs=model_dropdown,
      )

      decoding_method_radio = gr.Radio(
          label="Decoding method",
          choices=["greedy_search", "modified_beam_search"],
          value="greedy_search",
      )

      num_active_paths_slider = gr.Slider(
          minimum=1,
@@ -275,33 +290,33 @@ with demo:
          step=1,
          label="Number of active paths for modified_beam_search",
      )
-
-     with gr.Tabs():
-         with gr.TabItem("Upload from disk"):
-             uploaded_file = gr.Audio(
-                 source="upload", # Choose between "microphone", "upload"
-                 type="filepath",
-                 optional=False,
-                 label="Upload from disk",
-             )
-             upload_button = gr.Button("Submit for recognition")
-             uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
-             uploaded_html_info = gr.HTML(label="Info")
-
-             gr.Examples(
-                 examples=examples,
-                 inputs=[
-                     language_radio,
-                     model_dropdown,
-                     decoding_method_radio,
-                     num_active_paths_slider,
-                     uploaded_file,
-                 ],
-                 outputs=[uploaded_output, uploaded_html_info],
-                 fn=process_uploaded_file,
              )
-
          with gr.TabItem("Record from microphone"):
              microphone = gr.Audio(
                  source="microphone", # Choose between "microphone", "upload"
                  type="filepath",
@@ -312,44 +327,8 @@ with demo:
              record_button = gr.Button("Submit for recognition")
              recorded_output = gr.Textbox(label="Recognized speech from recordings")
              recorded_html_info = gr.HTML(label="Info")
-
-             gr.Examples(
-                 examples=examples,
-                 inputs=[
-                     language_radio,
-                     model_dropdown,
-                     decoding_method_radio,
-                     num_active_paths_slider,
-                     microphone,
-                 ],
-                 outputs=[recorded_output, recorded_html_info],
-                 fn=process_microphone,
-             )
-
-         with gr.TabItem("From URL"):
-             url_textbox = gr.Textbox(
-                 max_lines=1,
-                 placeholder="URL to an audio file",
-                 label="URL",
-                 interactive=True,
-             )
-
-             url_button = gr.Button("Submit for recognition")
-             url_output = gr.Textbox(label="Recognized speech from URL")
-             url_html_info = gr.HTML(label="Info")
-
-         upload_button.click(
-             process_uploaded_file,
-             inputs=[
-                 language_radio,
-                 model_dropdown,
-                 decoding_method_radio,
-                 num_active_paths_slider,
-                 uploaded_file,
-             ],
-             outputs=[uploaded_output, uploaded_html_info],
-         )
-
          record_button.click(
              process_microphone,
              inputs=[
@@ -362,19 +341,20 @@ with demo:
              outputs=[recorded_output, recorded_html_info],
          )

-         url_button.click(
-             process_url,
-             inputs=[
-                 language_radio,
-                 model_dropdown,
-                 decoding_method_radio,
-                 num_active_paths_slider,
-                 url_textbox,
-             ],
-             outputs=[url_output, url_html_info],
-         )

-     gr.Markdown(description)

  torch.set_num_threads(1)
  torch.set_num_interop_threads(1)
@@ -386,6 +366,8 @@ torch._C._set_graph_executor_optimize(False)
  if __name__ == "__main__":
      formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

      logging.basicConfig(format=formatter, level=logging.INFO)

      demo.launch()
 
1
  #!/usr/bin/env python3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  # References:
3
  # https://gradio.app/docs/#dropdown
4
 
 
20
 
21
  languages = list(language_to_models.keys())
22
 
23
+ # 将输入的音频文件转换为.wav格式。它使用ffmpeg工具将输入文件转换为16kHz的.wav文件,然后对转换后的文件进行base64编码。
24
  def convert_to_wav(in_filename: str) -> str:
25
  """Convert the input audio file to a wave file"""
26
  out_filename = in_filename + ".wav"
 
37
 
38
  return out_filename
39
 
40
+ # 函数build_html_output用于构建HTML格式的输出结果。
41
  def build_html_output(s: str, style: str = "result_item_success"):
42
  return f"""
43
  <div class='result'>
 
47
  </div>
48
  """
49
 
+ # Handle audio recorded from the user's microphone: check that a recording exists, then call process(); if anything goes wrong, the exception is caught and returned as an error message.
  def process_microphone(
      language: str,
      repo_id: str,

          logging.info(str(e))
          return "", build_html_output(str(e), "result_item_error")

+ # Process the audio input, run speech recognition, and return the recognized text plus some metadata. It first calls convert_to_wav to convert the audio to .wav and records the start time, then calls get_pretrained_model to fetch the pretrained model and uses decode to recognize the audio. Finally it computes the total audio duration, the processing time, and the real-time factor (RTF, processing time divided by audio duration).
  @torch.no_grad()
  def process(
      language: str,

      return text, build_html_output(info)


+ title = "# For Interview!!! Fight!"


  # css style is copied from
  # https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L113

      raise ValueError(f"Unsupported language: {language}")


+ import os
+
+ import openai
+ import gradio as gr

+ # Load your API key from an environment variable or secret management service
+ openai.api_key = os.environ.get("OPENAI_API_KEY")
+
+
+ ####################################################################################################################
+ ################################## Prompt1 - ask questions ###############################################
+ ####################################################################################################################
+ messages = [
+     {
+         "role": "system",
+         "content": "You are now serving as an HR for a technology company and would like to ask me some interview questions. If you find my answers difficult to understand or unclear, please feel free to ask me to clarify the unclear parts. Alternatively, if you are interested, you can also ask me some follow-up questions. Please do not attempt to correct my answers or provide suggestions. The conversation should simulate the real interview."
+     },
+ ]
+
+ def process_input(user_message):
+     global messages
+
+     # Append user message to conversation
+     messages.append({"role": "user", "content": user_message})
+
+     # Call OpenAI API
+     response = openai.ChatCompletion.create(model="gpt-3.5-turbo-16k", messages=messages)
+
+     # Get the assistant's message from the response
+     assistant_message = response['choices'][0]['message']['content']
+
+     # Append assistant message to conversation
+     messages.append({"role": "assistant", "content": assistant_message})
+
+     # Create conversation history
+     conversation_history = ""
+     for message in messages:
+         role = message["role"]
+         content = message["content"]
+         conversation_history += f"{role.title()}: {content}\n"
+
+     return assistant_message, conversation_history
+
+ def generate_download_content():
+     global messages
+
+     # Create conversation history
+     conversation_history = ""
+     for message in messages:
+         role = message["role"]
+         content = message["content"]
+         conversation_history += f"{role.title()}: {content}\n"
+
+     return conversation_history
+
+ apple = "The quick brown fox jumps over the lazy cat."
+ textbox_input = gr.inputs.Textbox(default=apple)
+ tts_interface = gr.load("huggingface/facebook/fastspeech2-en-ljspeech",
+                         inputs=textbox_input,
+                         description="TTS using FastSpeech2",
+                         title="Text to Speech (TTS)",
+                         examples=[["The quick brown fox jumps over the lazy dog."]])
+
+
+ ####################################################################################################################
+ ################################## Prompt2 - return result ###############################################
+ ####################################################################################################################
+ # results
+ def langchain_query(txt_file, user_message):
+     # Open the file
+     if txt_file is None:
+         preloaded_text = "what do you think of the conversations below?..."
+     else:
+         preloaded_text = txt_file.read().decode('utf-8')
+
+     results = [ {
+         "role": "system",
+         "content": "You are now a professional job analysis analyst. Below, I need you to help me provide advice to the job seeker in the following conversation."
+     }, {"role": "user", "content": preloaded_text + "below is the conversation between me and HR, How do you think I performed? Based on our previous conversation, please correct my response to make it more logical, structured, professional and colloquial. My reply may contain some filler words and verbal tics(catchphrase); please provide suggestions for improvement in this area as well, with the aim of meeting Australian workplace standards. If my answer is too short(less than 2 minutes), you should give advices on how to expand my answer." + user_message}]
+
+     # Call OpenAI API
+     response = openai.ChatCompletion.create(model="gpt-3.5-turbo-16k", messages=results)
+
+     # Get the assistant's message from the response
+     assistant_message = response['choices'][0]['message']['content']
+
+     return assistant_message
+
+ ###############################################################################
+ ######################### UI layout ###################################
+ ################################################################
+ # Create a gr.Blocks app and add components to it: gr.Markdown, gr.Radio, gr.Dropdown, gr.Slider, gr.Tabs, gr.TabItem, gr.Audio, gr.Button, gr.Textbox, gr.HTML and gr.Examples, each configured to respond to user input.
+ demo = gr.Blocks(css=css)
  with demo:
+     # Markdown area that shows the title.
      gr.Markdown(title)
+     # All available language options.
+     # language_choices = list(language_to_models.keys())
+     language_choices = ['English']
+     print(language_choices)

+     # The next lines create a radio button group and a dropdown for choosing the language and the corresponding model.
      language_radio = gr.Radio(
          label="Language",
          choices=language_choices,
          value=language_choices[0],
      )
+     # print(f"radio: {language_radio}")
+
      model_dropdown = gr.Dropdown(
          choices=language_to_models[language_choices[0]],
          label="Select a model",
          value=language_to_models[language_choices[0]][0],
      )
+     print(f"dropdown: {model_dropdown}")

+     # When the user changes the selected radio option, the dropdown contents are updated.
      language_radio.change(
          update_model_dropdown,
          inputs=language_radio,
          outputs=model_dropdown,
      )

+     # Another radio button group and a slider let the user pick the decoding method and the number of active paths.
      decoding_method_radio = gr.Radio(
          label="Decoding method",
          choices=["greedy_search", "modified_beam_search"],
          value="greedy_search",
      )
+     print(f"decoding_method_radio: {decoding_method_radio}")

      num_active_paths_slider = gr.Slider(
          minimum=1,
 
          step=1,
          label="Number of active paths for modified_beam_search",
      )
+     print(f"num_active_paths_slider: {num_active_paths_slider}")
+
+
+
+     ##############################################################################################
+     ######################### Main section ##################################
+     ########################################################################
+     # Create a tab container.
+
+     with gr.Tabs() as tabs:
+
+         with gr.TabItem("Chat with AI"):
+             textbox_input = gr.inputs.Textbox(lines=5, placeholder="Type your message here...")
+             textbox_output = gr.outputs.Textbox(label="Assistant's response")
+             conversation_output = gr.outputs.Textbox(label="Conversation history")
+             submit_button = gr.Button("Submit")
+             # download_link = gr.outputs.Download(fn=generate_download_content, label="Download conversation")
+
+             submit_button.click(
+                 process_input,
+                 inputs=[textbox_input],
+                 outputs=[textbox_output, conversation_output],
              )
+
+         # The user can record audio from the microphone for speech recognition.
          with gr.TabItem("Record from microphone"):
+             gr.TabbedInterface([tts_interface], ["FastSpeech2"])
              microphone = gr.Audio(
                  source="microphone", # Choose between "microphone", "upload"
                  type="filepath",

              record_button = gr.Button("Submit for recognition")
              recorded_output = gr.Textbox(label="Recognized speech from recordings")
              recorded_html_info = gr.HTML(label="Info")
+
+         # Clicking the record button calls the corresponding handler.
          record_button.click(
              process_microphone,
              inputs=[

              outputs=[recorded_output, recorded_html_info],
          )

+         with gr.TabItem("Results"):
+             filename_input = gr.inputs.File(label="Upload .txt file", optional=True)
+             textbox_input = gr.inputs.Textbox(lines=5, placeholder="put all history here")
+             textbox_output = gr.outputs.Textbox(label="Assistant's response")
+             submit_button = gr.Button("Submit")
+
+             submit_button.click(
+                 langchain_query,
+                 inputs=[filename_input, textbox_input],
+                 outputs=[textbox_output],
+             )

+
+

  torch.set_num_threads(1)
  torch.set_num_interop_threads(1)

  if __name__ == "__main__":
      formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

+     # Configure the logging format and level.
      logging.basicConfig(format=formatter, level=logging.INFO)

+     # Launch the Gradio interface.
      demo.launch()
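
For readers skimming the diff: the new interview-practice flow is built on a single ChatCompletion loop that both process_input (the "Chat with AI" tab) and langchain_query (the "Results" tab) follow. Below is a minimal, self-contained sketch of that pattern, assuming the openai 0.x SDK (openai.ChatCompletion, as used above) and an OPENAI_API_KEY environment variable; the ask() helper and the shortened prompt are illustrative, not part of app.py.

# Minimal sketch of the chat loop behind the "Chat with AI" tab
# (assumes the openai 0.x SDK and an OPENAI_API_KEY environment variable;
#  the names used here are illustrative, not part of app.py).
import os

import openai

openai.api_key = os.environ["OPENAI_API_KEY"]

# The system prompt frames the model as an HR interviewer, as in app.py.
history = [{"role": "system", "content": "You are an HR interviewer; ask me interview questions."}]

def ask(user_message: str) -> str:
    """Append the user's turn, call the API, store and return the reply."""
    history.append({"role": "user", "content": user_message})
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k",
        messages=history,
    )
    reply = response["choices"][0]["message"]["content"]
    history.append({"role": "assistant", "content": reply})
    return reply

if __name__ == "__main__":
    print(ask("Tell me about yourself."))

Re-sending the full history on every turn is what gives the model the interview context; it also means the prompt grows with each exchange, which is presumably why the 16k-context model is used. The "Results" tab makes one more call of the same shape, with the whole transcript pasted into a single user message that asks for feedback on the candidate's answers.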