geethareddy committed on
Commit
78a5c3d
·
verified ·
1 Parent(s): 0076e70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -3
app.py CHANGED
@@ -27,9 +27,24 @@ prompts = {
27
  for key, text in prompts.items():
28
  generate_audio_prompt(text, f"{key}.mp3")
29
 
30
- # Clean transcribed text to allow only English letters, numbers, and basic punctuation
 
 
 
 
 
 
 
 
 
 
 
 
31
  def clean_transcription(text):
32
- return re.sub(r"[^a-zA-Z0-9@.\s]", "", text)
 
 
 
33
 
34
  @app.route("/")
35
  def index():
@@ -45,7 +60,7 @@ def transcribe():
45
  audio_file.save(audio_path)
46
 
47
  try:
48
- # Transcribe audio to text
49
  result = asr_model(audio_path, generate_kwargs={"language": "en"})
50
  transcribed_text = clean_transcription(result["text"])
51
  return jsonify({"text": transcribed_text})
 
27
  for key, text in prompts.items():
28
  generate_audio_prompt(text, f"{key}.mp3")
29
 
30
# Spoken names of symbols, as they appear in the transcribed text, mapped to
# the literal character each one stands for.
SYMBOL_MAPPING = {
    "at the rate": "@",
    "dot": ".",
    "underscore": "_",
    "hash": "#",
    "plus": "+",
    "dash": "-",
    "comma": ",",
    "space": " ",
}


def clean_transcription(text):
    """Lower-case *text* and turn spoken symbol names into the symbols.

    Only whole words/phrases listed in ``SYMBOL_MAPPING`` are replaced, so
    ordinary words that merely contain a key ("dotted", "hashtag",
    "surplus", "spaceship") are left intact. Surrounding whitespace is kept
    as transcribed.

    Args:
        text: Raw transcription string; may be empty or ``None``.

    Returns:
        The lower-cased text with symbol names substituted, or ``""`` when
        *text* is empty or ``None``.
    """
    # Function-scope import keeps this block self-contained.
    import re

    if not text:
        return ""

    # Longest keys first so a multi-word phrase always wins over any shorter
    # key that might overlap with it.
    phrases = sorted(SYMBOL_MAPPING, key=len, reverse=True)
    # \b on both sides restricts matches to whole words; a plain
    # str.replace() would also rewrite the inside of unrelated words.
    pattern = r"\b(?:" + "|".join(re.escape(p) for p in phrases) + r")\b"
    return re.sub(pattern, lambda m: SYMBOL_MAPPING[m.group(0)], text.lower())
48
 
49
  @app.route("/")
50
  def index():
 
60
  audio_file.save(audio_path)
61
 
62
  try:
63
+ # Transcribe audio to text with Whisper
64
  result = asr_model(audio_path, generate_kwargs={"language": "en"})
65
  transcribed_text = clean_transcription(result["text"])
66
  return jsonify({"text": transcribed_text})