Spaces:

geethareddy
/

boltvoice

Runtime error

geethareddy committed on Feb 1

Commit

78a5c3d

verified ·

1 Parent(s): 0076e70

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -27,9 +27,24 @@ prompts = {
 for key, text in prompts.items():
     generate_audio_prompt(text, f"{key}.mp3")
-# Clean transcribed text to allow only English letters, numbers, and basic punctuation
 def clean_transcription(text):
-    return re.sub(r"[^a-zA-Z0-9@.\s]", "", text)
 @app.route("/")
 def index():
@@ -45,7 +60,7 @@ def transcribe():
     audio_file.save(audio_path)
     try:
-        # Transcribe audio to text
         result = asr_model(audio_path, generate_kwargs={"language": "en"})
         transcribed_text = clean_transcription(result["text"])
         return jsonify({"text": transcribed_text})

 for key, text in prompts.items():
     generate_audio_prompt(text, f"{key}.mp3")
# Spoken phrase -> the symbol it stands for. Keys must be lower-case because
# clean_transcription() lower-cases the transcript before matching.
SYMBOL_MAPPING = {
    "at the rate": "@",
    "dot": ".",
    "underscore": "_",
    "hash": "#",
    "plus": "+",
    "dash": "-",
    "comma": ",",
    "space": " ",
}


def clean_transcription(text):
    """Lower-case a transcript and turn spoken symbol names into symbols.

    Each key of ``SYMBOL_MAPPING`` is replaced by its symbol, but only when
    it appears as a whole word/phrase. Plain substring replacement would
    corrupt ordinary words (e.g. "anecdote" -> "anec.e", "surplus" ->
    "sur+", "dashboard" -> "-board").

    Args:
        text: The raw transcript string. ``None`` or an empty string is
            treated as "nothing transcribed".

    Returns:
        The lower-cased transcript with spoken symbols substituted. Any
        whitespace surrounding a replaced phrase is left untouched.
    """
    # Local import: keeps this function independent of file-level imports
    # that are not visible from this part of the file.
    import re

    if not text:
        return ""

    lowered = text.lower()
    if not SYMBOL_MAPPING:
        return lowered

    # Longest phrases first so a multi-word phrase always wins over any
    # shorter phrase that could match at the same position.
    phrases = sorted(SYMBOL_MAPPING, key=len, reverse=True)

    # The lookarounds require a non-word character (or the string edge) on
    # both sides, so phrases embedded inside longer words are not touched.
    # The pattern is built per call so later edits to SYMBOL_MAPPING are
    # honoured; the re module caches compiled patterns internally.
    pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(p) for p in phrases) + r")(?!\w)"
    )

    # Single pass: text produced by a substitution is never re-scanned.
    return pattern.sub(lambda match: SYMBOL_MAPPING[match.group(0)], lowered)
 @app.route("/")
 def index():
     audio_file.save(audio_path)
     try:
+        # Transcribe audio to text with Whisper
         result = asr_model(audio_path, generate_kwargs={"language": "en"})
         transcribed_text = clean_transcription(result["text"])
         return jsonify({"text": transcribed_text})