import streamlit as st
import speech_recognition as sr
from deep_translator import GoogleTranslator
from gtts import gTTS
from playsound import playsound
from PIL import Image
from streamlit_mic_recorder import mic_recorder, speech_to_text
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, M2M100ForConditionalGeneration, pipeline
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
from sentence_transformers import SentenceTransformer, util
import torch
from pygame import mixer
from io import BytesIO
import httpcore
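# Compatibility shim: newer httpcore releases removed SyncHTTPTransport, which some
# translation clients (e.g. googletrans) still reference. This appears to be a leftover
# workaround for that; it is harmless with deep-translator, which is used below.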
setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
# pygame.init()
# import logging
# import logging.handlers
# import queue
# import threading
# import time
# import urllib.request
# import os
# from collections import deque
# from pathlib import Path
# from typing import List
# import av
# import numpy as np
# import pydub
# import streamlit as st
# from twilio.rest import Client
# from streamlit_webrtc import WebRtcMode, webrtc_streamer
#from streamlit_mic_recorder import mic_recorder,speech_to_text
# Initialize the recognizer
recognizer = sr.Recognizer()
# Map language names to ISO-639-1 codes (used by the browser speech recognizer, Google Translate and gTTS)
language_dict = {
'bengali': 'bn',
'english': 'en',
'gujarati': 'gu',
'hindi': 'hi',
'malayalam': 'ml',
'marathi': 'mr',
'nepali': 'ne',
'odia': 'or',
'punjabi': 'pa',
'tamil': 'ta',
'telugu': 'te',
}
# Function to translate speech
def translate_speech():
    """Record speech, translate it with NLLB-200 and Google Translate, pick the better result and speak it."""
    # source_language_name = recognize_speech("Please speak the source language name (e.g., 'English'): ")
    st.title("BITranSlate")
    # st.write("Record your voice, and play the recorded audio:")
    # audio = mic_recorder(start_prompt="⏺️", stop_prompt="⏹️", key='recorder')
    custom_theme = {
        "theme": {
            "primaryColor": "#000000",
            "backgroundColor": "#89939E",
            "secondaryBackgroundColor": "#262730",
            "textColor": "#FFFFFF",
            "font": "Serif"
        }
    }
    st.markdown(
        f"""
        <style>
        :root {{
            --primary-color: {custom_theme["theme"]["primaryColor"]};
            --background-color: {custom_theme["theme"]["backgroundColor"]};
            --secondary-background-color: {custom_theme["theme"]["secondaryBackgroundColor"]};
            --text-color: {custom_theme["theme"]["textColor"]};
            --font: {custom_theme["theme"]["font"]};
        }}
        </style>
        """,
        unsafe_allow_html=True
    )
    source_language_name = st.selectbox('Please input the source language', language_dict)
    source_language = language_dict[source_language_name]
    target_language_name = st.selectbox('Please input the target language', language_dict)
    target_language = language_dict[target_language_name]
    c1, c2 = st.columns(2)
    with c1:
        st.write("Convert speech to text:")
    with c2:
        text = speech_to_text(language=source_language, use_container_width=True, just_once=True, key='STT')
    # speech_to_text returns None until a recording has been transcribed; stop the run until then
    if not text:
        st.stop()
    sentence = text
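    # FLORES-200 language codes expected by the NLLB-200 checkpoint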
    nllb_langs = {'hindi': 'hin_Deva',
                  'english': 'eng_Latn',
                  'punjabi': 'pan_Guru',
                  'odia': 'ory_Orya',
                  'bengali': 'ben_Beng',
                  'telugu': 'tel_Telu',
                  'tamil': 'tam_Taml',
                  'nepali': 'npi_Deva',
                  'marathi': 'mar_Deva',
                  'malayalam': 'mal_Mlym',
                  'kannada': 'kan_Knda',
                  'gujarati': 'guj_Gujr',
                  }
    # translator_google = Translator(service_urls=[
    #     'translate.googleapis.com'
    # ])
    # translator_google = google_translator()
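    # Note: building the pipeline below reloads NLLB-200 on every Streamlit rerun.
    # A cached loader could avoid that (a sketch, assuming Streamlit >= 1.18 for st.cache_resource):
    # @st.cache_resource
    # def load_nllb():
    #     tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
    #     model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
    #     return tokenizer, model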
    translator = pipeline(
        'translation',
        model=AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M"),
        tokenizer=AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M"),
        src_lang=nllb_langs[source_language_name],
        tgt_lang=nllb_langs[target_language_name],
        max_length=4000,
    )
    text_to_translate = text
    translated_text = translator(text_to_translate)[0]['translation_text']
    translated_text_google = GoogleTranslator(source='auto', target=target_language).translate(text_to_translate)
    # translated_text_google = translator_google.translate(text_to_translate, lang_tgt=target_language)
    # translated_text_google = translator_google.translate(text_to_translate, src=source_language, dest=target_language)
    model2 = SentenceTransformer("google/muril-base-cased")
    # Compute embeddings for the source sentence and both candidate translations
    embedding = model2.encode(text_to_translate, convert_to_tensor=True)
    embeddings_nllb = model2.encode(translated_text, convert_to_tensor=True)
    embeddings_google = model2.encode(translated_text_google, convert_to_tensor=True)
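    # MURIL is a multilingual model for Indian languages, so source and translated sentences can be
    # compared in the same embedding space; the translation closer to the source is treated as the better one.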
    # Calculate cosine similarities
    cosine_score_nllb = util.cos_sim(embedding, embeddings_nllb).item()
    cosine_score_google = util.cos_sim(embedding, embeddings_google).item()
    # Select the translation with the higher cosine similarity score
    selected_translation = translated_text if cosine_score_nllb > cosine_score_google else translated_text_google
st.write(f"Source Language: {source_language_name}")
st.write(f"Sentence: {sentence}")
st.write(f"Destination Language: {target_language_name}")
st.write(f"Translated Text from NLLB: {translated_text}")
st.write(f"Translated Text from Google Translate: {translated_text_google}")
st.write(f"More accurate translation: {selected_translation}")
    # Use Google Text-to-Speech to speak the selected translation, keeping the audio in memory
    # speak = gTTS(text=translated_text, lang=target_language, slow=False)
    # speak.save("translated_voice.mp3")
    mp3_fp = BytesIO()
    speak = gTTS(text=selected_translation, lang=target_language, slow=False)
    speak.write_to_fp(mp3_fp)
    # Play the translated voice
    mixer.init()
    mp3_fp.seek(0)
    mixer.music.load(mp3_fp, "mp3")
    mixer.music.play()
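    # Note: pygame plays the audio on the machine running Streamlit; on a headless host an
    # in-page player would be needed instead, e.g. (alternative not used in the original):
    # st.audio(mp3_fp.getvalue(), format="audio/mp3")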
    # Play the translated voice
    # mixer.init()
    # mixer.music.load('translated_voice.mp3')
    # mixer.music.play()
    # playsound('translated_voice.mp3')

# if st.button(" CLICK HERE TO TRANSLATE "):
translate_speech()