Spaces:
Running
Running
import streamlit as st | |
import speech_recognition as sr | |
from deep_translator import GoogleTranslator | |
from gtts import gTTS | |
from playsound import playsound | |
from PIL import Image | |
from streamlit_mic_recorder import mic_recorder,speech_to_text | |
from transformers import AutoTokenizer, M2M100ForConditionalGeneration | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer | |
from sentence_transformers import SentenceTransformer, util | |
import torch | |
from pygame import mixer | |
from io import BytesIO | |
import httpcore | |
setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy') | |
# pygame.init() | |
# import logging | |
# import logging.handlers | |
# import queue | |
# import threading | |
# import time | |
# import urllib.request | |
# import os | |
# from collections import deque | |
# from pathlib import Path | |
# from typing import List | |
# import av | |
# import numpy as np | |
# import pydub | |
# import streamlit as st | |
# from twilio.rest import Client | |
# from streamlit_webrtc import WebRtcMode, webrtc_streamer | |
#from streamlit_mic_recorder import mic_recorder,speech_to_text | |
# Module-level SpeechRecognition recognizer instance.
recognizer = sr.Recognizer()
# Maps each selectable language name (lower-case English) to its two-letter
# language code. Insertion order is the order shown in the select boxes.
language_dict = dict(
    bengali='bn',
    english='en',
    gujarati='gu',
    hindi='hi',
    malayalam='ml',
    marathi='mr',
    nepali='ne',
    odia='or',
    punjabi='pa',
    tamil='ta',
    telugu='te',
)
# NLLB-200 language codes, keyed by the same display names as ``language_dict``.
_NLLB_LANG_CODES = {
    'hindi': 'hin_Deva',
    'english': 'eng_Latn',
    'punjabi': 'pan_Guru',
    'odia': 'ory_Orya',
    'bengali': 'ben_Beng',
    'telugu': 'tel_Telu',  # was 'tel_Tulu', which is not an NLLB language code
    'tamil': 'tam_Taml',
    'nepali': 'npi_Deva',
    'marathi': 'mar_Deva',
    'malayalam': 'mal_Mlym',
    'kannada': 'kan_Knda',
    'gujarati': 'guj_Gujr',
}

_NLLB_CHECKPOINT = "facebook/nllb-200-distilled-600M"
_SIMILARITY_CHECKPOINT = "google/muril-base-cased"


@st.cache_resource
def _load_nllb():
    """Load the NLLB model and tokenizer once instead of on every rerun."""
    model = AutoModelForSeq2SeqLM.from_pretrained(_NLLB_CHECKPOINT)
    tokenizer = AutoTokenizer.from_pretrained(_NLLB_CHECKPOINT)
    return model, tokenizer


@st.cache_resource
def _load_similarity_model():
    """Load the sentence-embedding model once instead of on every rerun."""
    return SentenceTransformer(_SIMILARITY_CHECKPOINT)


def _inject_theme_css():
    """Emit the app's colour/font settings as CSS custom properties."""
    custom_theme = {
        "theme": {
            "primaryColor": "#000000",
            "backgroundColor": "#89939E",
            "secondaryBackgroundColor": "#262730",
            "textColor": "#FFFFFF",
            "font": "Serif"
        }
    }
    theme = custom_theme["theme"]
    st.markdown(
        f"""
        <style>
        :root {{
        --primary-color: {theme["primaryColor"]};
        --background-color: {theme["backgroundColor"]};
        --secondary-background-color: {theme["secondaryBackgroundColor"]};
        --text-color: {theme["textColor"]};
        --font: {theme["font"]};
        }}
        </style>
        """,
        unsafe_allow_html=True
    )


def _similarity_to_source(source_text, candidate_text):
    """Return the cosine similarity between the embeddings of two texts."""
    scorer = _load_similarity_model()
    source_embedding = scorer.encode(source_text, convert_to_tensor=True)
    candidate_embedding = scorer.encode(candidate_text, convert_to_tensor=True)
    return util.cos_sim(source_embedding, candidate_embedding).item()


def translate_speech():
    """Render the translator page: record speech, translate it, speak it.

    The user picks a source and a target language and records speech, which
    is transcribed in the source language. The transcript is translated
    twice, once with NLLB and once with Google Translate. Each translation
    is compared to the transcript by embedding cosine similarity, and the
    closer one is reported as the more accurate translation and offered as
    synthesised audio.

    Returns:
        None. All results are written to the Streamlit page.
    """
    st.title("BITranSlate")
    _inject_theme_css()

    source_language_name = st.selectbox('Please input the source language', language_dict)
    source_language = language_dict[source_language_name]
    target_language_name = st.selectbox('Please input the target language', language_dict)
    target_language = language_dict[target_language_name]

    c1, c2 = st.columns(2)
    with c1:
        st.write("Convert speech to text:")
    with c2:
        text = speech_to_text(
            language=source_language,
            use_container_width=True,
            just_once=True,
            key='STT',
        )

    # Nothing has been transcribed until the user records something; the
    # translation steps below cannot run on an empty/missing transcript.
    if not text or not text.strip():
        st.info("Record some speech above to translate it.")
        return

    # Only the heavy model/tokenizer objects are cached; the pipeline wrapper
    # is rebuilt per run because it carries the selected language pair.
    model, tokenizer = _load_nllb()
    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=_NLLB_LANG_CODES[source_language_name],
        tgt_lang=_NLLB_LANG_CODES[target_language_name],
        max_length=4000,
    )
    translated_text = translator(text)[0]['translation_text']
    translated_text_google = GoogleTranslator(
        source='auto', target=target_language
    ).translate(text)

    # Keep whichever translation is closer to the transcript; ties go to
    # Google Translate.
    cosine_score_nllb = _similarity_to_source(text, translated_text)
    cosine_score_google = _similarity_to_source(text, translated_text_google)
    if cosine_score_nllb > cosine_score_google:
        selected_translation = translated_text
    else:
        selected_translation = translated_text_google

    st.write(f"Source Language: {source_language_name}")
    st.write(f"Sentence: {text}")
    st.write(f"Destination Language: {target_language_name}")
    st.write(f"Translated Text from NLLB: {translated_text}")
    st.write(f"Translated Text from Google Translate: {translated_text_google}")
    st.write(f"More accurate translation: {selected_translation}")

    # Speak the translation that was selected above (previously the NLLB
    # output was always spoken, regardless of the comparison).
    try:
        speak = gTTS(text=selected_translation, lang=target_language, slow=False)
    except ValueError:
        # gTTS rejects language codes it cannot synthesise; the written
        # translations above are still valid, so only skip the audio.
        st.warning(f"Speech output is not available for {target_language_name}.")
        return

    mp3_fp = BytesIO()
    speak.write_to_fp(mp3_fp)
    mp3_fp.seek(0)
    # Serve the audio to the browser: playing it through a server-side audio
    # device is inaudible to remote users and fails on hosts without one.
    st.audio(mp3_fp, format="audio/mp3")
# Streamlit executes the script as ``__main__``, so the page still renders
# under ``streamlit run``, while importing this module no longer builds the UI.
if __name__ == "__main__":
    translate_speech()