import streamlit as st
import speech_recognition as sr
from deep_translator import GoogleTranslator
from gtts import gTTS
from playsound import playsound
from PIL import Image
from streamlit_mic_recorder import mic_recorder, speech_to_text
from transformers import AutoTokenizer, M2M100ForConditionalGeneration
from transformers import AutoModelForSeq2SeqLM, pipeline
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
from sentence_transformers import SentenceTransformer, util
import torch
from pygame import mixer
from io import BytesIO
import httpcore

# Workaround: newer httpcore releases removed SyncHTTPTransport, which some
# translation clients still look up at import time.
setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')

# pygame.init()
# import logging
# import logging.handlers
# import queue
# import threading
# import time
# import urllib.request
# import os
# from collections import deque
# from pathlib import Path
# from typing import List
# import av
# import numpy as np
# import pydub
# import streamlit as st
# from twilio.rest import Client
# from streamlit_webrtc import WebRtcMode, webrtc_streamer
# from streamlit_mic_recorder import mic_recorder, speech_to_text

# Initialize the recognizer
recognizer = sr.Recognizer()

# Language dictionary (ISO codes used by gTTS and Google Translate)
language_dict = {
    'bengali': 'bn',
    'english': 'en',
    'gujarati': 'gu',
    'hindi': 'hi',
    'malayalam': 'ml',
    'marathi': 'mr',
    'nepali': 'ne',
    'odia': 'or',
    'punjabi': 'pa',
    'tamil': 'ta',
    'telugu': 'te',
}


# Function to translate speech
def translate_speech():
    # source_language_name = recognize_speech("Please speak the source language name (e.g., 'English'): ")
    st.title("BITranSlate")
    # st.write("Record your voice, and play the recorded audio:")
    # audio = mic_recorder(start_prompt="⏺️", stop_prompt="⏹️", key='recorder')

    custom_theme = {
        "theme": {
            "primaryColor": "#000000",
            "backgroundColor": "#89939E",
            "secondaryBackgroundColor": "#262730",
            "textColor": "#FFFFFF",
            "font": "Serif"
        }
    }
    st.markdown(
        f"""
        """,
        unsafe_allow_html=True
    )

    source_language_name = st.selectbox('Please input the source language', language_dict)
    source_language = language_dict[source_language_name]
    target_language_name = st.selectbox('Please input the target language', language_dict)
    target_language = language_dict[target_language_name]

    c1, c2 = st.columns(2)
    with c1:
        st.write("Convert speech to text:")
    with c2:
        text = speech_to_text(language=source_language, use_container_width=True, just_once=True, key='STT')
    sentence = text

    # Nothing to translate until the user has recorded some speech
    # (speech_to_text returns None before the first recording).
    if not text:
        st.info("Record some speech to translate.")
        return

    # FLORES-200 language codes used by the NLLB model
    nllb_langs = {
        'hindi': 'hin_Deva',
        'english': 'eng_Latn',
        'punjabi': 'pan_Guru',
        'odia': 'ory_Orya',
        'bengali': 'ben_Beng',
        'telugu': 'tel_Telu',
        'tamil': 'tam_Taml',
        'nepali': 'npi_Deva',
        'marathi': 'mar_Deva',
        'malayalam': 'mal_Mlym',
        'kannada': 'kan_Knda',
        'gujarati': 'guj_Gujr',
    }

    # translator_google = Translator(service_urls=['translate.googleapis.com'])
    # translator_google = google_translator()

    # Translation 1: NLLB-200 (distilled 600M checkpoint)
    translator = pipeline(
        'translation',
        model=AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M"),
        tokenizer=AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M"),
        src_lang=nllb_langs[source_language_name],
        tgt_lang=nllb_langs[target_language_name],
        max_length=4000,
    )
    text_to_translate = text
    translated_text = translator(text_to_translate)[0]['translation_text']

    # Translation 2: Google Translate via deep_translator
    translated_text_google = GoogleTranslator(source='auto', target=target_language).translate(text_to_translate)
    # translated_text_google = translator_google.translate(text_to_translate, lang_tgt=target_language)
    # translated_text_google = translator_google.translate(text_to_translate, src=source_language, dest=target_language)

    # Multilingual MuRIL encoder used to compare each translation against the source
    model2 = SentenceTransformer("google/muril-base-cased")

    # Compute embeddings for the sentences
    embedding = model2.encode(text_to_translate, convert_to_tensor=True)
    embeddings_nllb = model2.encode(translated_text, convert_to_tensor=True)
    embeddings_google = model2.encode(translated_text_google, convert_to_tensor=True)

    # Calculate cosine similarities
    cosine_score_nllb = util.cos_sim(embedding, embeddings_nllb).item()
    cosine_score_google = util.cos_sim(embedding, embeddings_google).item()

    # Select the translation with the higher cosine similarity score
    selected_translation = translated_text if cosine_score_nllb > cosine_score_google else translated_text_google

    st.write(f"Source Language: {source_language_name}")
    st.write(f"Sentence: {sentence}")
    st.write(f"Destination Language: {target_language_name}")
    st.write(f"Translated Text from NLLB: {translated_text}")
    st.write(f"Translated Text from Google Translate: {translated_text_google}")
    st.write(f"More accurate translation: {selected_translation}")

    # Use Google Text-to-Speech to speak the translated text (kept in memory)
    # speak = gTTS(text=translated_text, lang=target_language, slow=False)
    # speak.save("translated_voice.mp3")
    mp3_fp = BytesIO()
    speak = gTTS(text=translated_text, lang=target_language, slow=False)
    speak.write_to_fp(mp3_fp)

    # Play the translated voice
    mixer.init()
    mp3_fp.seek(0)
    mixer.music.load(mp3_fp, "mp3")
    mixer.music.play()

    # Play the translated voice (file-based alternative)
    # mixer.init()
    # mixer.music.load('translated_voice.mp3')
    # mixer.music.play()
    # playsound('translated_voice.mp3')


# if st.button(" CLICK HERE TO TRANSLATE "):
translate_speech()
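
# Usage note (assumption: this script is saved as app.py; adjust the filename to match your repo):
#   streamlit run app.py
# The app records speech in the browser, translates it with both NLLB and Google Translate,
# and speaks the NLLB translation aloud through pygame on the machine running Streamlit.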