import streamlit as st
from deep_translator import GoogleTranslator
from gtts import gTTS
from streamlit_mic_recorder import speech_to_text
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from sentence_transformers import SentenceTransformer, util
from io import BytesIO

import httpcore
# Legacy monkey-patch for a googletrans/httpcore incompatibility; kept for
# compatibility, although translation now goes through deep_translator.
setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
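# Dependencies assumed to be pinned in the Space's requirements.txt:
# streamlit, streamlit-mic-recorder, deep-translator, gTTS, transformers,
# sentence-transformers, torch, httpcore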
# Speech recognition is handled in the browser by streamlit_mic_recorder's
# speech_to_text, so no local recogniser needs to be initialised.
# Languages offered in the UI, mapped to ISO 639-1 codes
language_dict = {
    'bengali': 'bn',
    'english': 'en',
    'gujarati': 'gu',
    'hindi': 'hi',
    'kannada': 'kn',
    'malayalam': 'ml',
    'marathi': 'mr',
    'nepali': 'ne',
    'odia': 'or',
    'punjabi': 'pa',
    'tamil': 'ta',
    'telugu': 'te',
}
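# These ISO 639-1 codes are consumed by the browser speech recogniser
# (speech_to_text), by GoogleTranslator, and by gTTS for synthesis.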
# Record speech, translate it with NLLB and Google Translate, pick the better
# candidate by embedding similarity, and speak the result aloud.
def translate_speech():
    st.title("BITranSlate")
    # Inject a custom colour theme via CSS variables
    custom_theme = {
        "theme": {
            "primaryColor": "#000000",
            "backgroundColor": "#89939E",
            "secondaryBackgroundColor": "#262730",
            "textColor": "#FFFFFF",
            "font": "Serif"
        }
    }
    st.markdown(
        f"""
        <style>
        :root {{
            --primary-color: {custom_theme["theme"]["primaryColor"]};
            --background-color: {custom_theme["theme"]["backgroundColor"]};
            --secondary-background-color: {custom_theme["theme"]["secondaryBackgroundColor"]};
            --text-color: {custom_theme["theme"]["textColor"]};
            --font: {custom_theme["theme"]["font"]};
        }}
        </style>
        """,
        unsafe_allow_html=True
    )
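    # NOTE: overriding CSS variables like this is a lightweight hack; the
    # supported way to theme a Streamlit app is a [theme] section in
    # .streamlit/config.toml.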
    source_language_name = st.selectbox('Select the source language', language_dict)
    source_language = language_dict[source_language_name]
    target_language_name = st.selectbox('Select the target language', language_dict)
    target_language = language_dict[target_language_name]

    c1, c2 = st.columns(2)
    with c1:
        st.write("Convert speech to text:")
    with c2:
        # just_once=True: the transcript is returned on the rerun right after
        # recording and None on every other rerun.
        text = speech_to_text(language=source_language, use_container_width=True, just_once=True, key='STT')

    # Nothing recorded yet (or transcript already consumed): halt this rerun
    if not text:
        st.stop()
    sentence = text
    # FLORES-200 language codes expected by the NLLB model
    nllb_langs = {
        'hindi': 'hin_Deva',
        'english': 'eng_Latn',
        'punjabi': 'pan_Guru',
        'odia': 'ory_Orya',
        'bengali': 'ben_Beng',
        'telugu': 'tel_Telu',
        'tamil': 'tam_Taml',
        'nepali': 'npi_Deva',
        'marathi': 'mar_Deva',
        'malayalam': 'mal_Mlym',
        'kannada': 'kan_Knda',
        'gujarati': 'guj_Gujr',
    }
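    # Any language added to language_dict also needs a FLORES-200 code here,
    # otherwise the lookups below raise a KeyError.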
    # NLLB translation pipeline, cached so the 600M-parameter model is loaded
    # once rather than on every Streamlit rerun.
    @st.cache_resource
    def load_nllb_translator(src_lang, tgt_lang):
        model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
        tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
        return pipeline('translation', model=model, tokenizer=tokenizer,
                        src_lang=src_lang, tgt_lang=tgt_lang, max_length=4000)

    translator = load_nllb_translator(nllb_langs[source_language_name], nllb_langs[target_language_name])
    text_to_translate = text
    # Candidate 1: NLLB; candidate 2: Google Translate via deep_translator
    translated_text = translator(text_to_translate)[0]['translation_text']
    translated_text_google = GoogleTranslator(source='auto', target=target_language).translate(text_to_translate)
    # MuRIL produces multilingual sentence embeddings, so cosine similarity
    # between the source sentence and each candidate gives a rough
    # cross-lingual adequacy score.
    @st.cache_resource
    def load_embedder():
        return SentenceTransformer("google/muril-base-cased")

    model2 = load_embedder()
    # Compute embeddings for the source sentence and both candidates
    embedding = model2.encode(text_to_translate, convert_to_tensor=True)
    embeddings_nllb = model2.encode(translated_text, convert_to_tensor=True)
    embeddings_google = model2.encode(translated_text_google, convert_to_tensor=True)
    # Calculate cosine similarities against the source sentence
    cosine_score_nllb = util.cos_sim(embedding, embeddings_nllb).item()
    cosine_score_google = util.cos_sim(embedding, embeddings_google).item()
    # Keep the candidate whose embedding is closer to the source
    selected_translation = translated_text if cosine_score_nllb > cosine_score_google else translated_text_google
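    # Caveat: this cosine check is only a rough cross-lingual adequacy
    # heuristic; near-equal scores make the choice effectively arbitrary.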
st.write(f"Source Language: {source_language_name}")
st.write(f"Sentence: {sentence}")
st.write(f"Destination Language: {target_language_name}")
st.write(f"Translated Text from NLLB: {translated_text}")
st.write(f"Translated Text from Google Translate: {translated_text_google}")
st.write(f"More accurate translation: {selected_translation}")
    # Synthesize the selected translation with gTTS and stream it to the
    # browser. st.audio works on a headless server (e.g. a Hugging Face
    # Space); the previous pygame-mixer playback needs an audio device on the
    # machine running the script, so it only worked locally.
    mp3_fp = BytesIO()
    speak = gTTS(text=selected_translation, lang=target_language, slow=False)
    speak.write_to_fp(mp3_fp)
    mp3_fp.seek(0)
    st.audio(mp3_fp, format="audio/mp3")

translate_speech()
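# Run locally with: streamlit run app.py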