File size: 6,409 Bytes
86b0bb9
 
 
 
 
 
 
 
 
 
 
 
 
79c05b3
0599c1f
f84787d
cb631af
86b0bb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51d2632
86b0bb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79c05b3
 
 
86b0bb9
79c05b3
86b0bb9
79c05b3
 
 
 
 
86b0bb9
 
 
 
79c05b3
86b0bb9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import streamlit as st
import speech_recognition as sr
from deep_translator import GoogleTranslator
from gtts import gTTS
from playsound import playsound
from PIL import Image
from streamlit_mic_recorder import mic_recorder,speech_to_text
from transformers import AutoTokenizer, M2M100ForConditionalGeneration
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
from sentence_transformers import SentenceTransformer, util
import torch
from pygame import mixer
from io import BytesIO
import httpcore
setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
# pygame.init()
# import logging
# import logging.handlers
# import queue
# import threading
# import time
# import urllib.request
# import os
# from collections import deque
# from pathlib import Path
# from typing import List

# import av
# import numpy as np
# import pydub
# import streamlit as st
# from twilio.rest import Client

# from streamlit_webrtc import WebRtcMode, webrtc_streamer
#from streamlit_mic_recorder import mic_recorder,speech_to_text
# Initialize the recognizer
recognizer = sr.Recognizer()

# Initialize the translator


# Supported languages, mapped to their ISO 639-1 codes. These codes are
# shared by the speech-to-text widget, Google Translate and gTTS.
language_dict = dict(
    bengali='bn',
    english='en',
    gujarati='gu',
    hindi='hi',
    malayalam='ml',
    marathi='mr',
    nepali='ne',
    odia='or',
    punjabi='pa',
    tamil='ta',
    telugu='te',
)


# Function to translate speech
def translate_speech():
    """Render the BITranSlate Streamlit app body.

    Captures speech via the mic widget, translates it with two engines
    (Meta NLLB-200 and Google Translate), scores both candidates against
    the source sentence using MuRIL sentence embeddings, shows all results,
    and speaks the higher-scoring translation aloud.

    Side effects: renders Streamlit widgets, downloads Hugging Face models
    on first run, and plays audio through pygame's mixer. Returns None.
    """
    st.title("BITranSlate")

    # Inject a dark custom theme by overriding Streamlit's CSS variables.
    custom_theme = {
        "theme": {
            "primaryColor": "#000000",
            "backgroundColor": "#89939E",
            "secondaryBackgroundColor": "#262730",
            "textColor": "#FFFFFF",
            "font": "Serif"
        }
    }
    st.markdown(
        f"""
        <style>
        :root {{
            --primary-color: {custom_theme["theme"]["primaryColor"]};
            --background-color: {custom_theme["theme"]["backgroundColor"]};
            --secondary-background-color: {custom_theme["theme"]["secondaryBackgroundColor"]};
            --text-color: {custom_theme["theme"]["textColor"]};
            --font: {custom_theme["theme"]["font"]};
        }}
        </style>
        """,
        unsafe_allow_html=True
    )

    # Language pickers (a selectbox over a dict iterates its keys).
    source_language_name = st.selectbox('Please input the source language', language_dict)
    source_language = language_dict[source_language_name]
    target_language_name = st.selectbox('Please input the target language', language_dict)
    target_language = language_dict[target_language_name]

    c1, c2 = st.columns(2)
    with c1:
        st.write("Convert speech to text:")
    with c2:
        text = speech_to_text(language=source_language, use_container_width=True, just_once=True, key='STT')

    # BUGFIX: speech_to_text returns None until the user has recorded
    # something; previously that None crashed the translation pipeline on
    # every initial render. Bail out early instead.
    if not text:
        st.info("Press the microphone button and speak to get a translation.")
        return

    sentence = text
    # NLLB-200 FLORES-200 language codes for the supported languages.
    nllb_langs = {
        'hindi': 'hin_Deva',
        'english': 'eng_Latn',
        'punjabi': 'pan_Guru',
        'odia': 'ory_Orya',
        'bengali': 'ben_Beng',
        # BUGFIX: was 'tel_Tulu' (Tulu is a different language); the
        # FLORES-200 code for Telugu is 'tel_Telu'.
        'telugu': 'tel_Telu',
        'tamil': 'tam_Taml',
        'nepali': 'npi_Deva',
        'marathi': 'mar_Deva',
        'malayalam': 'mal_Mlym',
        'kannada': 'kan_Knda',
        'gujarati': 'guj_Gujr',
    }

    # Candidate 1: Meta's NLLB-200 distilled model via a HF pipeline.
    translator = pipeline(
        'translation',
        model=AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M"),
        tokenizer=AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M"),
        src_lang=nllb_langs[source_language_name],
        tgt_lang=nllb_langs[target_language_name],
        max_length=4000,
    )
    text_to_translate = text
    translated_text = translator(text_to_translate)[0]['translation_text']
    # Candidate 2: Google Translate (via deep-translator).
    translated_text_google = GoogleTranslator(source='auto', target=target_language).translate(text_to_translate)

    # Score both candidates against the source sentence with multilingual
    # MuRIL embeddings; a higher cosine similarity means closer in meaning.
    model2 = SentenceTransformer("google/muril-base-cased")
    embedding = model2.encode(text_to_translate, convert_to_tensor=True)
    embeddings_nllb = model2.encode(translated_text, convert_to_tensor=True)
    embeddings_google = model2.encode(translated_text_google, convert_to_tensor=True)

    cosine_score_nllb = util.cos_sim(embedding, embeddings_nllb).item()
    cosine_score_google = util.cos_sim(embedding, embeddings_google).item()

    # Keep the translation with the higher similarity score.
    selected_translation = translated_text if cosine_score_nllb > cosine_score_google else translated_text_google

    st.write(f"Source Language: {source_language_name}")
    st.write(f"Sentence: {sentence}")
    st.write(f"Destination Language: {target_language_name}")
    st.write(f"Translated Text from NLLB: {translated_text}")
    st.write(f"Translated Text from Google Translate: {translated_text_google}")
    st.write(f"More accurate translation: {selected_translation}")

    # BUGFIX: speak the *selected* translation. Previously this always spoke
    # the NLLB output, even when the Google candidate scored higher.
    mp3_fp = BytesIO()
    speak = gTTS(text=selected_translation, lang=target_language, slow=False)
    speak.write_to_fp(mp3_fp)

    # Play the synthesized speech directly from memory (no temp file).
    mixer.init()
    mp3_fp.seek(0)
    mixer.music.load(mp3_fp, "mp3")
    mixer.music.play()

#if st.button("  CLICK HERE TO TRANSLATE  "):
# Streamlit re-executes the whole script on every user interaction, so the
# app body runs unconditionally at module top level (no button gate needed).
translate_speech()