In [None]:
!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai gradio

In [2]:
import os
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google.colab import drive, userdata
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
import torch
import gradio as gr

In [3]:
# Model name passed to the OpenAI audio transcription call.
AUDIO_MODEL = "whisper-1"
# Hugging Face model id loaded for the translation step.
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"

In [4]:
# Read the Hugging Face token from the Colab secret store and log in with it.
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [5]:
# Build the OpenAI client (used for audio transcription) from the key kept in
# the Colab secret store.
openai_api_key = userdata.get("OPENAI_API_KEY")
openai = OpenAI(
    api_key=openai_api_key,
)

In [36]:
def message_prompt(transciption):
    """Build the chat messages that ask the model to translate a Japanese transcript.

    Args:
        transciption: Japanese text produced by the speech-to-text step. The
            (misspelled) parameter name is kept so existing keyword callers
            keep working. The value is converted with ``str()`` and stripped
            of surrounding whitespace; ``None`` is treated as an empty string.

    Returns:
        list[dict]: Two chat messages -- a ``system`` message describing the
        expected markdown output format, followed by a ``user`` message that
        carries the transcript.
    """
    system_message = (
        "You are an assistant that translates Japanese text into two languages: "
        "'English' and 'Filipino'. "
        "Display the translated text as markdown and include the original "
        "Japanese text written in 'Romaji'. "
        "Use this format - "
        "original text (converted to romaji): original_romaji_text_here \n\n "
        "translated to english: translated_english_text_here \n\n "
        "translated to filipino: translated_filipino_text_here"
    )

    # Transcription output may carry a trailing newline; strip it so the text
    # sits cleanly inside the quotes of the user prompt.
    transcript = "" if transciption is None else str(transciption).strip()

    user_prompt = (
        "Here is the transcribed Japanese audio; translate it into the two "
        f"languages: '{transcript}'. "
        "No explanation, just the translations."
    )

    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
    ]

In [7]:
# Quantization settings handed to `from_pretrained` when the Llama model is
# loaded: 4-bit weights in NF4 format with double quantization, computing in
# bfloat16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [8]:
# Holds the tokenizer/model once loaded so repeated calls (one per button
# click) do not reload the checkpoint each time.
_LLAMA_CACHE = {}


def _load_llama():
    """Return the ``(tokenizer, model)`` pair for ``LLAMA``, loading it on first use only."""
    if "model" not in _LLAMA_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(LLAMA)
        tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            LLAMA, device_map="auto", quantization_config=quant_config
        )
        _LLAMA_CACHE["tokenizer"] = tokenizer
        _LLAMA_CACHE["model"] = model
    return _LLAMA_CACHE["tokenizer"], _LLAMA_CACHE["model"]


def translation(messages):
    """Generate the model's reply for a list of chat messages.

    Args:
        messages: Chat messages (dicts with ``role`` and ``content``) as
            accepted by ``tokenizer.apply_chat_template``.

    Returns:
        str: Only the newly generated text, with the prompt tokens and special
        tokens removed. Generated tokens are also streamed to stdout while the
        model runs.
    """
    tokenizer, model = _load_llama()

    # add_generation_prompt opens the assistant turn so the model answers
    # instead of continuing the user's message; return_dict also yields the
    # attention mask, which generate() needs because pad == eos here.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    streamer = TextStreamer(tokenizer)
    outputs = model.generate(**inputs, max_new_tokens=2000, streamer=streamer)

    # generate() returns prompt + completion; keep only the completion.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

In [37]:
def translate_text(file):
    """Transcribe a Japanese audio file and return its translation as text.

    Args:
        file: Path to the audio file, or ``None``/empty when nothing was
            uploaded.

    Returns:
        str: The text returned by ``translation``; a short prompt to upload
        audio when ``file`` is empty; or ``"Unexpected error: ..."`` when any
        step raises.
    """
    # The button can be clicked before any audio is provided.
    if not file:
        return "Please upload or record an audio file first."

    try:
        # The context manager closes the file handle even if the API call fails.
        with open(file, "rb") as audio_file:
            transcription = openai.audio.transcriptions.create(
                model=AUDIO_MODEL,
                file=audio_file,
                response_format="text",
                language="ja",
            )

        messages = message_prompt(transcription)
        return translation(messages)
    except Exception as e:
        # This is the UI boundary: report the failure as text instead of raising.
        return f"Unexpected error: {str(e)}"

In [None]:
# Two-column layout: audio input and trigger button in the first column, the
# result heading and translated markdown in the second.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Anime Audio Translator")

    with gr.Row():
        with gr.Column():
            audio_file = gr.Audio(label="Upload Audio", type="filepath")
            button = gr.Button("Translate", variant="primary")

        with gr.Column():
            gr.Label(
                label="Character",
                value="Result of translated text to 'English' and 'Filipino'",
            )
            output_text = gr.Markdown()

    # Clicking the button sends the audio file path to translate_text and
    # renders the returned string in the markdown component.
    button.click(fn=translate_text, inputs=audio_file, outputs=output_text, trigger_mode="once")

demo.launch()