{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Anime Audio Translator\n",
    "\n",
    "Upload a Japanese audio clip, transcribe it with OpenAI Whisper, and have a 4-bit quantized Llama 3.1 model present the text in Romaji, English and Filipino through a small Gradio app.\n",
    "\n",
    "**Requirements:** a GPU runtime and the Colab secrets `HF_TOKEN` and `OPENAI_API_KEY`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "kayiMLgsBnVt"
   },
   "outputs": [],
   "source": [
    "%pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai gradio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ByKEQHyhiLl7"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from functools import lru_cache\n",
    "\n",
    "import gradio as gr\n",
    "import requests\n",
    "import torch\n",
    "from google.colab import drive, userdata\n",
    "from huggingface_hub import login\n",
    "from IPython.display import Markdown, display, update_display\n",
    "from openai import OpenAI\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "9tzK_t3jiOo1"
   },
   "outputs": [],
   "source": [
    "AUDIO_MODEL = 'whisper-1'\n",
    "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
    "MAX_NEW_TOKENS = 2000  # upper bound on tokens generated per translation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "PYNmGaQniW73"
   },
   "outputs": [],
   "source": [
    "# Secrets come from the Colab secret store, never from the notebook itself.\n",
    "hf_token = userdata.get('HF_TOKEN')\n",
    "login(hf_token, add_to_git_credential=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "yGjVTeMEig-b"
   },
   "outputs": [],
   "source": [
    "openai_api_key = userdata.get(\"OPENAI_API_KEY\")\n",
    "openai = OpenAI(api_key=openai_api_key)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "6jboyASHilLz"
   },
   "outputs": [],
   "source": [
    "def message_prompt(transciption):\n",
    "    \"\"\"Build the chat messages asking the model to translate a Japanese transcript.\n",
    "\n",
    "    Args:\n",
    "        transciption: Japanese text produced by the transcription step. The\n",
    "            misspelled parameter name is kept so existing callers keep working.\n",
    "\n",
    "    Returns:\n",
    "        A list with two chat messages (system first, then user), each a dict\n",
    "        with the keys \"role\" and \"content\".\n",
    "    \"\"\"\n",
    "    system_message = \"\"\"\n",
    "    You are an assistant that translate japanese text into two different languages like 'English' and 'Filipino',\n",
    "    please display the translated text into markdown and include the original text from japanese using 'Romaji',\n",
    "    sample format would be - original text (converted to romaji): orignal_translated_text_here \\n\\n translated to english: translated_english_text_here \\n\\n translated to filipino: translated_filipino_text_here\n",
    "    \"\"\"\n",
    "\n",
    "    user_prompt = f\"Here is the transcripted japanese audio and translate it into two languages: '{transciption}'. No explaination just the translated languages only.\"\n",
    "\n",
    "    return [\n",
    "        {\"role\": \"system\", \"content\": system_message},\n",
    "        {\"role\": \"user\", \"content\": user_prompt},\n",
    "    ]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "nYrf_wKmmoUs"
   },
   "outputs": [],
   "source": [
    "# 4-bit NF4 weights with double quantization; bfloat16 is the compute dtype.\n",
    "quant_config = BitsAndBytesConfig(\n",
    "    load_in_4bit=True,\n",
    "    bnb_4bit_use_double_quant=True,\n",
    "    bnb_4bit_quant_type=\"nf4\",\n",
    "    bnb_4bit_compute_dtype=torch.bfloat16\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ESlOaRGioqUQ"
   },
   "outputs": [],
   "source": [
    "@lru_cache(maxsize=1)\n",
    "def _load_llama():\n",
    "    \"\"\"Load the Llama tokenizer and quantized model once and reuse them.\n",
    "\n",
    "    The result is cached so that repeated translation requests do not load\n",
    "    the checkpoint again.\n",
    "\n",
    "    Returns:\n",
    "        A (tokenizer, model) tuple.\n",
    "    \"\"\"\n",
    "    tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
    "    tokenizer.pad_token = tokenizer.eos_token\n",
    "    model = AutoModelForCausalLM.from_pretrained(\n",
    "        LLAMA, device_map=\"auto\", quantization_config=quant_config\n",
    "    )\n",
    "    return tokenizer, model\n",
    "\n",
    "\n",
    "def translation(messages):\n",
    "    \"\"\"Generate the model's reply for a list of chat messages.\n",
    "\n",
    "    Args:\n",
    "        messages: Chat messages in the format returned by message_prompt.\n",
    "\n",
    "    Returns:\n",
    "        The generated reply as text, without the prompt and without special\n",
    "        tokens.\n",
    "    \"\"\"\n",
    "    tokenizer, model = _load_llama()\n",
    "    inputs = tokenizer.apply_chat_template(\n",
    "        messages,\n",
    "        add_generation_prompt=True,  # open the assistant turn so the model answers\n",
    "        return_tensors=\"pt\",\n",
    "        return_dict=True,  # also returns the attention mask (pad token == eos token)\n",
    "    ).to(model.device)\n",
    "    streamer = TextStreamer(tokenizer, skip_prompt=True)\n",
    "    outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS, streamer=streamer)\n",
    "\n",
    "    # generate() returns prompt + completion; keep only the newly generated tokens.\n",
    "    prompt_length = inputs[\"input_ids\"].shape[-1]\n",
    "    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Transcribe and translate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "FSGFTvIEys0j"
   },
   "outputs": [],
   "source": [
    "def translate_text(file):\n",
    "    \"\"\"Transcribe a Japanese audio file and translate the transcript.\n",
    "\n",
    "    Args:\n",
    "        file: Path to the audio file as passed in by the Gradio audio input\n",
    "            (empty or None when nothing was uploaded).\n",
    "\n",
    "    Returns:\n",
    "        The translated text, or a readable message when no file was given or\n",
    "        any step raised an exception.\n",
    "    \"\"\"\n",
    "    if not file:\n",
    "        return \"Please upload an audio file before translating.\"\n",
    "\n",
    "    try:\n",
    "        # The context manager closes the file handle once the request is done.\n",
    "        with open(file, \"rb\") as audio_file:\n",
    "            transcription = openai.audio.transcriptions.create(\n",
    "                model=AUDIO_MODEL,\n",
    "                file=audio_file,\n",
    "                response_format=\"text\",\n",
    "                language=\"ja\"\n",
    "            )\n",
    "\n",
    "        messages = message_prompt(transcription)\n",
    "        return translation(messages)\n",
    "    except Exception as e:\n",
    "        # Show the failure in the UI instead of crashing the Gradio callback.\n",
    "        return f\"Unexpected error: {str(e)}\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gradio app"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "bexgSsWuvUmU"
   },
   "outputs": [],
   "source": [
    "with gr.Blocks() as demo:\n",
    "    gr.Markdown(\"# 🎙️ Anime Audio Translator\")\n",
    "    with gr.Row():\n",
    "        with gr.Column():\n",
    "            audio_file = gr.Audio(type=\"filepath\", label=\"Upload Audio\")\n",
    "            button = gr.Button(\"Translate\", variant=\"primary\")\n",
    "\n",
    "        with gr.Column():\n",
    "            gr.Label(value=\"Result of translated text to 'English' and 'Filipino'\", label=\"Character\")\n",
    "            output_text = gr.Markdown()\n",
    "\n",
    "    button.click(\n",
    "        fn=translate_text,\n",
    "        inputs=audio_file,\n",
    "        outputs=output_text,\n",
    "        trigger_mode=\"once\"\n",
    "    )\n",
    "demo.launch()"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "authorship_tag": "ABX9TyO+HrhlkaVchpoGIfmYAHdf",
   "gpuType": "T4",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python [conda env:base] *",
   "language": "python",
   "name": "conda-base-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}