# -*- coding: utf-8 -*-
"""main.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/17Umb-Po_5pESiRv3-dcDRyootgqBjjWM
"""

import torch
from transformers import pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the Whisper model as an ASR pipeline
model_id = "rbcurzon/whisper-small-fil"
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    chunk_length_s=30,
    device=device,
)

"""**FastAPI**"""

import os

from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from google import genai
from google.genai import types

client = genai.Client(api_key=os.environ.get("GENAI_API_KEY"))  # Do not share api key


def translate(text, srcLang, tgtLang):
    # Ask Gemini to translate and return only the translated text
    sys_instruct = (
        "You are a professional translator. Generate a translation of the text "
        "and return only the result. Return only the translated text."
    )
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        config=types.GenerateContentConfig(system_instruction=sys_instruct),
        contents=f"Translate the text from {srcLang} to {tgtLang}: {text}",
    )
    print(response)
    return response.text


from silero_vad import (
    collect_chunks,
    get_speech_timestamps,
    load_silero_vad,
    read_audio,
    save_audio,
)

model = load_silero_vad()

app = FastAPI(
    title="Real-Time Audio Processor",
    description="Process and transcribe audio in real-time using Whisper",
)


@app.post("/translateAudio/")
async def translate_audio(
    file: UploadFile = File(...),
    srcLang: str = Form(...),
    tgtLang: str = Form(...),
):
    try:
        # Save the uploaded audio to disk
        content = await file.read()
        with open(file.filename, "wb") as f:
            f.write(content)
        print(f"Successfully uploaded {file.filename}")

        # Keep only the speech segments detected by Silero VAD
        wav = read_audio(file.filename)
        speech_timestamps = get_speech_timestamps(wav, model)
        save_audio(
            "only_speech.wav",
            collect_chunks(speech_timestamps, wav),
            sampling_rate=16000,
        )

        # Transcribe the speech-only audio with Whisper
        result = pipe(
            "only_speech.wav",
            batch_size=8,
            return_timestamps=True,
            generate_kwargs={"language": "tagalog", "return_timestamps": True},
        )

        # Translate the transcript with Gemini
        translatedResult = translate(result["text"], srcLang=srcLang, tgtLang=tgtLang)
        return {
            "transcribed_text": result["text"],
            "translated_text": translatedResult,
            "srcLang": srcLang,
            "tgtLang": tgtLang,
        }
    except Exception as error:
        print("Error: ", str(error))
        raise HTTPException(status_code=500, detail=str(error))
    finally:
        # Clean up the uploaded file and the intermediate speech-only audio
        if file.file:
            file.file.close()
        if os.path.exists(file.filename):
            os.remove(file.filename)
        if os.path.exists("only_speech.wav"):
            os.remove("only_speech.wav")


@app.post("/translateText/")
async def translate_text(text: str, srcLang: str = Form(...), tgtLang: str = Form(...)):
    result = translate(text, srcLang, tgtLang)
    print("Raw: ", text)
    print("Translated: ", result)
    return {"translated_text": result}
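
"""**Example client (sketch)**"""

# A minimal sketch of calling the two endpoints above, assuming the app is
# served locally (e.g. `uvicorn main:app --port 8000`). The base URL, the
# sample file name "sample.wav", and the language strings are assumptions for
# illustration only, not part of the service itself.
#
# import requests
#
# BASE_URL = "http://127.0.0.1:8000"  # assumed local uvicorn address
#
# def example_translate_audio():
#     # /translateAudio/ expects multipart form data: the audio file plus
#     # srcLang/tgtLang form fields.
#     with open("sample.wav", "rb") as audio:
#         response = requests.post(
#             f"{BASE_URL}/translateAudio/",
#             files={"file": ("sample.wav", audio, "audio/wav")},
#             data={"srcLang": "Tagalog", "tgtLang": "English"},
#         )
#     response.raise_for_status()
#     return response.json()  # transcribed_text, translated_text, srcLang, tgtLang
#
# def example_translate_text():
#     # Because `text` has no Form() marker, FastAPI treats it as a query
#     # parameter, while srcLang/tgtLang remain form fields.
#     response = requests.post(
#         f"{BASE_URL}/translateText/",
#         params={"text": "Magandang umaga"},
#         data={"srcLang": "Tagalog", "tgtLang": "English"},
#     )
#     response.raise_for_status()
#     return response.json()  # translated_text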