Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
"""main.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/17Umb-Po_5pESiRv3-dcDRyootgqBjjWM
"""
import torch | |
from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline | |
# Use the first CUDA device when torch reports one; otherwise run on CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda:0"

# Build the speech-recognition pipeline once at import time so every request
# reuses the same loaded model.
model_id = "rbcurzon/whisper-small-fil"
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    device=device,
)
"""**FastAPI**""" | |
import os | |
import io | |
from fastapi import FastAPI, WebSocket, UploadFile, File | |
from fastapi.middleware.cors import CORSMiddleware | |
from pydantic import BaseModel | |
from google import genai | |
from google.genai import types | |
# Gemini client shared by the whole module. The key is read from the
# GENAI_API_KEY environment variable -- never hard-code or share it.
client = genai.Client(
    api_key=os.environ.get("GENAI_API_KEY"),
)
def translate(text, srcLang, tgtLang):
    """Translate ``text`` from ``srcLang`` to ``tgtLang`` using Gemini.

    Args:
        text: The text to translate.
        srcLang: Source language name; interpolated verbatim into the prompt.
        tgtLang: Target language name; interpolated verbatim into the prompt.

    Returns:
        The ``text`` attribute of the model response. When ``text`` is empty,
        whitespace-only or ``None``, returns ``""`` without contacting the
        model.
    """
    # Nothing to translate: skip the remote call instead of asking the model
    # to "translate" an empty prompt.
    if not text or not text.strip():
        return ""

    sys_instruct = "You are a professional translator. Do not give explanation."
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        config=types.GenerateContentConfig(
            system_instruction=sys_instruct),
        contents=f"Translate the following from {srcLang} to {tgtLang}. Return nothing but the {tgtLang} translation: {text} ",
    )
    # The full response object is intentionally not printed: it contains the
    # user's text and would be dumped to stdout on every request.
    return response.text
from tempfile import NamedTemporaryFile | |
from fastapi import UploadFile, Form, File | |
from pathlib import Path | |
from typing import Annotated | |
import shutil | |
import aiofiles | |
# def save_upload_file_tmp(upload_file: UploadFile) -> Path: | |
# ASGI application object; the title and description are the metadata passed
# to FastAPI for this service.
app = FastAPI(
    title="Real-Time Audio Processor",
    description="Process and transcribe audio in real-time using Whisper",
)
async def test(file: UploadFile = File(...),
               srcLang: str = Form(...),
               tgtLang: str = Form(...)):
    """Transcribe an uploaded audio file and translate the transcript.

    Args:
        file: Uploaded audio file; its raw bytes are passed to the
            speech-recognition pipeline.
        srcLang: Source language name forwarded to ``translate``.
        tgtLang: Target language name forwarded to ``translate``.

    Returns:
        A dict with ``transcribed_text`` (the pipeline's ``text`` output) and
        ``translated_text`` (the result of ``translate``).

    Raises:
        HTTPException: 400 when the upload contains no bytes.
    """
    # Local import: only this handler needs it, and the file's import block
    # does not bring HTTPException into scope.
    from fastapi import HTTPException

    # Keep the upload in memory only. Writing it to `file.filename` would use
    # a client-controlled path (traversal/overwrite risk, and the name may be
    # None), and the pipeline below consumes the bytes directly anyway.
    content = await file.read()
    if not content:
        raise HTTPException(status_code=400,
                            detail="Uploaded audio file is empty.")

    # NOTE(review): the transcription language is fixed to "tagalog" and does
    # not follow srcLang -- confirm this is intended for this model.
    result = pipe(content,
                  max_new_tokens=256,
                  chunk_length_s=30,
                  batch_size=8,
                  generate_kwargs={"task": "transcribe", "language": "tagalog"})
    transcript = result['text']
    translatedResult = translate(transcript, srcLang=srcLang, tgtLang=tgtLang)
    return {"transcribed_text": transcript, "translated_text": translatedResult}
async def test(text: str,
               srcLang: str = Form(...),
               tgtLang: str = Form(...)):
    """Translate ``text`` from ``srcLang`` to ``tgtLang`` and return it.

    The raw input and the translation are both printed to stdout before the
    translation is returned.
    """
    translated = translate(text, srcLang, tgtLang)
    print("Raw: ", text)
    print("Translated: ", translated)
    return translated