# speech-to-text / app.py - rbcurzon, "Update app.py", commit c794494 (verified), 2.61 kB
# -*- coding: utf-8 -*-
"""main.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/17Umb-Po_5pESiRv3-dcDRyootgqBjjWM
"""
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
# Run on the first CUDA device when torch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Build the speech-recognition pipeline once at import time so every request
# reuses the same loaded model.
model_id = "rbcurzon/whisper-small-fil"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model_id,
    device=device,
)
"""**FastAPI**"""
import os
import io
from fastapi import FastAPI, WebSocket, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from google import genai
from google.genai import types
# Gemini client. The key is read from the GENAI_API_KEY environment variable;
# do not share it or hard-code it here.
client = genai.Client(api_key=os.getenv("GENAI_API_KEY"))
def translate(text, srcLang, tgtLang):
    """Translate ``text`` from ``srcLang`` to ``tgtLang`` with Gemini.

    Args:
        text: The text to translate.
        srcLang: Name of the source language, interpolated into the prompt.
        tgtLang: Name of the target language, interpolated into the prompt.

    Returns:
        The ``text`` attribute of the model response.
    """
    # The system instruction keeps the model from adding commentary around
    # the translation.
    generation_config = types.GenerateContentConfig(
        system_instruction="You are a professional translator. Do not give explanation.",
    )
    prompt = (
        f"Translate the following from {srcLang} to {tgtLang}. "
        f"Return nothing but the {tgtLang} translation: {text} "
    )

    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        config=generation_config,
        contents=prompt,
    )
    # Dump the full response object to stdout for debugging.
    print(reply)
    return reply.text
from tempfile import NamedTemporaryFile
from fastapi import UploadFile, Form, File
from pathlib import Path
from typing import Annotated
import shutil
import aiofiles
# def save_upload_file_tmp(upload_file: UploadFile) -> Path:
# Application instance that the route decorators below register against.
app = FastAPI(
    title="Real-Time Audio Processor",
    description="Process and transcribe audio in real-time using Whisper",
)
@app.post("/translateAudio/")
async def test(file: UploadFile = File(...),
               srcLang: str = Form(...),
               tgtLang: str = Form(...)):
    """Transcribe an uploaded audio file and translate the transcript.

    Args:
        file: The uploaded audio, sent as a multipart form file.
        srcLang: Source language name, forwarded to ``translate``.
        tgtLang: Target language name, forwarded to ``translate``.

    Returns:
        A dict with ``transcribed_text`` (the ``text`` entry of the pipeline
        output) and ``translated_text`` (the value returned by ``translate``).

    Raises:
        HTTPException: With status 400 when the uploaded file is empty.
    """
    # Local import: only this handler needs it, on the error path.
    from fastapi import HTTPException

    # The upload is deliberately NOT written to disk. ``file.filename`` is
    # chosen by the client (so it may contain path components or be missing),
    # and the pipeline consumes the raw bytes directly, so an on-disk copy
    # would only be an unread, never-deleted file at an untrusted path.
    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty.")

    # NOTE(review): the decoding language is fixed to "tagalog" regardless of
    # ``srcLang``; ``srcLang`` only affects the translation prompt.
    # NOTE(review): ``pipe`` and ``translate`` are called synchronously inside
    # this coroutine, so the event loop is busy until they return.
    result = pipe(
        content,
        max_new_tokens=256,
        chunk_length_s=30,
        batch_size=8,
        generate_kwargs={"task": "transcribe", "language": "tagalog"},
    )
    transcript = result['text']
    translatedResult = translate(transcript, srcLang=srcLang, tgtLang=tgtLang)
    return {"transcribed_text": transcript, "translated_text": translatedResult}
@app.post("/translateText/")
async def test(text: str,
               srcLang: str = Form(...),
               tgtLang: str = Form(...)):
    """Translate a piece of text and return the translation.

    Args:
        text: The text to translate.
        srcLang: Source language name, forwarded to ``translate``.
        tgtLang: Target language name, forwarded to ``translate``.

    Returns:
        The value returned by ``translate`` (not wrapped in a dict).
    """
    # NOTE(review): unlike ``srcLang``/``tgtLang``, ``text`` carries no
    # ``Form(...)`` marker, so FastAPI presumably reads it from the query
    # string rather than the form body - confirm this is intended.
    translated = translate(text, srcLang, tgtLang)

    # Echo input and output to stdout for debugging.
    print('Raw: ', text)
    print('Translated: ', translated)
    return translated