Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	| # -*- coding: utf-8 -*- | |
| """main.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/17Umb-Po_5pESiRv3-dcDRyootgqBjjWM | |
| """ | |
| import torch | |
| from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline | |
# Use the first CUDA device when torch reports one available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Build the speech-recognition pipeline once at import time so requests reuse it.
model_id = "rbcurzon/whisper-small-fil"
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    device=device,
)
| """**FastAPI**""" | |
| import os | |
| import io | |
| from fastapi import FastAPI, WebSocket, UploadFile, File | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from google import genai | |
| from google.genai import types | |
# The API key is read from the GENAI_API_KEY environment variable; never hard-code or share it.
client = genai.Client(api_key=os.environ.get("GENAI_API_KEY"))


def translate(text, srcLang, tgtLang):
    """Translate ``text`` from ``srcLang`` to ``tgtLang`` using the Gemini client.

    Args:
        text: The text to translate.
        srcLang: Name of the source language, interpolated into the prompt.
        tgtLang: Name of the target language, interpolated into the prompt.

    Returns:
        The ``text`` attribute of the model response.
    """
    generation_config = types.GenerateContentConfig(
        system_instruction="You are a professional translator. Do not give explanation.",
    )
    prompt = (
        f"Translate the following from {srcLang} to {tgtLang}. "
        f"Return nothing but the {tgtLang} translation: {text} "
    )
    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        config=generation_config,
        contents=prompt,
    )
    # The full response object is echoed to stdout before the text is returned.
    print(reply)
    return reply.text
| from tempfile import NamedTemporaryFile | |
| from fastapi import UploadFile, Form, File | |
| from pathlib import Path | |
| from typing import Annotated | |
| import shutil | |
| import aiofiles | |
| # def save_upload_file_tmp(upload_file: UploadFile) -> Path: | |
# Application instance, created with its title and description metadata.
app = FastAPI(
    description="Process and transcribe audio in real-time using Whisper",
    title="Real-Time Audio Processor",
)
async def test(file: UploadFile=File(...),
               srcLang: str= Form(...),
               tgtLang: str= Form(...)):
    """Transcribe an uploaded audio file and translate the transcript.

    The raw upload is saved in the current working directory, passed as
    bytes to the module-level ``pipe`` for transcription, and the resulting
    text is handed to ``translate``.

    Args:
        file: The uploaded audio file (multipart form field).
        srcLang: Source language name forwarded to ``translate``.
        tgtLang: Target language name forwarded to ``translate``.

    Returns:
        A dict with the keys ``transcribed_text`` and ``translated_text``.
    """
    # Read first so a failed read does not leave an empty file behind.
    content = await file.read()

    # ``file.filename`` is supplied by the client. Keep only its final path
    # component so names such as "../../x" or "/etc/x" cannot escape the
    # working directory; backslashes are normalised so Windows-style
    # separators are stripped as well.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = Path(raw_name).name
    if safe_name in ("", ".", ".."):
        safe_name = "upload"

    # Persist a copy of the uploaded audio.
    async with aiofiles.open(safe_name, 'wb') as out_file:
        await out_file.write(content)

    # NOTE(review): the transcription language is fixed to "tagalog" and does
    # not follow ``srcLang`` -- confirm this is intended.
    result = pipe(content,
                  max_new_tokens=256,
                  chunk_length_s=30,
                  batch_size=8,
                  generate_kwargs={"task": "transcribe", "language": "tagalog"})

    transcript = result['text']
    translatedResult = translate(transcript, srcLang=srcLang, tgtLang=tgtLang)
    return {"transcribed_text": transcript, "translated_text": translatedResult}
