from fastapi import FastAPI, File, UploadFile, HTTPException
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import requests
import json
import tempfile
import os

app = FastAPI()

# Set up the Whisper model: use GPU with float16 if available, otherwise CPU with float32
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_id = "openai/whisper-large-v3-turbo"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"


@app.post("/transcribe-analyze/")
async def transcribe_analyze(file: UploadFile = File(...)):
    temp_audio_path = None
    try:
        # Save the uploaded file temporarily
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_audio.write(await file.read())
            temp_audio_path = temp_audio.name

        # Transcribe the audio file with Whisper
        transcription_result = pipe(temp_audio_path, return_timestamps=True)
        transcription = transcription_result["text"]

        # Send the transcription to the LLM for classification
        response = requests.post(
            url=OPENROUTER_URL,
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
            },
            data=json.dumps({
                "model": "meta-llama/llama-3.1-70b-instruct:free",
                "messages": [
                    {
                        "role": "user",
                        "content": (
                            "You are an AI Assistant that is given the transcript between a call agent "
                            "and a lead, and you must classify whether the lead happily agreed to the booking. "
                            "The response should have 4 parts: 1. Appointment Booked: Yes/No, "
                            "2. Short reason for your answer, 3. Short summary of the call, "
                            "4. Lead's overall emotion.\n"
                            f"Here is the transcription: {transcription}"
                        ),
                    }
                ],
            }),
        )
        response.raise_for_status()

        ai_response = (
            response.json()
            .get("choices", [{}])[0]
            .get("message", {})
            .get("content", "No response from AI.")
        )

        return {"transcription": transcription, "ai_response": ai_response}
    except Exception as e:
        # Surface failures as a proper 500 response (raise, not return, so FastAPI sets the status code)
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Always remove the temporary file, even if transcription or the API call fails
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
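
# --- Example client call (a minimal sketch, not part of the service itself) ---
# This assumes the app above is served locally, e.g. with `uvicorn main:app --reload`,
# and that "sample_call.mp3" is a hypothetical recording on disk; adjust the base URL
# and file path to your setup.
#
# import requests
#
# def send_recording(path: str, base_url: str = "http://localhost:8000") -> dict:
#     """Upload an audio file to /transcribe-analyze/ and return the JSON result."""
#     with open(path, "rb") as audio_file:
#         response = requests.post(
#             f"{base_url}/transcribe-analyze/",
#             files={"file": (path, audio_file, "audio/mpeg")},
#         )
#     response.raise_for_status()
#     return response.json()
#
# if __name__ == "__main__":
#     result = send_recording("sample_call.mp3")
#     print(result["transcription"])
#     print(result["ai_response"])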