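"""FastAPI service that turns an uploaded video into a diarized, transcribed,
and (on request) summarized transcript."""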
import os
from fastapi import FastAPI, UploadFile, File, Body
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.concurrency import run_in_threadpool
from fastapi import HTTPException
from datetime import datetime
from collections import Counter
import pandas as pd
import torch
import uvicorn
import asyncio
import logging
from config import UPLOAD_FOLDER, SUPABASE_URL, SUPABASE_KEY
from models import pipelines, models, model_lock, load_model_bundle
from utils import (
    save_uploaded_file,
    extract_and_normalize_audio,
    diarize_audio,
    split_segments,
    transcribe_segments,
    clean_summary,
    summarize_texts,
    add_llm_spell_corrected_text_column,
)
# from supabase import create_client, Client
logger = logging.getLogger(__name__)
app = FastAPI()
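# Treat /path and /path/ as distinct routes instead of redirecting; the upload
# endpoint below registers both spellings explicitly.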
app.router.redirect_slashes = False
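# Allowed origins come from a comma-separated CORS_ORIGINS env var, falling
# back to a wildcard when unset.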
origins = [o.strip() for o in (os.getenv("CORS_ORIGINS", "").split(",")) if o.strip()] or ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Set the Supabase URL and KEY from the environment
# SUPABASE_URL = os.environ.get("SUPABASE_URL")
# SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
# supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
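
# Load the diarization pipeline and Whisper model once at startup. model_lock
# serializes the load so concurrent callers cannot race.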
@app.on_event("startup")
async def startup_event():
    async with model_lock:
        if pipelines and models:
            logger.info("ℹ️ Models already loaded. Skipping startup load.")
            return
        logger.info("🔁 Loading models at startup...")
        try:
            pipeline, model = await load_model_bundle()
        except Exception as e:
            logger.exception(f"❌ Model loading failed: {e}")
            import sys
            sys.exit(1)
        # Idempotent set (load_model_bundle already does this, but this guards
        # against being called from elsewhere).
        if not pipelines:
            pipelines.append(pipeline)
        else:
            pipelines[0] = pipeline
        if not models:
            models.append(model)
        else:
            models[0] = model
        logger.info("✅ Models loaded successfully at startup")
@app.get("/")
async def check_api():
pipelines_loaded = len(pipelines)
models_loaded = len(models)
return {
"status": "running",
"models_loaded": {
"pipelines": pipelines_loaded,
"whisper_models": models_loaded,
"pipeline_ready": pipelines_loaded > 0 and all(pipelines),
"model_ready": models_loaded > 0 and all(models),
},
"cuda": {
"available": torch.cuda.is_available(),
"device_count": torch.cuda.device_count() if torch.cuda.is_available() else 0,
"device_names": [
torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())
] if torch.cuda.is_available() else []
},
"timestamp": datetime.now().isoformat(),
"debug": {
"pipelines": [str(p) for p in pipelines],
"models": [str(m) for m in models],
},
}
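
# Environment probe: reports the ENV name and whether OPENAI_API_KEY is set
# without exposing its value.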
@app.get("/key")
async def check_env():
import os
return {
"env": os.environ.get("ENV", "dev"),
"openai_key_exists": bool(os.environ.get("OPENAI_API_KEY")),
}
# @app.post('/video')
# async def upload_video(file: UploadFile = File(...)):
# video_path = save_uploaded_file(file)
# return video_path
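
# Both slash spellings are registered explicitly because redirect_slashes is
# disabled above. The blocking pipeline runs in a threadpool so the event loop
# stays responsive during long jobs.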
@app.api_route("/upload_video", methods=["POST"])
@app.api_route("/upload_video/", methods=["POST"])
async def process_video(file: UploadFile = File(...)):
try:
payload = await run_in_threadpool(upload_video, file)
return JSONResponse(content=payload)
except Exception as e:
import traceback, sys
traceback.print_exc(file=sys.stderr)
raise HTTPException(status_code=500, detail=f"processing failed: {e}")
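
# Example request (a sketch: "meeting.mp4" is a hypothetical file; host and
# port match the uvicorn settings in the __main__ block below):
#   curl -X POST -F "file=@meeting.mp4" http://localhost:7860/upload_video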
def upload_video(file: UploadFile):
    # Full processing pipeline; runs synchronously inside a threadpool worker.
    video_path = save_uploaded_file(file)
    audio_path = extract_and_normalize_audio(video_path)
    df_diarization = diarize_audio(audio_path)
    segment_folder = split_segments(audio_path, df_diarization)
    df_transcriptions = transcribe_segments(segment_folder)
    # Align diarization and transcription rows before merging side by side.
    min_len = min(len(df_diarization), len(df_transcriptions))
    df_merged = pd.concat([
        df_diarization.iloc[:min_len].reset_index(drop=True),
        df_transcriptions.iloc[:min_len].reset_index(drop=True),
    ], axis=1)
    # df_merged = add_corrected_text_column(df_merged)
    df_merged = add_llm_spell_corrected_text_column(df_merged)
    # summaries = summarize_texts(df_merged["text"].tolist(), together_api_key, delay=0)
    result = df_merged.to_dict(orient="records")
    speaker_array = df_diarization["speaker"].unique().tolist()
    counter = Counter(df_diarization["speaker"])
    result_array = [{"speaker": spk, "count": cnt} for spk, cnt in counter.most_common()]
    from pydub import AudioSegment
    duration_minutes = len(AudioSegment.from_wav(audio_path)) / 1000 / 60
    # Save result to Supabase
    # supabase.table("summaries").insert(result).execute()
    return {
        "video_path": video_path,
        "audio_path": audio_path,
        "audio_length": duration_minutes,
        "data": result,
        "speaker_array": speaker_array,
        "count_speaker": result_array,
        "num_speakers": len(speaker_array),
        "total_sentence": len(df_merged["text"]),
        "summaries": "This feature is not available",
    }
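
# Summarization endpoint: takes a JSON array of transcript rows and returns a
# summary for each "text" entry via summarize_texts.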
@app.post("/sumarize/")
async def sumarize(array: list = Body(...)):
from config import together_api_key
import pandas as pd
df = pd.DataFrame(array)
summaries = summarize_texts(df["text"].tolist(), together_api_key, delay=2)
# save to supabase
data_to_save = [{"text": row["text"], "summary": summary} for row, summary in zip(array, summaries)]
# supabase.table("summaries").insert(data_to_save).execute()
return JSONResponse(content={
"summaries": summaries,
})
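
# Example request (a sketch; each element must carry a "text" key because the
# handler builds a DataFrame and reads df["text"]):
#   curl -X POST http://localhost:7860/summarize/ \
#     -H "Content-Type: application/json" \
#     -d '[{"text": "first segment"}, {"text": "second segment"}]'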
if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
        reload=False,
    )