Spaces:
Running
on
T4
Running
on
T4
File size: 6,253 Bytes
9ce2614 b54bde1 78dde53 0e1a412 50e67e8 1caedc9 8fd8533 78dde53 8fd8533 029aff6 b54bde1 b0c0237 8fd8533 fe49fa2 46c4a44 8fd8533 029b982 78dde53 8fd8533 da88694 78dde53 37f4934 8fd8533 78dde53 1152e72 78dde53 b54bde1 029b982 b54bde1 8fd8533 3b8d1bc b0c0237 37f4934 b0c0237 37f4934 8fd8533 b935799 18a8fe5 029aff6 b935799 029aff6 1caedc9 b935799 78dde53 029aff6 8fd8533 78dde53 37f4934 0e1a412 50e67e8 360b400 50e67e8 0e1a412 37f4934 8fd8533 ce543e3 78dde53 ac96fc2 46c4a44 b0c0237 78dde53 8fd8533 78dde53 b54bde1 360b400 78dde53 b0c0237 360b400 37f4934 78dde53 b54bde1 78dde53 2c9f05b 8fd8533 2c9f05b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
import os
from fastapi import FastAPI, UploadFile, File, Body
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.concurrency import run_in_threadpool
from fastapi import HTTPException
from datetime import datetime
from collections import Counter
import pandas as pd
import torch
import uvicorn
import asyncio
import logging
from config import UPLOAD_FOLDER, SUPABASE_URL, SUPABASE_KEY
from models import pipelines, models, model_lock, load_model_bundle
from utils import (
save_uploaded_file,
extract_and_normalize_audio,
diarize_audio,
split_segments,
transcribe_segments,
clean_summary,
summarize_texts,
add_llm_spell_corrected_text_column
)
# from supabase import create_client, Client
# Module-level logger (handlers/level configured by the host app / uvicorn).
logger = logging.getLogger(__name__)
app = FastAPI()
# Treat "/path" and "/path/" as distinct; endpoints below register both forms explicitly.
app.router.redirect_slashes = False
# Comma-separated allow-list from the CORS_ORIGINS env var; falls back to allow-all.
origins = [o.strip() for o in (os.getenv("CORS_ORIGINS", "").split(",")) if o.strip()] or ["*"]
# NOTE(review): allow_credentials=True combined with origins=["*"] is rejected by
# browsers per the CORS spec — confirm whether credentialed requests are needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Configure Supabase URL and KEY from env (persistence currently disabled):
# SUPABASE_URL = os.environ.get("SUPABASE_URL")
# SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
# supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
@app.on_event("startup")
async def startup_event():
    """Load the diarization pipeline and Whisper model once at process start.

    Holds ``model_lock`` so a concurrent load triggered elsewhere cannot
    race with this one. Exits the process if loading fails, since the API
    is unusable without its models.
    """
    async with model_lock:
        # Bug fix: the original condition `pipelines and models is not None`
        # parses as `pipelines and (models is not None)`; because `models` is
        # a list, `models is not None` is always True, so an empty `models`
        # list never blocked the skip. Test both lists for truthiness.
        if pipelines and models:
            logger.info("ℹ️ Models already loaded. Skipping startup load.")
            return
        logger.info("🔁 Loading models at startup...")
        try:
            pipeline, model = await load_model_bundle()
        except Exception as e:
            logger.exception(f"❌ Model loading failed: {e}")
            import sys
            sys.exit(1)
        # Idempotent set (load_model_bundle already stores these, but guard in
        # case this is invoked from elsewhere).
        if not pipelines:
            pipelines.append(pipeline)
        else:
            pipelines[0] = pipeline
        if not models:
            models.append(model)
        else:
            models[0] = model
        logger.info("✅ Models loaded successfully at startup")
@app.get("/")
async def check_api():
    """Health/status endpoint: model readiness, CUDA state, and debug info."""
    n_pipelines = len(pipelines)
    n_models = len(models)
    cuda_ok = torch.cuda.is_available()
    device_total = torch.cuda.device_count() if cuda_ok else 0
    return {
        "status": "running",
        "models_loaded": {
            "pipelines": n_pipelines,
            "whisper_models": n_models,
            "pipeline_ready": n_pipelines > 0 and all(pipelines),
            "model_ready": n_models > 0 and all(models),
        },
        "cuda": {
            "available": cuda_ok,
            "device_count": device_total,
            # Empty when CUDA is unavailable (device_total == 0).
            "device_names": [
                torch.cuda.get_device_name(idx) for idx in range(device_total)
            ],
        },
        "timestamp": datetime.now().isoformat(),
        "debug": {
            "pipelines": [str(p) for p in pipelines],
            "models": [str(m) for m in models],
        },
    }
@app.get("/key")
async def check_env():
    """Report non-secret environment diagnostics (env name, API-key presence)."""
    import os
    env_name = os.environ.get("ENV", "dev")
    # Truthiness check: an empty OPENAI_API_KEY counts as absent.
    has_openai_key = bool(os.environ.get("OPENAI_API_KEY"))
    return {"env": env_name, "openai_key_exists": has_openai_key}
# @app.post('/video')
# async def upload_video(file: UploadFile = File(...)):
# video_path = save_uploaded_file(file)
# return video_path
@app.api_route("/upload_video", methods=["POST"])
@app.api_route("/upload_video/", methods=["POST"])
async def process_video(file: UploadFile = File(...)):
    """Run the full video-processing pipeline on an uploaded file.

    The blocking work in ``upload_video`` runs in a worker thread so the
    event loop stays responsive. Registered on both the slash and no-slash
    paths because redirect_slashes is disabled for this app.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    try:
        payload = await run_in_threadpool(upload_video, file)
        return JSONResponse(content=payload)
    except Exception as e:
        # Consistency fix: log the full traceback through the module logger
        # (as the rest of the file does) instead of hand-rolled
        # `import traceback, sys; traceback.print_exc(file=sys.stderr)`.
        logger.exception("processing failed")
        raise HTTPException(status_code=500, detail=f"processing failed: {e}")
def upload_video(file: UploadFile = File(...)):
    """Blocking pipeline: video -> audio -> diarization -> transcription.

    Saves the upload, extracts and normalizes audio, diarizes speakers,
    splits and transcribes per-segment audio, then merges diarization and
    transcription rows positionally and applies LLM spell correction.
    Invoked via threadpool from the ``/upload_video`` endpoint; not a route.

    Args:
        file: The uploaded video file.

    Returns:
        dict: file paths, audio length in minutes, per-row records, speaker
        statistics, and a placeholder ``summaries`` field.
    """
    # (Removed unused `import os` and `from config import together_api_key`;
    # the latter was only referenced by commented-out summarization code.)
    video_path = save_uploaded_file(file)
    audio_path = extract_and_normalize_audio(video_path)
    df_diarization = diarize_audio(audio_path)
    segment_folder = split_segments(audio_path, df_diarization)
    df_transcriptions = transcribe_segments(segment_folder)
    # Diarization and transcription can disagree on row count; align on the
    # shorter of the two before the positional (axis=1) merge.
    min_len = min(len(df_diarization), len(df_transcriptions))
    df_merged = pd.concat([
        df_diarization.iloc[:min_len].reset_index(drop=True),
        df_transcriptions.iloc[:min_len].reset_index(drop=True)
    ], axis=1)
    df_merged = add_llm_spell_corrected_text_column(df_merged)
    result = df_merged.to_dict(orient="records")
    # Speaker stats come from the full diarization output, not the truncated merge.
    speaker_array = df_diarization["speaker"].unique().tolist()
    counter = Counter(df_diarization["speaker"])
    result_array = [{"speaker": spk, "count": cnt} for spk, cnt in counter.most_common()]
    # Local import: pydub is only needed here, to measure audio duration.
    from pydub import AudioSegment
    duration_minutes = len(AudioSegment.from_wav(audio_path)) / 1000 / 60
    # Persistence to Supabase is currently disabled:
    # supabase.table("summaries").insert(result).execute()
    return {
        "video_path": video_path,
        "audio_path": audio_path,
        "audio_length": duration_minutes,
        "data": result,
        "speaker_array": speaker_array,
        "count_speaker": result_array,
        "num_speakers": len(speaker_array),
        "total_sentence": len(df_merged['text']),
        "summaries": 'This feature not available',
    }
@app.post("/sumarize/")
async def sumarize(array: list = Body(...)):
    """Summarize the ``text`` field of each posted record.

    The route path keeps the original (misspelled) "/sumarize/" so existing
    clients continue to work.
    """
    from config import together_api_key
    import pandas as pd
    texts = pd.DataFrame(array)["text"].tolist()
    summaries = summarize_texts(texts, together_api_key, delay=2)
    # Pair each input row with its summary; retained for the disabled Supabase insert.
    data_to_save = [
        {"text": row["text"], "summary": summary}
        for row, summary in zip(array, summaries)
    ]
    # supabase.table("summaries").insert(data_to_save).execute()
    return JSONResponse(content={
        "summaries": summaries,
    })
if __name__ == "__main__":
    # Port 7860 is the conventional Hugging Face Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860, reload=False)