File size: 1,256 Bytes
220bdec
5712999
 
 
 
220bdec
 
c967100
5712999
c967100
 
5712999
0bfe157
113a936
5712999
c967100
 
 
5712999
 
 
 
efa14b2
c967100
 
5712999
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

import os
import tempfile

# HF_HOME is read when the Hugging Face libraries are first imported, so it
# must be set before `transformers` is imported for the cache location to
# take effect.
os.environ["HF_HOME"] = "/tmp/hf_cache"

import ctranslate2  # noqa: E402
import soundfile as sf  # noqa: E402
from fastapi import FastAPI, File, HTTPException, UploadFile  # noqa: E402
from transformers import WhisperProcessor  # noqa: E402

# Create the FastAPI application.
app = FastAPI()

# Load the Whisper processor and the CTranslate2 model once, at import time,
# so that request handlers can reuse the same instances.
# Startup banner printed when this module is imported.
print("✅ THIS IS NEW APP.PY VERSION")
processor = WhisperProcessor.from_pretrained("./whisper_processor")
# The Whisper wrapper lives in `ctranslate2.models`; it is not re-exported at
# the top level of the `ctranslate2` package.
model = ctranslate2.models.Whisper("ct2_model", device="cpu", compute_type="int8")

@app.get("/")
def root():
    """Return a short status message and a usage hint for the API."""
    status_message = "CTranslate2 Whisper API is running."
    usage_hint = "POST /transcribe with an audio file (.wav, .mp3, etc.)"
    return dict(message=status_message, usage=usage_hint)

@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the CTranslate2 Whisper model.

    The upload is written to a uniquely named temporary file (never to a path
    derived from the client-supplied filename), decoded with soundfile,
    turned into input features by the Whisper processor and decoded by the
    CTranslate2 model.

    Args:
        file: The uploaded audio file.

    Returns:
        A dict of the form ``{"text": <transcription>}``.

    Raises:
        HTTPException: 400 when the upload is empty, cannot be decoded as
            audio, or is rejected by the processor (for example because of
            an unsupported sampling rate).
    """
    payload = await file.read()
    if not payload:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # Use a server-chosen temp name: `file.filename` is untrusted input and
    # may be None, contain path separators, or collide across requests.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        with tmp:
            tmp.write(payload)
        audio_input, sample_rate = sf.read(tmp.name)
    except RuntimeError as err:
        # soundfile reports undecodable input with RuntimeError subclasses.
        raise HTTPException(
            status_code=400, detail=f"Could not decode audio: {err}"
        ) from err
    finally:
        # Always clean up, whether decoding succeeded or not.
        os.remove(tmp.name)

    # soundfile returns (frames, channels) for multi-channel audio; the
    # processor would misread a 2-D array as a batch, so downmix to mono.
    if audio_input.ndim > 1:
        audio_input = audio_input.mean(axis=1)
    if audio_input.size == 0:
        raise HTTPException(status_code=400, detail="Audio contains no samples.")

    try:
        inputs = processor(
            audio_input, sampling_rate=sample_rate, return_tensors="np"
        )
    except ValueError as err:
        # Raised by the processor for inputs it cannot handle, e.g. a
        # sampling rate different from the one the model expects.
        raise HTTPException(status_code=400, detail=str(err)) from err

    # CTranslate2 expects the batched features wrapped in a StorageView.
    features = ctranslate2.StorageView.from_array(inputs.input_features)

    # Build the decoder prompt. Language and task tokens only exist for
    # multilingual models; for those the language is auto-detected.
    prompt_tokens = ["<|startoftranscript|>"]
    if model.is_multilingual:
        language, _probability = model.detect_language(features)[0][0]
        prompt_tokens += [language, "<|transcribe|>"]
    prompt_tokens.append("<|notimestamps|>")
    prompt = processor.tokenizer.convert_tokens_to_ids(prompt_tokens)

    # Run CTranslate2 inference and decode the best hypothesis.
    results = model.generate(features, [prompt])
    token_ids = results[0].sequences_ids[0]
    text = processor.decode(token_ids, skip_special_tokens=True)

    return {"text": text}