sivakorn-su committed on
Commit
029aff6
·
1 Parent(s): d6a6fd2

fix cache and load model

Browse files
Files changed (1) hide show
  1. app.py +54 -34
app.py CHANGED
@@ -18,9 +18,17 @@ from pydub import AudioSegment, effects
18
  import pandas as pd
19
  from moviepy.editor import VideoFileClip
20
  from together import Together
 
 
21
 
22
- os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/hf_cache"
23
- os.makedirs("/tmp/hf_cache", exist_ok=True)
 
 
 
 
 
 
24
 
25
  token = os.environ.get('HF_TOKEN')
26
  together_api_key = os.environ.get('TOGETHER_API_KEY')
@@ -28,25 +36,40 @@ ngrok_auth_token = os.environ.get('NGROK_AUTH_TOKEN')
28
 
29
  pipelines, models, others = [], [], []
30
 
31
- n = torch.cuda.device_count()
32
-
33
- if n == 0:
34
- device = "cpu"
35
- pipelines.append(Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=token).to(device))
36
- models.append(whisper.load_model("large").to(device))
37
- elif n == 1:
38
- device = "cuda:0"
39
- pipelines.append(Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=token).to(device))
40
- models.append(whisper.load_model("large").to(device))
41
- else:
42
- device_pyannote = torch.device("cuda:0")
43
- device_whisper = torch.device("cuda:1")
44
- pipeline = Pipeline.from_pretrained(
45
- "pyannote/speaker-diarization-3.1",
46
- use_auth_token=token
47
- )
48
- pipeline.to(device_pyannote)
49
- model = whisper.load_model("large").to(device_whisper)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  nest_asyncio.apply()
52
  together = Together(api_key=together_api_key)
@@ -73,19 +96,16 @@ app.add_middleware(
73
  allow_headers=["*"],
74
  )
75
 
76
- @app.on_event("startup")
77
- def on_startup():
78
- global pipeline, model, device
79
- pipeline, model, device = setup_models()
80
- # ... any other startup logic
81
-
82
  @app.get("/health")
83
- def health_check():
84
- return {
85
- "status": "ok",
86
- "model_loaded": model is not None,
87
- "diarization_pipeline_loaded": pipeline is not None,
88
- "device": device
 
 
 
89
  }
90
 
91
  @app.get("/")
@@ -93,7 +113,7 @@ def check_api():
93
  return {"message": "API is up and running"}
94
 
95
  @app.get("/key")
96
- def check_env():
97
  return {
98
  "env": os.environ.get("ENV", "dev"),
99
  "openai_key_exists": bool(os.environ.get("OPENAI_API_KEY")),
 
18
  import pandas as pd
19
  from moviepy.editor import VideoFileClip
20
  from together import Together
21
+ import asyncio
22
+ import logging
23
 
24
+ HF_CACHE_DIR = "/tmp/hf_cache"
25
+ WHISPER_CACHE_DIR = "/tmp/whisper_cache"
26
+
27
+ os.makedirs(HF_CACHE_DIR, exist_ok=True)
28
+ os.makedirs(WHISPER_CACHE_DIR, exist_ok=True)
29
+
30
+ os.environ["HUGGINGFACE_HUB_CACHE"] = HF_CACHE_DIR
31
+ os.environ["TORCH_HOME"] = WHISPER_CACHE_DIR
32
 
33
  token = os.environ.get('HF_TOKEN')
34
  together_api_key = os.environ.get('TOGETHER_API_KEY')
 
36
 
37
  pipelines, models, others = [], [], []
38
 
39
+ def load_model_bundle():
40
+ """โหลด pyannote + whisper โดยใช้ device ตาม GPU ที่มี"""
41
+ n = torch.cuda.device_count()
42
+ logger.info(f"🖥️ Found {n} CUDA device(s)")
43
+
44
+ if n == 0:
45
+ device = "cpu"
46
+ pipeline = Pipeline.from_pretrained(
47
+ "pyannote/speaker-diarization-3.1",
48
+ use_auth_token=token,
49
+ cache_dir=HF_CACHE_DIR
50
+ ).to(device)
51
+ model = whisper.load_model("large", download_root=WHISPER_CACHE_DIR).to(device)
52
+ elif n == 1:
53
+ device = "cuda:0"
54
+ pipeline = Pipeline.from_pretrained(
55
+ "pyannote/speaker-diarization-3.1",
56
+ use_auth_token=token,
57
+ cache_dir=HF_CACHE_DIR
58
+ ).to(device)
59
+ model = whisper.load_model("large", download_root=WHISPER_CACHE_DIR).to(device)
60
+ else:
61
+ device_pyannote = torch.device("cuda:0")
62
+ device_whisper = torch.device("cuda:1")
63
+ pipeline = Pipeline.from_pretrained(
64
+ "pyannote/speaker-diarization-3.1",
65
+ use_auth_token=token,
66
+ cache_dir=HF_CACHE_DIR
67
+ ).to(device_pyannote)
68
+ model = whisper.load_model("large", download_root=WHISPER_CACHE_DIR).to(device_whisper)
69
+
70
+ pipelines.append(pipeline)
71
+ models.append(model)
72
+
73
 
74
  nest_asyncio.apply()
75
  together = Together(api_key=together_api_key)
 
96
  allow_headers=["*"],
97
  )
98
 
 
 
 
 
 
 
99
  @app.get("/health")
100
+ async def health_check():
101
+ return {
102
+ "status": "running",
103
+ "models_loaded": {
104
+ "pipelines": len(pipelines),
105
+ "whisper_models": len(models)
106
+ },
107
+ "cuda_available": torch.cuda.is_available(),
108
+ "cuda_devices": torch.cuda.device_count() if torch.cuda.is_available() else 0
109
  }
110
 
111
  @app.get("/")
 
113
  return {"message": "API is up and running"}
114
 
115
  @app.get("/key")
116
+ async def check_env():
117
  return {
118
  "env": os.environ.get("ENV", "dev"),
119
  "openai_key_exists": bool(os.environ.get("OPENAI_API_KEY")),