Spaces:
Sleeping
Sleeping
Commit
·
e00482a
1
Parent(s):
b93d19e
Edu-BERT API'si için ana uygulama dosyası eklendi. Model yükleme, tahmin ve video transkripsiyonu için gerekli endpoint'ler oluşturuldu. Ayrıca, requirements.txt dosyasına yeni bağımlılıklar eklendi.
Browse files- main-videopluskazanim.py +247 -0
- requirements.txt +4 -1
main-videopluskazanim.py
ADDED
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# main.py - Hugging Face Spaces API: ders_id -> model mapping -> batch inference -> kazanımID
|
2 |
+
# Requirements (requirements.txt):
|
3 |
+
# fastapi transformers torch pydantic uvicorn tensorflow openai-whisper python-multipart
|
4 |
+
#
|
5 |
+
# Directory layout within Space repo:
|
6 |
+
# - main.py (this file)
|
7 |
+
# - model_mapping.json
|
8 |
+
# - kazanim_id_konu_isim_dict_list.py
|
9 |
+
#
|
10 |
+
# 📌 Endpoints:
|
11 |
+
# POST /predict {"model_name": "eraydikyologlu/bert_ayt_matematik", "inputs": ["soru1", "soru2", ...]}
|
12 |
+
# → {"model": "...", "results": [{"label": "LABEL_0", "score": 0.97}, ...]}
|
13 |
+
|
14 |
+
import os
|
15 |
+
import logging
|
16 |
+
logger = logging.getLogger("uvicorn")
|
17 |
+
logger.setLevel(logging.INFO)
|
18 |
+
|
19 |
+
# Point every Hugging Face cache location at a writable directory and set the
# transformers / hub switches in one go.
os.environ.update(
    {
        "HF_HOME": "/tmp/.cache/huggingface",
        "TRANSFORMERS_CACHE": "/tmp/.cache/huggingface",
        "HF_HUB_CACHE": "/tmp/.cache/huggingface",
        "TRANSFORMERS_VERBOSITY": "info",
        "HF_HUB_DISABLE_BIN_TO_SAFETENSORS_CONVERSION": "1",
    }
)

# TensorFlow is optional: when it is importable, restrict its v1 logger to
# ERROR; when it is not installed, continue without it.
try:
    import tensorflow as tf

    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
except ImportError:
    pass
|
31 |
+
|
32 |
+
from fastapi import FastAPI, HTTPException, UploadFile, File
|
33 |
+
from pydantic import BaseModel, Field
|
34 |
+
from typing import List, Dict, Optional
|
35 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
|
36 |
+
import torch
|
37 |
+
import functools
|
38 |
+
import kazanim_id_konu_isim_dict_list as kazanimlar
|
39 |
+
import logging
|
40 |
+
import whisper
|
41 |
+
import tempfile
|
42 |
+
|
43 |
+
app = FastAPI(title="Edu-BERT Multi‑Model API")
|
44 |
+
|
45 |
+
# Device index handed to the transformers pipeline: the first CUDA GPU (0)
# when CUDA is available, otherwise -1 for CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

print(f"🧠 torch: {torch.__version__}, cuda available: {torch.cuda.is_available()}")

# Report which hardware was detected at start-up.
if not torch.cuda.is_available():
    print("⚠️ CUDA not available, using CPU.")
else:
    print(f"🚀 CUDA device name: {torch.cuda.get_device_name(0)}")
|
54 |
+
|
55 |
+
# ---------- Pydantic Schemas ---------- #
|
56 |
+
class PredictRequest(BaseModel):
    # Request body for POST /predict.

    # Name of the model to load; passed as-is to load_pipeline().
    model_name: str = Field(
        ...,
        description="Model adı (örn: eraydikyologlu/bert_ayt_matematik)",
    )
    # Question texts to classify in a single call.
    inputs: List[str] = Field(
        ...,
        description="Soru metinleri listesi",
    )
|
59 |
+
|
60 |
+
class WhisperRequest(BaseModel):
    # Whisper transcription options; every field has a default.
    # NOTE(review): the /whisper handler visible in this file takes model_name
    # and language as plain parameters and does not reference this schema.

    model_name: str = Field(
        description="Whisper model adı (tiny, base, small, medium, large)",
        default="small",
    )
    language: str = Field(
        description="Dil",
        default="Turkish",
    )
    batch_size: int = Field(
        description="Batch boyutu",
        default=8,
    )
|
64 |
+
|
65 |
+
class QuestionResult(BaseModel):
    # One classification result as returned by /predict.

    label: str  # "label" value of the pipeline output
    score: float  # "score" value of the pipeline output, cast to float
|
68 |
+
|
69 |
+
class VideoResult(BaseModel):
    # One transcription result as returned by /whisper.

    id: str  # filename of the uploaded file
    text: str  # stripped transcript; empty string when transcription failed
|
72 |
+
|
73 |
+
class PredictResponse(BaseModel):
    # Response body of POST /predict.

    model: str  # model name echoed back from the request
    results: List[QuestionResult]  # one entry per pipeline output
|
76 |
+
|
77 |
+
class WhisperResponse(BaseModel):
    # Response body of POST /whisper.

    model: str  # Whisper model name echoed back from the request
    results: List[VideoResult]  # one entry per processed upload
|
80 |
+
|
81 |
+
# ---------- Helpers ---------- #
|
82 |
+
|
83 |
+
@functools.lru_cache(maxsize=8)
def load_pipeline(model_name: str):
    """Build a text-classification pipeline for ``model_name`` and memoize it.

    Up to 8 pipelines are kept by the LRU cache. A failed load raises and is
    therefore not cached, so the next call with the same name tries again.

    Args:
        model_name: Identifier given to ``from_pretrained`` for both the
            tokenizer and the sequence-classification model.

    Returns:
        The ``pipeline("text-classification", ...)`` object placed on the
        module-level ``device``.

    Raises:
        HTTPException: 500 carrying the underlying error message when any
            step of the load fails.
    """
    try:
        print(f"Model yükleniyor: {model_name}")
        # Plain from_pretrained calls with no extra arguments: tokenizer
        # first, then the model.
        tok = AutoTokenizer.from_pretrained(model_name)
        clf_model = AutoModelForSequenceClassification.from_pretrained(model_name)
        text_classifier = pipeline(
            "text-classification",
            model=clf_model,
            tokenizer=tok,
            device=device,
        )

        print(f"Model başarıyla yüklendi: {model_name}")
        return text_classifier

    except Exception as err:
        print(f"Model yükleme hatası ({model_name}): {err}")
        raise HTTPException(status_code=500, detail=f"Model yükleme hatası: {err}")
|
101 |
+
|
102 |
+
@functools.lru_cache(maxsize=4)
def load_whisper_model(model_name: str):
    """Load a Whisper model by name and memoize it.

    Up to 4 models are kept by the LRU cache. A failed load raises and is
    therefore not cached.

    Args:
        model_name: Name passed to ``whisper.load_model``.

    Returns:
        The object returned by ``whisper.load_model``.

    Raises:
        HTTPException: 500 carrying the underlying error message when the
            load fails.
    """
    try:
        print(f"Whisper modeli yükleniyor: {model_name}")
        loaded = whisper.load_model(model_name)
        print(f"Whisper modeli başarıyla yüklendi: {model_name}")
        return loaded

    except Exception as err:
        print(f"Whisper model yükleme hatası ({model_name}): {err}")
        raise HTTPException(status_code=500, detail=f"Whisper model yükleme hatası: {err}")
|
114 |
+
|
115 |
+
import time
import logging
import sys

# Configure the root logger: INFO and above, written to stdout with a
# timestamp and level prefix. basicConfig does nothing if the root logger
# already has handlers.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
    stream=sys.stdout,
)
|
119 |
+
|
120 |
+
@app.post("/predict", response_model=PredictResponse)
async def predict(req: PredictRequest):
    """Classify a batch of question texts with the requested model.

    Args:
        req: Request body carrying ``model_name`` and the ``inputs`` list.

    Returns:
        PredictResponse holding the model name and one ``QuestionResult``
        (label, score) per pipeline output.

    Raises:
        HTTPException: 400 when ``inputs`` is empty; the loader's own
            HTTPException when the model cannot be loaded; 500 for any other
            failure during inference.
    """
    t0 = time.time()
    print(f"new request /model = {req.model_name} / n = {len(req.inputs)}")
    try:
        if not req.inputs:
            raise HTTPException(status_code=400, detail="inputs boş olamaz")

        # Cached after the first successful load of this model name.
        classifier = load_pipeline(req.model_name)

        # Run the whole list through the pipeline in batches of 8.
        outputs = classifier(req.inputs, truncation=True, padding=True, batch_size=8)
        dt = time.time() - t0
        print(f"✅ done | took {dt:.2f}s")

        results = [
            QuestionResult(label=out["label"], score=float(out["score"]))
            for out in outputs
        ]

        print(f"Tamamlandı: {len(results)} sonuç")
        return PredictResponse(model=req.model_name, results=results)

    except HTTPException:
        # Deliberate HTTP errors (the 400 above, loader failures) must reach
        # the client with their own status instead of being rewrapped as 500.
        raise
    except Exception as e:
        print(f"Hata: {e}")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Hata: {str(e)}") from e
|
155 |
+
|
156 |
+
@app.post("/whisper", response_model=WhisperResponse)
async def transcribe_videos(files: List[UploadFile] = File(...),
                            model_name: str = "small",
                            language: str = "Turkish"):
    """Transcribe uploaded audio/video files to text with Whisper.

    Args:
        files: Uploaded files. Only names ending in .mp4, .wav, .mp3, .m4a or
            .flac (case-insensitive) are processed; any other upload,
            including one without a filename, is skipped and does not appear
            in the response.
        model_name: Whisper model name handed to ``load_whisper_model``.
        language: Language name; lower-cased before being passed to
            ``model.transcribe``.

    Returns:
        WhisperResponse with one ``VideoResult`` per processed file, where
        ``id`` is the uploaded filename. A file whose transcription fails is
        reported with an empty ``text``.

    Raises:
        HTTPException: 400 when no files are supplied; the loader's own
            HTTPException when the model cannot be loaded; 500 for any other
            failure.
    """
    t0 = time.time()
    print(f"new whisper request /model = {model_name} / n = {len(files)}")

    try:
        if not files:
            raise HTTPException(status_code=400, detail="Video dosyaları boş olamaz")

        # Cached after the first successful load of this model name.
        model = load_whisper_model(model_name)

        results = []

        for file in files:
            # filename can be None; treat that like an unsupported extension
            # rather than failing on None.lower().
            filename = file.filename or ""
            if not filename.lower().endswith(('.mp4', '.wav', '.mp3', '.m4a', '.flac')):
                continue

            temp_file_path = None
            try:
                # Whisper is given a path, so spool the upload to a temp file
                # that keeps the original extension. The path is recorded
                # before writing so the finally clause can always remove it.
                with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file:
                    temp_file_path = temp_file.name
                    content = await file.read()
                    temp_file.write(content)

                try:
                    # NOTE(review): transcribe is called synchronously inside
                    # this async handler.
                    result = model.transcribe(temp_file_path, language=language.lower(), verbose=False)
                    text = result['text'].strip()

                    results.append(VideoResult(id=filename, text=text))

                except Exception as e:
                    # Best effort per file: report an empty transcript and
                    # continue with the remaining uploads.
                    print(f"Video işleme hatası ({filename}): {e}")
                    results.append(VideoResult(id=filename, text=""))
            finally:
                # Remove the temp file whether spooling or transcription
                # succeeded or not.
                if temp_file_path is not None and os.path.exists(temp_file_path):
                    os.unlink(temp_file_path)

        dt = time.time() - t0
        print(f"✅ Whisper done | took {dt:.2f}s")
        print(f"Tamamlandı: {len(results)} video transkript edildi")

        return WhisperResponse(model=model_name, results=results)

    except HTTPException:
        # Keep the original status (400 above, loader failures) instead of
        # rewrapping it as a 500.
        raise
    except Exception as e:
        print(f"Whisper Hatası: {e}")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Whisper Hatası: {str(e)}") from e
|
215 |
+
|
216 |
+
@app.get("/")
def root():
    # Fixed payload confirming that the API process is up.
    payload = {"status": "ok", "message": "Edu-BERT API çalışıyor"}
    return payload
|
219 |
+
|
220 |
+
@app.get("/health")
def health_check():
    """Health-check endpoint.

    Returns:
        On success, a dict with ``status`` "healthy", the detected device
        ("CPU" or "GPU: <name>"), the number of entries currently held in the
        BERT-pipeline and Whisper loader caches, and the endpoint list. If
        anything raises, a dict with ``status`` "error" and the message.
    """
    try:
        # device is -1 for CPU; any other value means a CUDA device is in use.
        if device != -1:
            device_info = f"GPU: {torch.cuda.get_device_name(0)}"
        else:
            device_info = "CPU"

        # Current number of cached models per loader (0 if a loader exposes
        # no cache_info).
        loaded_counts = {}
        for key, loader in (
            ("bert_models_loaded", load_pipeline),
            ("whisper_models_loaded", load_whisper_model),
        ):
            if hasattr(loader, 'cache_info'):
                loaded_counts[key] = loader.cache_info().currsize
            else:
                loaded_counts[key] = 0

        return {
            "status": "healthy",
            "device": device_info,
            **loaded_counts,
            "endpoints": ["/predict", "/whisper", "/health"],
        }
    except Exception as err:
        return {"status": "error", "message": f"Sağlık kontrolü hatası: {err}"}
|
243 |
+
|
244 |
+
# Local debug (optional)
|
245 |
+
# if __name__ == "__main__":
|
246 |
+
# import uvicorn
|
247 |
+
# uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
|
requirements.txt
CHANGED
@@ -7,4 +7,7 @@ pydantic
|
|
7 |
uvicorn
|
8 |
python-multipart
|
9 |
requests
|
10 |
-
pandas
|
|
|
|
|
|
|
|
7 |
uvicorn
|
8 |
python-multipart
|
9 |
requests
|
10 |
+
pandas
|
11 |
+
openai-whisper==20250625
|
12 |
+
typing-extensions
|
13 |
+
ffmpeg-python
|