eraydikyologlu commited on
Commit
e00482a
·
1 Parent(s): b93d19e

Edu-BERT API'si için ana uygulama dosyası eklendi. Model yükleme, tahmin ve video transkripsiyonu için gerekli endpoint'ler oluşturuldu. Ayrıca, requirements.txt dosyasına yeni bağımlılıklar eklendi.

Browse files
Files changed (2) hide show
  1. main-videopluskazanim.py +247 -0
  2. requirements.txt +4 -1
main-videopluskazanim.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py - Hugging Face Spaces API: ders_id -> model mapping -> batch inference -> kazanımID
2
+ # Requirements (requirements.txt):
3
+ # fastapi transformers torch pydantic uvicorn tensorflow openai-whisper python-multipart
4
+ #
5
+ # Directory layout within Space repo:
6
+ # - main.py (this file)
7
+ # - model_mapping.json
8
+ # - kazanim_id_konu_isim_dict_list.py
9
+ #
10
+ # 📌 Endpoints:
11
+ # POST /predict {"model_name": "eraydikyologlu/bert_ayt_matematik", "inputs": ["soru1", "soru2", ...]}
12
+ # → {"model": "...", "results": [{"label": "LABEL_0", "score": 0.97}, ...]}
13
+
14
+ import os
15
+ import logging
16
+ logger = logging.getLogger("uvicorn")
17
+ logger.setLevel(logging.INFO)
18
+
19
# Redirect the Hugging Face caches to a writable directory and set the
# library verbosity flags. This runs before `transformers` is imported
# further down in the file.
os.environ.update(
    {
        "HF_HOME": "/tmp/.cache/huggingface",
        "TRANSFORMERS_CACHE": "/tmp/.cache/huggingface",
        "HF_HUB_CACHE": "/tmp/.cache/huggingface",
        "TRANSFORMERS_VERBOSITY": "info",
        "HF_HUB_DISABLE_BIN_TO_SAFETENSORS_CONVERSION": "1",
    }
)

# TensorFlow is optional: when it is installed, lower its log level to ERROR;
# when it is missing, carry on without it.
try:
    import tensorflow as tf
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
except ImportError:
    pass
31
+
32
+ from fastapi import FastAPI, HTTPException, UploadFile, File
33
+ from pydantic import BaseModel, Field
34
+ from typing import List, Dict, Optional
35
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
36
+ import torch
37
+ import functools
38
+ import kazanim_id_konu_isim_dict_list as kazanimlar
39
+ import logging
40
+ import whisper
41
+ import tempfile
42
+
43
app = FastAPI(title="Edu-BERT Multi‑Model API")

# Device index handed to the transformers pipeline: 0 when CUDA is available,
# otherwise -1 (CPU). Original note: the Hugging Face Space runs on CPU.
device = 0 if torch.cuda.is_available() else -1

print(f"🧠 torch: {torch.__version__}, cuda available: {torch.cuda.is_available()}")

# Report which hardware was detected at startup.
print(
    f"🚀 CUDA device name: {torch.cuda.get_device_name(0)}"
    if torch.cuda.is_available()
    else "⚠️ CUDA not available, using CPU."
)
54
+
55
+ # ---------- Pydantic Schemas ---------- #
56
# Request body of POST /predict.
class PredictRequest(BaseModel):
    # Model identifier that is forwarded to load_pipeline().
    model_name: str = Field(
        ...,
        description="Model adı (örn: eraydikyologlu/bert_ayt_matematik)",
    )
    # Question texts that are classified in one batch.
    inputs: List[str] = Field(
        ...,
        description="Soru metinleri listesi",
    )
59
+
60
# Whisper options schema. NOTE(review): no endpoint in the code shown here
# takes this model; /whisper reads model_name and language as plain parameters.
class WhisperRequest(BaseModel):
    # Whisper checkpoint name; defaults to "small".
    model_name: str = Field(
        default="small",
        description="Whisper model adı (tiny, base, small, medium, large)",
    )
    # Spoken language of the media; defaults to "Turkish".
    language: str = Field(
        default="Turkish",
        description="Dil",
    )
    # Batch size; defaults to 8.
    batch_size: int = Field(
        default=8,
        description="Batch boyutu",
    )
64
+
65
# One classification result, built from a single pipeline output in /predict.
class QuestionResult(BaseModel):
    label: str    # the "label" value of the pipeline output
    score: float  # the "score" value of the pipeline output, cast to float
68
+
69
# Transcription result for one uploaded file in /whisper.
class VideoResult(BaseModel):
    id: str    # filled with the uploaded file's name
    text: str  # transcript text; empty string when transcription failed
72
+
73
# Response body of POST /predict.
class PredictResponse(BaseModel):
    model: str                     # echo of the requested model name
    results: List[QuestionResult]  # one entry per pipeline output
76
+
77
# Response body of POST /whisper.
class WhisperResponse(BaseModel):
    model: str                  # echo of the requested Whisper model name
    results: List[VideoResult]  # one entry per processed upload
80
+
81
+ # ---------- Helpers ---------- #
82
+
83
@functools.lru_cache(maxsize=8)
def load_pipeline(model_name: str):
    """Build the text-classification pipeline for ``model_name``.

    ``functools.lru_cache`` memoises the result for up to 8 distinct model
    names, so repeated requests for the same model reuse the loaded pipeline.

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls.

    Returns:
        The ``pipeline("text-classification", ...)`` object created on the
        module-level ``device``.

    Raises:
        HTTPException: Status 500 when any step of the loading fails.
    """
    try:
        print(f"Model yükleniyor: {model_name}")
        # Plain from_pretrained calls without extra arguments, mirroring the
        # author's working local setup.
        tok = AutoTokenizer.from_pretrained(model_name)
        seq_clf = AutoModelForSequenceClassification.from_pretrained(model_name)
        text_clf = pipeline(
            "text-classification",
            model=seq_clf,
            tokenizer=tok,
            device=device,
        )
        print(f"Model başarıyla yüklendi: {model_name}")
    except Exception as e:
        print(f"Model yükleme hatası ({model_name}): {e}")
        raise HTTPException(status_code=500, detail=f"Model yükleme hatası: {str(e)}")
    return text_clf
101
+
102
@functools.lru_cache(maxsize=4)
def load_whisper_model(model_name: str):
    """Load a Whisper model through ``whisper.load_model``.

    ``functools.lru_cache`` memoises the result for up to 4 distinct names.

    Args:
        model_name: Name handed to ``whisper.load_model``.

    Returns:
        The loaded Whisper model object.

    Raises:
        HTTPException: Status 500 when loading fails.
    """
    try:
        print(f"Whisper modeli yükleniyor: {model_name}")
        loaded = whisper.load_model(model_name)
        print(f"Whisper modeli başarıyla yüklendi: {model_name}")
    except Exception as e:
        print(f"Whisper model yükleme hatası ({model_name}): {e}")
        raise HTTPException(status_code=500, detail=f"Whisper model yükleme hatası: {str(e)}")
    return loaded
114
+
115
+ import time, logging, sys
116
# Configure root logging: INFO level, written to stdout, with a
# "timestamp level message" line format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    stream=sys.stdout,
)
119
+
120
@app.post("/predict", response_model=PredictResponse)
async def predict(req: PredictRequest):
    """Main endpoint: run batch inference with the model named in the request.

    Args:
        req: Request body carrying ``model_name`` and the ``inputs`` list.

    Returns:
        A ``PredictResponse`` echoing the model name, with one
        ``QuestionResult`` (label, score) per pipeline output.

    Raises:
        HTTPException: Status 400 when ``inputs`` is empty. Status 500 for any
            other failure, including the loading error raised by
            ``load_pipeline``.
    """
    t0 = time.time()
    print(f"new request /model = {req.model_name} / n = {len(req.inputs)}")
    try:
        if not req.inputs:
            raise HTTPException(status_code=400, detail="inputs boş olamaz")

        # Load (or reuse the cached) pipeline for this model.
        classifier = load_pipeline(req.model_name)

        # Batched inference over all inputs.
        outputs = classifier(req.inputs, truncation=True, padding=True, batch_size=8)
        dt = time.time() - t0
        print(f"✅ done | took {dt:.2f}s")

        results = [
            QuestionResult(label=out["label"], score=float(out["score"]))
            for out in outputs
        ]

        print(f"Tamamlandı: {len(results)} sonuç")
        return PredictResponse(model=req.model_name, results=results)

    except HTTPException:
        # Deliberate HTTP errors (the 400 above, the loader's 500) keep their
        # own status code and detail instead of being rewrapped as a generic 500.
        raise
    except Exception as e:
        print(f"Hata: {e}")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Hata: {str(e)}") from e
155
+
156
@app.post("/whisper", response_model=WhisperResponse)
async def transcribe_videos(files: List[UploadFile] = File(...),
                            model_name: str = "small",
                            language: str = "Turkish"):
    """Transcribe uploaded video/audio files to text with Whisper.

    Uploads whose name does not end in .mp4, .wav, .mp3, .m4a or .flac
    (case-insensitive), or that have no name, are skipped. Each accepted
    upload is written to a temporary file, transcribed, and the temporary
    file is removed afterwards. A failure while transcribing one file is
    logged and reported as an empty ``text`` for that file.

    Args:
        files: Uploaded media files.
        model_name: Whisper model name passed to ``load_whisper_model``.
        language: Language name; lower-cased before it is passed to
            ``model.transcribe``.

    Returns:
        A ``WhisperResponse`` echoing the model name, with one ``VideoResult``
        per processed upload.

    Raises:
        HTTPException: Status 400 when ``files`` is empty. Status 500 for any
            other failure, including the loading error raised by
            ``load_whisper_model``.
    """
    t0 = time.time()
    print(f"new whisper request /model = {model_name} / n = {len(files)}")

    allowed_suffixes = ('.mp4', '.wav', '.mp3', '.m4a', '.flac')

    try:
        if not files:
            raise HTTPException(status_code=400, detail="Video dosyaları boş olamaz")

        # Load (or reuse the cached) Whisper model.
        model = load_whisper_model(model_name)

        results = []

        for file in files:
            # An upload may arrive without a filename; treat it as unsupported
            # instead of failing on None.lower().
            filename = file.filename or ""
            if not filename.lower().endswith(allowed_suffixes):
                continue

            temp_file_path = None
            try:
                # The temp file is created with delete=False, so the cleanup
                # below must also cover failures while reading/writing the upload.
                with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_file:
                    temp_file_path = temp_file.name
                    content = await file.read()
                    temp_file.write(content)

                try:
                    # NOTE(review): transcribe() is called synchronously inside
                    # an async handler — confirm this is acceptable for the
                    # expected load.
                    result = model.transcribe(temp_file_path, language=language.lower(), verbose=False)
                    text = result['text'].strip()
                    results.append(VideoResult(id=filename, text=text))
                except Exception as e:
                    # Best effort per file: report an empty transcript and
                    # continue with the remaining uploads.
                    print(f"Video işleme hatası ({filename}): {e}")
                    results.append(VideoResult(id=filename, text=""))
            finally:
                # Always remove the temporary file.
                if temp_file_path is not None and os.path.exists(temp_file_path):
                    os.unlink(temp_file_path)

        dt = time.time() - t0
        print(f"✅ Whisper done | took {dt:.2f}s")
        print(f"Tamamlandı: {len(results)} video transkript edildi")

        return WhisperResponse(model=model_name, results=results)

    except HTTPException:
        # Deliberate HTTP errors (the 400 above, the loader's 500) keep their
        # own status code and detail instead of being rewrapped as a generic 500.
        raise
    except Exception as e:
        print(f"Whisper Hatası: {e}")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Whisper Hatası: {str(e)}") from e
215
+
216
@app.get("/")
def root():
    # Fixed payload signalling that the API process is up.
    payload = {"status": "ok", "message": "Edu-BERT API çalışıyor"}
    return payload
219
+
220
@app.get("/health")
def health_check():
    """Health-check endpoint."""
    def _cached_count(loader):
        # Number of entries currently held in the loader's lru_cache, or 0
        # when the callable exposes no cache_info().
        return loader.cache_info().currsize if hasattr(loader, 'cache_info') else 0

    try:
        # Describe the hardware in use; the GPU name is only queried when a
        # CUDA device index was selected at startup.
        device_info = "CPU" if device == -1 else f"GPU: {torch.cuda.get_device_name(0)}"

        bert_models = _cached_count(load_pipeline)
        whisper_models = _cached_count(load_whisper_model)

        return {
            "status": "healthy",
            "device": device_info,
            "bert_models_loaded": bert_models,
            "whisper_models_loaded": whisper_models,
            "endpoints": ["/predict", "/whisper", "/health"]
        }
    except Exception as e:
        # Report the failure in the payload rather than raising.
        return {"status": "error", "message": f"Sağlık kontrolü hatası: {str(e)}"}
243
+
244
+ # Local debug (optional)
245
+ # if __name__ == "__main__":
246
+ # import uvicorn
247
+ # uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
requirements.txt CHANGED
@@ -7,4 +7,7 @@ pydantic
7
  uvicorn
8
  python-multipart
9
  requests
10
- pandas
 
 
 
 
7
  uvicorn
8
  python-multipart
9
  requests
10
+ pandas
11
+ openai-whisper==20250625
12
+ typing-extensions
13
+ ffmpeg-python