# main.py - Hugging Face Spaces API: ders_id -> model mapping -> batch inference -> kazanımID
# Requirements (requirements.txt):
# fastapi transformers torch pydantic uvicorn tensorflow
#
# Directory layout within the Space repo:
# - main.py (this file)
# - model_mapping.json
# - kazanim_id_konu_isim_dict_list.py
#
# 📌 Endpoints:
# POST /predict {"model_name": "eraydikyologlu/bert_ayt_matematik", "inputs": ["soru1", "soru2", ...]}
#     → {"model": "...", "results": [{"kazanım_id": "2873", "label": "LABEL_0", "score": 0.97}, ...]}

import os
import logging

logger = logging.getLogger("uvicorn")
logger.setLevel(logging.INFO)

# Redirect the Hugging Face cache to a writable directory
os.environ["HF_HOME"] = "/tmp/.cache/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/.cache/huggingface"
os.environ["HF_HUB_CACHE"] = "/tmp/.cache/huggingface"
os.environ["TRANSFORMERS_VERBOSITY"] = "info"
os.environ["HF_HUB_DISABLE_BIN_TO_SAFETENSORS_CONVERSION"] = "1"

try:
    import tensorflow as tf
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
except ImportError:
    pass

import functools
import sys
import time

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from typing import List
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch

import kazanim_id_konu_isim_dict_list as kazanimlar  # kazanım ID / topic name lookup tables

logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format="%(asctime)s %(levelname)s %(message)s")

app = FastAPI(title="Edu-BERT Multi-Model API")

# Use the GPU (device 0) when available; otherwise fall back to CPU (device -1), the usual case on Spaces
device = 0 if torch.cuda.is_available() else -1
print(f"🧠 torch: {torch.__version__}, cuda available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"🚀 CUDA device name: {torch.cuda.get_device_name(0)}")
else:
    print("⚠️ CUDA not available, using CPU.")


# ---------- Pydantic Schemas ---------- #
class PredictRequest(BaseModel):
    model_name: str = Field(..., description="Model name (e.g. eraydikyologlu/bert_ayt_matematik)")
    inputs: List[str] = Field(..., description="List of question texts")


class QuestionResult(BaseModel):
    label: str
    score: float


class PredictResponse(BaseModel):
    model: str
    results: List[QuestionResult]


# ---------- Helpers ---------- #
@functools.lru_cache(maxsize=8)
def load_pipeline(model_name: str):
    """Load a text-classification pipeline for the given model - minimal approach."""
    try:
        print(f"Loading model: {model_name}")
        # base_tok = "umutarpayy/tyt_turkce_bert"
        # model_name = "eraydikyologlu/tyt_turkce_bert_pt"
        # EXACTLY like the working local code - NO extra parameters
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)
        print(f"Model loaded successfully: {model_name}")
        return classifier
    except Exception as e:
        print(f"Model loading error ({model_name}): {e}")
        raise HTTPException(status_code=500, detail=f"Model loading error: {str(e)}")


@app.post("/predict", response_model=PredictResponse)
async def predict(req: PredictRequest):
    """Main endpoint - batch inference for the requested model_name."""
    t0 = time.time()
    print(f"new request / model = {req.model_name} / n = {len(req.inputs)}")
    try:
        if not req.inputs:
            raise HTTPException(status_code=400, detail="inputs must not be empty")

        # Load (or reuse the cached) pipeline
        classifier = load_pipeline(req.model_name)

        # Batch inference
        outputs = classifier(req.inputs, truncation=True, padding=True, batch_size=8)

        dt = time.time() - t0
        print(f"✅ done | took {dt:.2f}s")

        results = []
        for out in outputs:
            results.append(QuestionResult(label=out["label"], score=float(out["score"])))

        print(f"Finished: {len(results)} results")
        return PredictResponse(model=req.model_name, results=results)
    except HTTPException:
        # Re-raise FastAPI errors (e.g. the 400 above) without converting them to 500
        raise
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")


@app.get("/")
def root():
    return {"status": "ok", "message": "Edu-BERT API is running"}


@app.get("/health")
def health_check():
    """Health check endpoint."""
    try:
        # Determine which hardware we are running on
        if device == -1:
            device_info = "CPU"
        else:
            device_info = f"GPU: {torch.cuda.get_device_name(0)}"
        return {
            "status": "healthy",
            "device": device_info,
            # currsize is already an int (number of cached pipelines)
            "models_loaded": load_pipeline.cache_info().currsize if hasattr(load_pipeline, "cache_info") else 0,
        }
    except Exception as e:
        return {"status": "error", "message": f"Health check error: {str(e)}"}


# Local debug (optional)
# if __name__ == "__main__":
#     import uvicorn
#     uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)