Spaces:

ImparkAI
/

EduBert-Impark

Sleeping

App Files Community

eraydikyologlu committed on Aug 5

Commit

39c78ba

1 Parent(s): ece1c42

Video işleme fonksiyonuna thread güvenliği eklendi. Model kullanımını senkronize etmek için lock mekanizması kullanıldı. Hata yönetimi ve işlem sonuçları için detaylı istatistikler eklendi.

Browse files

Files changed (1) hide show

main-videopluskazanim.py +42 -15

main-videopluskazanim.py CHANGED Viewed

@@ -37,6 +37,7 @@ import torch
 import functools
 import asyncio
 import concurrent.futures
 import kazanim_id_konu_isim_dict_list as kazanimlar
 import logging
 import whisper
@@ -143,8 +144,11 @@ logging.basicConfig(stream=sys.stdout,
 # HF Space'lerde çok aggressive olmamak için worker sayısını azalttık
 executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
 async def process_single_video(file: UploadFile, model, language: str) -> VideoResult:
-    """Tek bir video dosyasını işle - paralel kullanım için"""
     if not file.filename.lower().endswith(('.mp4', '.wav', '.mp3', '.m4a', '.flac')):
         return VideoResult(id=file.filename, text="")
@@ -155,20 +159,28 @@ async def process_single_video(file: UploadFile, model, language: str) -> VideoR
         temp_file_path = temp_file.name
     def transcribe_sync():
-        """Senkron transcription - thread pool'da çalışacak - HF Space optimized"""
         try:
-            # HF Space için daha conservative ayarlar
-            result = model.transcribe(
-                temp_file_path,
-                language=language.lower(),
-                verbose=False,
-                fp16=False,  # HF Space'te daha stabil
-                temperature=0.0  # Deterministic output
-            )
-            return result['text'].strip()
         except Exception as e:
-            print(f"Video işleme hatası ({file.filename}): {e}")
             return ""
         finally:
             # Geçici dosyayı temizle
             if os.path.exists(temp_file_path):
@@ -249,23 +261,38 @@ async def transcribe_videos(files: List[UploadFile] = File(...),
             tasks = [process_single_video(file, model, language) for file in chunk]
             chunk_results = await asyncio.gather(*tasks, return_exceptions=True)
-            # Exception'ları handle et
             for j, result in enumerate(chunk_results):
                 if isinstance(result, Exception):
                     print(f"❌ Dosya {chunk[j].filename} işlenirken hata: {result}")
                     final_results.append(VideoResult(id=chunk[j].filename, text=""))
                 else:
                     final_results.append(result)
-            print(f"✅ Chunk {i//chunk_size + 1} tamamlandı!")
             # Memory'yi rahatlatmak için küçük bir bekleme (HF Space için)
             if i + chunk_size < len(files):
                 await asyncio.sleep(0.1)
         dt = time.time() - t0
         print(f"✅ Whisper PARALEL done | took {dt:.2f}s")
-        print(f"🎯 Tamamlandı: {len(final_results)} video PARALEL olarak transkript edildi")
         return WhisperResponse(model=model_name, results=final_results)

 import functools
 import asyncio
 import concurrent.futures
+import threading
 import kazanim_id_konu_isim_dict_list as kazanimlar
 import logging
 import whisper
 # HF Space'lerde çok aggressive olmamak için worker sayısını azalttık
 executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
+# Thread-safe model kullanımı için lock
+whisper_lock = threading.Lock()
 async def process_single_video(file: UploadFile, model, language: str) -> VideoResult:
+    """Tek bir video dosyasını işle - THREAD SAFE paralel kullanım için"""
     if not file.filename.lower().endswith(('.mp4', '.wav', '.mp3', '.m4a', '.flac')):
         return VideoResult(id=file.filename, text="")
         temp_file_path = temp_file.name
     def transcribe_sync():
+        """Senkron transcription - thread pool'da çalışacak - THREAD SAFE"""
         try:
+            # 🔒 THREAD SAFETY: Model kullanımını serialize et
+            with whisper_lock:
+                print(f"🔄 {file.filename}: Model kullanımı başlıyor...")
+                result = model.transcribe(
+                    temp_file_path,
+                    language=language.lower(),
+                    verbose=False
+                )
+                text = result['text'].strip()
+                # Model çıktısının bir kısmını logla (debug için)
+                preview = text[:150] + "..." if len(text) > 150 else text
+                print(f"📝 {file.filename}: {preview}")
+            return text
         except Exception as e:
+            print(f"❌ Video işleme hatası ({file.filename}): {e}")
             return ""
         finally:
             # Geçici dosyayı temizle
             if os.path.exists(temp_file_path):
             tasks = [process_single_video(file, model, language) for file in chunk]
             chunk_results = await asyncio.gather(*tasks, return_exceptions=True)
+            # Exception'ları handle et + SUCCESS/ERROR COUNT
+            success_count = 0
+            error_count = 0
             for j, result in enumerate(chunk_results):
                 if isinstance(result, Exception):
                     print(f"❌ Dosya {chunk[j].filename} işlenirken hata: {result}")
                     final_results.append(VideoResult(id=chunk[j].filename, text=""))
+                    error_count += 1
                 else:
                     final_results.append(result)
+                    # Başarılı sayısı = boş olmayan text'ler
+                    if result.text.strip():
+                        success_count += 1
+                    else:
+                        error_count += 1
+            print(f"✅ Chunk {i//chunk_size + 1} tamamlandı! ✅{success_count} ❌{error_count}")
             # Memory'yi rahatlatmak için küçük bir bekleme (HF Space için)
             if i + chunk_size < len(files):
                 await asyncio.sleep(0.1)
         dt = time.time() - t0
+        # Final istatistikler
+        total_success = len([r for r in final_results if r.text.strip()])
+        total_failed = len(final_results) - total_success
+        success_rate = (total_success / len(final_results) * 100) if final_results else 0
         print(f"✅ Whisper PARALEL done | took {dt:.2f}s")
+        print(f"🎯 SONUÇ: {len(final_results)} video | ✅{total_success} başarılı | ❌{total_failed} hatalı | 📊{success_rate:.1f}% başarı oranı")
+        print(f"⚡ Hız: {len(final_results)/dt:.1f} video/saniye (chunk_size={chunk_size})")
         return WhisperResponse(model=model_name, results=final_results)