Spaces: Running on Zero

liuyang committed
Commit · 1656d98
1 Parent(s): 233e4b4

transcribe by word

Browse files
app.py CHANGED

@@ -36,125 +36,9 @@ pipe = pipeline(
     torch_dtype=torch.float16,
     device="cuda",
     model_kwargs={"attn_implementation": "flash_attention_2"},
-    return_timestamps=
+    return_timestamps="word",
 )
 
-def comprehensive_flash_attention_verification():
-    """Comprehensive verification of flash attention setup"""
-    print("🔍 Running Flash Attention Verification...")
-    print("=" * 50)
-
-    verification_results = {}
-
-    # Check 1: Package Installation
-    print("📦 Checking Python packages...")
-    try:
-        import flash_attn
-        print(f"✅ flash-attn: {flash_attn.__version__}")
-        verification_results["flash_attn_installed"] = True
-    except ImportError:
-        print("❌ flash-attn: Not installed")
-        verification_results["flash_attn_installed"] = False
-
-    try:
-        import transformers
-        print(f"✅ transformers: {transformers.__version__}")
-        verification_results["transformers_available"] = True
-    except ImportError:
-        print("❌ transformers: Not installed")
-        verification_results["transformers_available"] = False
-
-    # Check 2: CUDA Availability
-    print("\n🔍 Checking CUDA availability...")
-    cuda_available = torch.cuda.is_available()
-    print(f"✅ CUDA available: {cuda_available}")
-    if cuda_available:
-        print(f"✅ CUDA version: {torch.version.cuda}")
-        print(f"✅ GPU count: {torch.cuda.device_count()}")
-        for i in range(torch.cuda.device_count()):
-            print(f"✅ GPU {i}: {torch.cuda.get_device_name(i)}")
-    verification_results["cuda_available"] = cuda_available
-
-    # Check 3: Flash Attention Import
-    print("\n🔍 Testing flash attention imports...")
-    try:
-        from flash_attn import flash_attn_func
-        print("✅ flash_attn_func imported successfully")
-
-        if flash_attn_func is None:
-            print("❌ flash_attn_func is None")
-            verification_results["flash_attn_import"] = False
-        else:
-            print("✅ flash_attn_func is callable")
-            verification_results["flash_attn_import"] = True
-    except ImportError as e:
-        print(f"❌ Import error: {e}")
-        verification_results["flash_attn_import"] = False
-    except Exception as e:
-        print(f"❌ Unexpected error: {e}")
-        verification_results["flash_attn_import"] = False
-
-    # Check 4: Flash Attention Functionality Test
-    print("\n🔍 Testing flash attention functionality...")
-    if not cuda_available:
-        print("⚠️ Skipping functionality test - CUDA not available")
-        verification_results["flash_attn_functional"] = False
-    elif not verification_results.get("flash_attn_import", False):
-        print("⚠️ Skipping functionality test - Import failed")
-        verification_results["flash_attn_functional"] = False
-    else:
-        try:
-            from flash_attn import flash_attn_func
-
-            # Create small dummy tensors
-            batch_size, seq_len, num_heads, head_dim = 1, 16, 4, 32
-            device = "cuda:0"
-            dtype = torch.float16
-
-            print(f"Creating tensors: batch={batch_size}, seq_len={seq_len}, heads={num_heads}, dim={head_dim}")
-
-            q = torch.randn(batch_size, seq_len, num_heads, head_dim, dtype=dtype, device=device)
-            k = torch.randn(batch_size, seq_len, num_heads, head_dim, dtype=dtype, device=device)
-            v = torch.randn(batch_size, seq_len, num_heads, head_dim, dtype=dtype, device=device)
-
-            print("✅ Tensors created successfully")
-
-            # Test flash attention
-            output = flash_attn_func(q, k, v, dropout_p=0.0, causal=False)
-
-            print(f"✅ Flash attention output shape: {output.shape}")
-            print("✅ Flash attention test passed!")
-            verification_results["flash_attn_functional"] = True
-
-        except Exception as e:
-            print(f"❌ Flash attention test failed: {e}")
-            import traceback
-            traceback.print_exc()
-            verification_results["flash_attn_functional"] = False
-
-    # Summary
-    print("\n" + "=" * 50)
-    print("📊 VERIFICATION SUMMARY")
-    print("=" * 50)
-
-    all_passed = True
-    for check_name, result in verification_results.items():
-        status = "✅ PASS" if result else "❌ FAIL"
-        print(f"{check_name}: {status}")
-        if not result:
-            all_passed = False
-
-    if all_passed:
-        print("\n🎉 All checks passed! Flash attention should work.")
-        return True
-    else:
-        print("\n⚠️ Some checks failed. Flash attention may not work properly.")
-        print("\nRecommendations:")
-        print("1. Try reinstalling flash-attn: pip uninstall flash-attn && pip install flash-attn --no-build-isolation")
-        print("2. Check CUDA compatibility with your PyTorch version")
-        print("3. Consider using default attention as fallback")
-        return False
-
 class WhisperTranscriber:
     def __init__(self):
         self.pipe = pipe  # Use global pipeline
@@ -207,12 +91,6 @@ class WhisperTranscriber:
     def transcribe_audio(self, audio_path, language=None, translate=False, prompt=None):
         """Transcribe audio using Whisper with flash attention"""
 
-        # Run comprehensive flash attention verification
-        #flash_attention_working = comprehensive_flash_attention_verification()
-        #if not flash_attention_working:
-        #    print("⚠️ Flash attention verification failed, but proceeding with transcription...")
-        #    print("You may encounter the TypeError: 'NoneType' object is not callable error")
-
         '''
         #if self.pipe is None:
         #    self.setup_models()