Spaces:

mippia
/

AI-Music-Detection-FST

Running on Zero

App Files Files Community

Seonghyeon Go committed on Aug 28

Commit

e836611

1 Parent(s): 0ede85b

add spaces decorator

Browse files

Files changed (7) hide show

__pycache__/dataset_f.cpython-312.pyc +0 -0
__pycache__/inference.cpython-312.pyc +0 -0
__pycache__/model.cpython-312.pyc +0 -0
__pycache__/networks.cpython-312.pyc +0 -0
__pycache__/preprocess.cpython-312.pyc +0 -0
app.py +102 -40
requirements.txt +2 -1

__pycache__/dataset_f.cpython-312.pyc ADDED Viewed

Binary file (8.74 kB). View file

__pycache__/inference.cpython-312.pyc ADDED Viewed

Binary file (9.82 kB). View file

__pycache__/model.cpython-312.pyc ADDED Viewed

Binary file (52.7 kB). View file

__pycache__/networks.cpython-312.pyc ADDED Viewed

Binary file (25.7 kB). View file

__pycache__/preprocess.cpython-312.pyc ADDED Viewed

Binary file (8.89 kB). View file

app.py CHANGED Viewed

@@ -3,40 +3,98 @@ import torch
 import librosa
 import numpy as np
 from inference import inference
-def detect_ai_audio(audio_file):
     """
-    Detect whether the uploaded audio file was generated by AI
     """
-    result = inference(audio_file)
-    print(result)
-    # Format result with better styling
-    if "AI" in str(result).upper() or "artificial" in str(result).lower():
-        status = "AI Generated"
-        color = "#ff6b6b"
-    else:
-        status = "Human Generated"
-        color = "#51cf66"
-    formatted_result = f"""
-    <div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, {color}22, {color}11);">
-        <div style="font-size: 24px; font-weight: bold; color: {color}; margin-bottom: 8px;">{status}</div>
-        <div style="font-size: 16px; color: #666;">Analysis Result: {result}</div>
-    </div>
-    """
-    return formatted_result
-# 커스텀 CSS
 custom_css = """
-/* 전체 배경 그라디언트 */
 .gradio-container {
     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
     min-height: 100vh;
 }
-/* 메인 컨테이너 스타일링 */
 .main-container {
     background: rgba(255, 255, 255, 0.95) !important;
     backdrop-filter: blur(10px) !important;
@@ -46,7 +104,7 @@ custom_css = """
     padding: 30px !important;
 }
-/* 제목 스타일링 */
 h1 {
     background: linear-gradient(135deg, #667eea, #764ba2) !important;
     -webkit-background-clip: text !important;
@@ -55,10 +113,9 @@ h1 {
     font-size: 3em !important;
     font-weight: 800 !important;
     margin-bottom: 10px !important;
-    text-shadow: 2px 2px 4px rgba(0,0,0,0.1) !important;
 }
-/* 설명 텍스트 */
 .gradio-markdown p {
     text-align: center !important;
     font-size: 1.2em !important;
@@ -66,7 +123,7 @@ h1 {
     margin-bottom: 30px !important;
 }
-/* 오디오 업로드 컴포넌트 */
 .upload-container {
     background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
     border-radius: 15px !important;
@@ -81,7 +138,7 @@ h1 {
     box-shadow: 0 15px 40px rgba(240, 147, 251, 0.4) !important;
 }
-/* 결과 출력 영역 */
 .output-container {
     background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%) !important;
     border-radius: 15px !important;
@@ -91,16 +148,7 @@ h1 {
     min-height: 150px !important;
 }
-/* 예시 파일 섹션 */
-.examples-container {
-    background: rgba(255, 255, 255, 0.7) !important;
-    border-radius: 15px !important;
-    padding: 20px !important;
-    margin-top: 30px !important;
-    box-shadow: 0 5px 15px rgba(0,0,0,0.08) !important;
-}
-/* 버튼 스타일링 */
 .gr-button {
     background: linear-gradient(135deg, #667eea, #764ba2) !important;
     border: none !important;
@@ -117,7 +165,7 @@ h1 {
     box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
 }
-/* 애니메이션 추가 */
 @keyframes fadeInUp {
     from {
         opacity: 0;
@@ -133,7 +181,7 @@ h1 {
     animation: fadeInUp 0.8s ease-out !important;
 }
-/* 반응형 디자인 */
 @media (max-width: 768px) {
     h1 {
         font-size: 2em !important;
@@ -146,7 +194,20 @@ h1 {
 }
 """
-# Gradio 인터페이스 생성
 demo = gr.Interface(
     fn=detect_ai_audio,
     inputs=gr.Audio(
@@ -163,7 +224,8 @@ demo = gr.Interface(
     <div style="text-align: center; font-size: 1.2em; color: #555; margin: 20px 0;">
         <p><strong>Advanced AI technology</strong> to accurately detect whether uploaded audio was generated by AI!</p>
         <p>Supported formats: MP3, WAV, M4A, FLAC and various audio formats</p>
-        <p>Fast and accurate real-time analysis</p>
     </div>
     """,
     examples=[

 import librosa
 import numpy as np
 from inference import inference
+from huggingface_hub import hf_hub_download
+import os
+from pathlib import Path
+import spaces
+def download_models_from_hub():
     """
+    Download model checkpoints from Hugging Face Model Hub
     """
+    model_dir = Path("checkpoints")
+    model_dir.mkdir(exist_ok=True)
+    # Original checkpoint filenames on HF Hub
+    models = {
+        "main": "EmbeddingModel_MERT_768-epoch=0073-val_loss=0.1058-val_acc=0.9585-val_f1=0.9366-val_precision=0.9936-val_recall=0.8857.ckpt",
+        "backup": "step=007000-val_loss=0.1831-val_acc=0.9278.ckpt"
+    }
+    downloaded_models = {}
+    for model_name, filename in models.items():
+        local_path = model_dir / filename
+        if not local_path.exists():
+            print(f"📥 Downloading {model_name} model from Hugging Face Hub...")
+            model_path = hf_hub_download(
+                repo_id="mippia/FST-checkpoints",
+                filename=filename,
+                local_dir=str(model_dir),
+                local_dir_use_symlinks=False
+            )
+            print(f"✅ {model_name} model downloaded successfully!")
+            downloaded_models[model_name] = str(local_path)
+        else:
+            print(f"✅ {model_name} model already exists locally")
+            downloaded_models[model_name] = str(local_path)
+    return downloaded_models
+@spaces.GPU
+def detect_ai_audio(audio_file):
+    """
+    Detect whether the uploaded audio file was generated by AI
+    """
+    if audio_file is None:
+        return """
+        <div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #ff6b6b22, #ff6b6b11);">
+            <div style="font-size: 18px; color: #ff6b6b;">⚠️ Please upload an audio file</div>
+        </div>
+        """
+    try:
+        result = inference(audio_file)
+        # Format result with better styling
+        if "AI" in str(result).upper() or "artificial" in str(result).lower() or "fake" in str(result).lower():
+            status = "AI Generated"
+            color = "#ff6b6b"
+            confidence = "High confidence this audio was generated by AI"
+        else:
+            status = "Human Generated"
+            color = "#51cf66"
+            confidence = "High confidence this audio was created by humans"
+        formatted_result = f"""
+        <div style="text-align: center; padding: 25px; border-radius: 15px; background: linear-gradient(135deg, {color}22, {color}11); border: 2px solid {color}33;">
+            <div style="font-size: 28px; font-weight: bold; color: {color}; margin-bottom: 10px;">{status}</div>
+            <div style="font-size: 16px; color: #666; margin-bottom: 8px;">{confidence}</div>
+            <div style="font-size: 14px; color: #888;">Raw output: {result}</div>
+        </div>
+        """
+        return formatted_result
+    except Exception as e:
+        error_result = f"""
+        <div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #ff6b6b22, #ff6b6b11);">
+            <div style="font-size: 20px; font-weight: bold; color: #ff6b6b; margin-bottom: 8px;">Error</div>
+            <div style="font-size: 14px; color: #666;">Failed to process audio: {str(e)}</div>
+        </div>
+        """
+        return error_result
+# Custom CSS for modern design
 custom_css = """
+/* Global background gradient */
 .gradio-container {
     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
     min-height: 100vh;
 }
+/* Main container styling */
 .main-container {
     background: rgba(255, 255, 255, 0.95) !important;
     backdrop-filter: blur(10px) !important;
     padding: 30px !important;
 }
+/* Title styling */
 h1 {
     background: linear-gradient(135deg, #667eea, #764ba2) !important;
     -webkit-background-clip: text !important;
     font-size: 3em !important;
     font-weight: 800 !important;
     margin-bottom: 10px !important;
 }
+/* Description text */
 .gradio-markdown p {
     text-align: center !important;
     font-size: 1.2em !important;
     margin-bottom: 30px !important;
 }
+/* Audio upload component */
 .upload-container {
     background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
     border-radius: 15px !important;
     box-shadow: 0 15px 40px rgba(240, 147, 251, 0.4) !important;
 }
+/* Output container */
 .output-container {
     background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%) !important;
     border-radius: 15px !important;
     min-height: 150px !important;
 }
+/* Button styling */
 .gr-button {
     background: linear-gradient(135deg, #667eea, #764ba2) !important;
     border: none !important;
     box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
 }
+/* Animation */
 @keyframes fadeInUp {
     from {
         opacity: 0;
     animation: fadeInUp 0.8s ease-out !important;
 }
+/* Responsive design */
 @media (max-width: 768px) {
     h1 {
         font-size: 2em !important;
 }
 """
+# Initialize the app
+print("🚀 Starting FST AI Audio Detection App...")
+print("📦 Initializing models...")
+# Download models at startup
+models = download_models_from_hub()
+# Check if main model is available
+if models.get("main"):
+    print("✅ Main model ready for inference")
+else:
+    print("⚠️ Warning: Main model not available, app may not work properly")
+# Create Gradio interface
 demo = gr.Interface(
     fn=detect_ai_audio,
     inputs=gr.Audio(
     <div style="text-align: center; font-size: 1.2em; color: #555; margin: 20px 0;">
         <p><strong>Advanced AI technology</strong> to accurately detect whether uploaded audio was generated by AI!</p>
         <p>Supported formats: MP3, WAV, M4A, FLAC and various audio formats</p>
+        <p>Powered by Fusion Segment Transformer (FST) - ISMIR 2025</p>
+        <p style="font-size: 0.9em; color: #777;">🔬 Research-grade accuracy with MERT-768 backbone</p>
     </div>
     """,
     examples=[

requirements.txt CHANGED Viewed

@@ -9,4 +9,5 @@ numpy>=1.24.0
 scipy>=1.10.0
 soundfile>=0.12.0
 datasets>=2.0.0
-accelerate>=0.20.0

 scipy>=1.10.0
 soundfile>=0.12.0
 datasets>=2.0.0
+accelerate>=0.20.0
+spaces