Spaces:
Sleeping
Sleeping
import html
import os
from pathlib import Path

import gradio as gr
import torch
import librosa
import numpy as np
from huggingface_hub import hf_hub_download
import spaces

from inference import inference
def download_models_from_hub():
    """
    Download model checkpoints from the Hugging Face Model Hub.

    Checkpoints are stored in the local ``checkpoints`` directory, which is
    created if it does not exist. A checkpoint that is already on disk is
    reused without calling the Hub. A checkpoint whose download fails is
    reported and left out of the result, so one failure does not abort the
    remaining downloads.

    Returns:
        dict: Maps each model role ("main", "backup") to the local checkpoint
        path as a string. Only checkpoints that are available locally after
        the call are included.
    """
    model_dir = Path("checkpoints")
    model_dir.mkdir(exist_ok=True)

    # Original checkpoint filenames on HF Hub
    models = {
        "main": "EmbeddingModel_MERT_768-epoch=0073-val_loss=0.1058-val_acc=0.9585-val_f1=0.9366-val_precision=0.9936-val_recall=0.8857.ckpt",
        "backup": "step=007000-val_loss=0.1831-val_acc=0.9278.ckpt",
    }

    downloaded_models = {}
    for model_name, filename in models.items():
        local_path = model_dir / filename
        if local_path.exists():
            print(f"✅ {model_name} model already exists locally")
        else:
            print(f"📥 Downloading {model_name} model from Hugging Face Hub...")
            try:
                hf_hub_download(
                    repo_id="mippia/FST-checkpoints",
                    filename=filename,
                    local_dir=str(model_dir),
                    local_dir_use_symlinks=False,
                )
            except Exception as exc:
                # Keep going: callers check which roles are present in the
                # returned mapping, so a missing entry is how failure is
                # signalled.
                print(f"⚠️ Failed to download {model_name} model: {exc}")
                continue
            print(f"✅ {model_name} model downloaded successfully!")
        downloaded_models[model_name] = str(local_path)
    return downloaded_models
def detect_ai_audio(audio_file):
    """
    Detect whether the uploaded audio file was generated by AI.

    Args:
        audio_file: The value handed over by the audio input component, or
            None when nothing was uploaded. It is passed unchanged to
            ``inference``.

    Returns:
        str: An HTML fragment. It is one of: a prompt to upload a file, the
        verdict card (status, a fixed explanatory sentence and the raw model
        output), or an error card when processing raised an exception.
    """
    if audio_file is None:
        return """
        <div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #ff6b6b22, #ff6b6b11);">
            <div style="font-size: 18px; color: #ff6b6b;">⚠️ Please upload an audio file</div>
        </div>
        """

    try:
        raw_output = str(inference(audio_file))
    except Exception as e:
        # The exception text may echo the user-supplied file name, so it is
        # escaped before being embedded in HTML.
        error_detail = html.escape(str(e))
        return f"""
        <div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #ff6b6b22, #ff6b6b11);">
            <div style="font-size: 20px; font-weight: bold; color: #ff6b6b; margin-bottom: 8px;">Error</div>
            <div style="font-size: 14px; color: #666;">Failed to process audio: {error_detail}</div>
        </div>
        """

    # Format result with better styling.
    # NOTE(review): this is a plain substring match on the textual output, so
    # "ai" inside any other word (e.g. "uncertain") also selects the AI
    # branch. Confirm against what `inference` actually returns.
    lowered = raw_output.lower()
    looks_ai = (
        "AI" in raw_output.upper()
        or "artificial" in lowered
        or "fake" in lowered
    )
    if looks_ai:
        status = "AI Generated"
        color = "#ff6b6b"
        confidence = "High confidence this audio was generated by AI"
    else:
        status = "Human Generated"
        color = "#51cf66"
        confidence = "High confidence this audio was created by humans"

    # The model output is rendered as text, never as markup.
    safe_output = html.escape(raw_output)
    return f"""
    <div style="text-align: center; padding: 25px; border-radius: 15px; background: linear-gradient(135deg, {color}22, {color}11); border: 2px solid {color}33;">
        <div style="font-size: 28px; font-weight: bold; color: {color}; margin-bottom: 10px;">{status}</div>
        <div style="font-size: 16px; color: #666; margin-bottom: 8px;">{confidence}</div>
        <div style="font-size: 14px; color: #888;">Raw output: {safe_output}</div>
    </div>
    """
| # Custom CSS for modern design | |
| custom_css = """ | |
| /* Global background gradient */ | |
| .gradio-container { | |
| background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important; | |
| min-height: 100vh; | |
| } | |
| /* Main container styling */ | |
| .main-container { | |
| background: rgba(255, 255, 255, 0.95) !important; | |
| backdrop-filter: blur(10px) !important; | |
| border-radius: 20px !important; | |
| box-shadow: 0 20px 40px rgba(0,0,0,0.1) !important; | |
| margin: 20px !important; | |
| padding: 30px !important; | |
| } | |
| /* Title styling */ | |
| h1 { | |
| background: linear-gradient(135deg, #667eea, #764ba2) !important; | |
| -webkit-background-clip: text !important; | |
| -webkit-text-fill-color: transparent !important; | |
| text-align: center !important; | |
| font-size: 3em !important; | |
| font-weight: 800 !important; | |
| margin-bottom: 10px !important; | |
| } | |
| /* Description text */ | |
| .gradio-markdown p { | |
| text-align: center !important; | |
| font-size: 1.2em !important; | |
| color: #555 !important; | |
| margin-bottom: 30px !important; | |
| } | |
| /* Audio upload component */ | |
| .upload-container { | |
| background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important; | |
| border-radius: 15px !important; | |
| padding: 20px !important; | |
| border: none !important; | |
| box-shadow: 0 10px 30px rgba(240, 147, 251, 0.3) !important; | |
| transition: all 0.3s ease !important; | |
| } | |
| .upload-container:hover { | |
| transform: translateY(-5px) !important; | |
| box-shadow: 0 15px 40px rgba(240, 147, 251, 0.4) !important; | |
| } | |
| /* Output container */ | |
| .output-container { | |
| background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%) !important; | |
| border-radius: 15px !important; | |
| padding: 20px !important; | |
| border: none !important; | |
| box-shadow: 0 10px 30px rgba(168, 237, 234, 0.3) !important; | |
| min-height: 150px !important; | |
| } | |
| /* Button styling */ | |
| .gr-button { | |
| background: linear-gradient(135deg, #667eea, #764ba2) !important; | |
| border: none !important; | |
| border-radius: 25px !important; | |
| padding: 12px 30px !important; | |
| font-weight: 600 !important; | |
| color: white !important; | |
| box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4) !important; | |
| transition: all 0.3s ease !important; | |
| } | |
| .gr-button:hover { | |
| transform: translateY(-2px) !important; | |
| box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important; | |
| } | |
| /* Animation */ | |
| @keyframes fadeInUp { | |
| from { | |
| opacity: 0; | |
| transform: translateY(30px); | |
| } | |
| to { | |
| opacity: 1; | |
| transform: translateY(0); | |
| } | |
| } | |
| .gradio-container > div { | |
| animation: fadeInUp 0.8s ease-out !important; | |
| } | |
| /* Responsive design */ | |
| @media (max-width: 768px) { | |
| h1 { | |
| font-size: 2em !important; | |
| } | |
| .main-container { | |
| margin: 10px !important; | |
| padding: 20px !important; | |
| } | |
| } | |
| """ | |
# Initialize the app
print("🚀 Starting FST AI Audio Detection App...")
print("📦 Initializing models...")

# Download models at startup (runs at import time, before the interface is built)
models = download_models_from_hub()

# Check if main model is available; a missing entry is only reported, the
# app still starts.
if models.get("main"):
    print("✅ Main model ready for inference")
else:
    print("⚠️ Warning: Main model not available, app may not work properly")
# Create Gradio interface: one audio input (handed to detect_ai_audio as a
# file path) and one HTML output that renders the returned fragment.
demo = gr.Interface(
    fn=detect_ai_audio,
    inputs=gr.Audio(
        type="filepath",
        label="Upload Audio File",
        elem_classes=["upload-container"],  # styled by .upload-container in custom_css
    ),
    outputs=gr.HTML(
        label="Detection Result",
        elem_classes=["output-container"],  # styled by .output-container in custom_css
    ),
    title="AI Audio Detector",
    description="""
    <div style="text-align: center; font-size: 1.2em; color: #555; margin: 20px 0;">
        <p><strong>Advanced AI technology</strong> to accurately detect whether uploaded audio was generated by AI!</p>
        <p>Supported formats: MP3, WAV, M4A, FLAC and various audio formats</p>
        <p>Powered by Fusion Segment Transformer (FST) - ISMIR 2025</p>
        <p style="font-size: 0.9em; color: #777;">🔬 Research-grade accuracy with MERT-768 backbone</p>
    </div>
    """,
    # Example clips, referenced by file name relative to the working directory.
    examples=[
        ["example-ncs-light it up(human).mp3"],
        ["example-Strumming Heartbeats(suno v4).mp3"],
    ],
    css=custom_css,
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
        neutral_hue="gray",
        font=[gr.themes.GoogleFont("Inter"), "Arial", "sans-serif"],
    ),
    elem_classes=["main-container"],
)
if __name__ == "__main__":
    # Options for a direct `python` run: bind to every interface on port
    # 7860, request a share link, hide the API page and show errors in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "show_api": False,
        "show_error": True,
    }
    demo.launch(**launch_options)