Seonghyeon Go committed on
Commit
e836611
·
1 Parent(s): 0ede85b

add spaces decorator

Browse files
__pycache__/dataset_f.cpython-312.pyc ADDED
Binary file (8.74 kB). View file
 
__pycache__/inference.cpython-312.pyc ADDED
Binary file (9.82 kB). View file
 
__pycache__/model.cpython-312.pyc ADDED
Binary file (52.7 kB). View file
 
__pycache__/networks.cpython-312.pyc ADDED
Binary file (25.7 kB). View file
 
__pycache__/preprocess.cpython-312.pyc ADDED
Binary file (8.89 kB). View file
 
app.py CHANGED
@@ -3,40 +3,98 @@ import torch
3
  import librosa
4
  import numpy as np
5
  from inference import inference
 
 
 
 
6
 
7
- def detect_ai_audio(audio_file):
8
  """
9
- Detect whether the uploaded audio file was generated by AI
10
  """
11
- result = inference(audio_file)
12
- print(result)
13
 
14
- # Format result with better styling
15
- if "AI" in str(result).upper() or "artificial" in str(result).lower():
16
- status = "AI Generated"
17
- color = "#ff6b6b"
18
- else:
19
- status = "Human Generated"
20
- color = "#51cf66"
21
 
22
- formatted_result = f"""
23
- <div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, {color}22, {color}11);">
24
- <div style="font-size: 24px; font-weight: bold; color: {color}; margin-bottom: 8px;">{status}</div>
25
- <div style="font-size: 16px; color: #666;">Analysis Result: {result}</div>
26
- </div>
27
- """
28
 
29
- return formatted_result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- # 커스텀 CSS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  custom_css = """
33
- /* 전체 배경 그라디언트 */
34
  .gradio-container {
35
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
36
  min-height: 100vh;
37
  }
38
 
39
- /* 메인 컨테이너 스타일링 */
40
  .main-container {
41
  background: rgba(255, 255, 255, 0.95) !important;
42
  backdrop-filter: blur(10px) !important;
@@ -46,7 +104,7 @@ custom_css = """
46
  padding: 30px !important;
47
  }
48
 
49
- /* 제목 스타일링 */
50
  h1 {
51
  background: linear-gradient(135deg, #667eea, #764ba2) !important;
52
  -webkit-background-clip: text !important;
@@ -55,10 +113,9 @@ h1 {
55
  font-size: 3em !important;
56
  font-weight: 800 !important;
57
  margin-bottom: 10px !important;
58
- text-shadow: 2px 2px 4px rgba(0,0,0,0.1) !important;
59
  }
60
 
61
- /* 설명 텍스트 */
62
  .gradio-markdown p {
63
  text-align: center !important;
64
  font-size: 1.2em !important;
@@ -66,7 +123,7 @@ h1 {
66
  margin-bottom: 30px !important;
67
  }
68
 
69
- /* 오디오 업로드 컴포넌트 */
70
  .upload-container {
71
  background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
72
  border-radius: 15px !important;
@@ -81,7 +138,7 @@ h1 {
81
  box-shadow: 0 15px 40px rgba(240, 147, 251, 0.4) !important;
82
  }
83
 
84
- /* 결과 출력 영역 */
85
  .output-container {
86
  background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%) !important;
87
  border-radius: 15px !important;
@@ -91,16 +148,7 @@ h1 {
91
  min-height: 150px !important;
92
  }
93
 
94
- /* 예시 파일 섹션 */
95
- .examples-container {
96
- background: rgba(255, 255, 255, 0.7) !important;
97
- border-radius: 15px !important;
98
- padding: 20px !important;
99
- margin-top: 30px !important;
100
- box-shadow: 0 5px 15px rgba(0,0,0,0.08) !important;
101
- }
102
-
103
- /* 버튼 스타일링 */
104
  .gr-button {
105
  background: linear-gradient(135deg, #667eea, #764ba2) !important;
106
  border: none !important;
@@ -117,7 +165,7 @@ h1 {
117
  box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
118
  }
119
 
120
- /* 애니메이션 추가 */
121
  @keyframes fadeInUp {
122
  from {
123
  opacity: 0;
@@ -133,7 +181,7 @@ h1 {
133
  animation: fadeInUp 0.8s ease-out !important;
134
  }
135
 
136
- /* 반응형 디자인 */
137
  @media (max-width: 768px) {
138
  h1 {
139
  font-size: 2em !important;
@@ -146,7 +194,20 @@ h1 {
146
  }
147
  """
148
 
149
- # Gradio 인터페이스 생성
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  demo = gr.Interface(
151
  fn=detect_ai_audio,
152
  inputs=gr.Audio(
@@ -163,7 +224,8 @@ demo = gr.Interface(
163
  <div style="text-align: center; font-size: 1.2em; color: #555; margin: 20px 0;">
164
  <p><strong>Advanced AI technology</strong> to accurately detect whether uploaded audio was generated by AI!</p>
165
  <p>Supported formats: MP3, WAV, M4A, FLAC and various audio formats</p>
166
- <p>Fast and accurate real-time analysis</p>
 
167
  </div>
168
  """,
169
  examples=[
 
3
  import librosa
4
  import numpy as np
5
  from inference import inference
6
+ from huggingface_hub import hf_hub_download
7
+ import os
8
+ from pathlib import Path
9
+ import spaces
10
 
11
def download_models_from_hub() -> dict[str, str]:
    """Ensure the model checkpoints exist locally, downloading any that are missing.

    Each checkpoint is looked up under ``checkpoints/`` (created if absent,
    relative to the current working directory). Files that are not there yet
    are fetched from the ``mippia/FST-checkpoints`` repository with
    ``hf_hub_download``.

    Returns:
        Mapping from the short model name (``"main"``, ``"backup"``) to the
        local checkpoint path. A model whose download failed is left out of
        the mapping, so callers can detect that it is unavailable with
        ``models.get(name)`` instead of the whole app crashing at startup.
    """
    model_dir = Path("checkpoints")
    model_dir.mkdir(exist_ok=True)

    # Original checkpoint filenames on HF Hub
    models = {
        "main": "EmbeddingModel_MERT_768-epoch=0073-val_loss=0.1058-val_acc=0.9585-val_f1=0.9366-val_precision=0.9936-val_recall=0.8857.ckpt",
        "backup": "step=007000-val_loss=0.1831-val_acc=0.9278.ckpt",
    }

    downloaded_models = {}

    for model_name, filename in models.items():
        local_path = model_dir / filename

        if local_path.exists():
            print(f"✅ {model_name} model already exists locally")
            downloaded_models[model_name] = str(local_path)
            continue

        print(f"📥 Downloading {model_name} model from Hugging Face Hub...")
        try:
            hf_hub_download(
                repo_id="mippia/FST-checkpoints",
                filename=filename,
                local_dir=str(model_dir),
                # NOTE(review): newer huggingface_hub releases appear to
                # deprecate this argument; kept so behaviour on the currently
                # installed version is unchanged -- confirm before removing.
                local_dir_use_symlinks=False,
            )
        except Exception as exc:
            # Startup boundary: report the failure and carry on so the other
            # checkpoint can still be fetched and the caller can warn about
            # the missing one.
            print(f"⚠️ Failed to download {model_name} model: {exc}")
            continue

        print(f"✅ {model_name} model downloaded successfully!")
        downloaded_models[model_name] = str(local_path)

    return downloaded_models
44
 
45
@spaces.GPU
def detect_ai_audio(audio_file):
    """Classify an uploaded audio file and return the verdict as an HTML card.

    Args:
        audio_file: Value handed over by the Gradio audio input; it is passed
            to ``inference`` unchanged. ``None`` means nothing was uploaded.

    Returns:
        str: An HTML snippet -- a prompt to upload a file, the verdict card,
        or an error card when ``inference`` (or the formatting) raised.
    """
    # Function-scope imports keep this block self-contained.
    import html
    import re

    if audio_file is None:
        return """
        <div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #ff6b6b22, #ff6b6b11);">
            <div style="font-size: 18px; color: #ff6b6b;">⚠️ Please upload an audio file</div>
        </div>
        """

    try:
        result = inference(audio_file)
        result_text = str(result)
        lowered = result_text.lower()

        # "ai" must stand alone rather than sit inside a longer word: the
        # previous upper-case substring test also fired on words such as
        # "certain" or "contains" and produced false "AI Generated" verdicts.
        # NOTE(review): confirm against the label strings inference() emits.
        mentions_ai = re.search(r"(?<![a-z])ai(?![a-z])", lowered) is not None

        # Format result with better styling
        if mentions_ai or "artificial" in lowered or "fake" in lowered:
            status = "AI Generated"
            color = "#ff6b6b"
            confidence = "High confidence this audio was generated by AI"
        else:
            status = "Human Generated"
            color = "#51cf66"
            confidence = "High confidence this audio was created by humans"

        # The raw output is escaped so stray markup in it cannot alter the page.
        safe_result = html.escape(result_text)

        formatted_result = f"""
        <div style="text-align: center; padding: 25px; border-radius: 15px; background: linear-gradient(135deg, {color}22, {color}11); border: 2px solid {color}33;">
            <div style="font-size: 28px; font-weight: bold; color: {color}; margin-bottom: 10px;">{status}</div>
            <div style="font-size: 16px; color: #666; margin-bottom: 8px;">{confidence}</div>
            <div style="font-size: 14px; color: #888;">Raw output: {safe_result}</div>
        </div>
        """

        return formatted_result

    except Exception as e:
        # UI boundary: show the failure in the output panel instead of raising.
        # The message is escaped because it may echo file names or other
        # user-influenced text.
        safe_error = html.escape(str(e))
        error_result = f"""
        <div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #ff6b6b22, #ff6b6b11);">
            <div style="font-size: 20px; font-weight: bold; color: #ff6b6b; margin-bottom: 8px;">Error</div>
            <div style="font-size: 14px; color: #666;">Failed to process audio: {safe_error}</div>
        </div>
        """
        return error_result
88
+
89
+ # Custom CSS for modern design
90
  custom_css = """
91
+ /* Global background gradient */
92
  .gradio-container {
93
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
94
  min-height: 100vh;
95
  }
96
 
97
+ /* Main container styling */
98
  .main-container {
99
  background: rgba(255, 255, 255, 0.95) !important;
100
  backdrop-filter: blur(10px) !important;
 
104
  padding: 30px !important;
105
  }
106
 
107
+ /* Title styling */
108
  h1 {
109
  background: linear-gradient(135deg, #667eea, #764ba2) !important;
110
  -webkit-background-clip: text !important;
 
113
  font-size: 3em !important;
114
  font-weight: 800 !important;
115
  margin-bottom: 10px !important;
 
116
  }
117
 
118
+ /* Description text */
119
  .gradio-markdown p {
120
  text-align: center !important;
121
  font-size: 1.2em !important;
 
123
  margin-bottom: 30px !important;
124
  }
125
 
126
+ /* Audio upload component */
127
  .upload-container {
128
  background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
129
  border-radius: 15px !important;
 
138
  box-shadow: 0 15px 40px rgba(240, 147, 251, 0.4) !important;
139
  }
140
 
141
+ /* Output container */
142
  .output-container {
143
  background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%) !important;
144
  border-radius: 15px !important;
 
148
  min-height: 150px !important;
149
  }
150
 
151
+ /* Button styling */
 
 
 
 
 
 
 
 
 
152
  .gr-button {
153
  background: linear-gradient(135deg, #667eea, #764ba2) !important;
154
  border: none !important;
 
165
  box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
166
  }
167
 
168
+ /* Animation */
169
  @keyframes fadeInUp {
170
  from {
171
  opacity: 0;
 
181
  animation: fadeInUp 0.8s ease-out !important;
182
  }
183
 
184
+ /* Responsive design */
185
  @media (max-width: 768px) {
186
  h1 {
187
  font-size: 2em !important;
 
194
  }
195
  """
196
 
197
# Initialize the app
print("🚀 Starting FST AI Audio Detection App...")
print("📦 Initializing models...")

# Download models at startup
models = download_models_from_hub()

# Check if main model is available: require the checkpoint file to actually be
# on disk, not merely to have an entry in the returned mapping.
if models.get("main") and Path(models["main"]).is_file():
    print("✅ Main model ready for inference")
else:
    print("⚠️ Warning: Main model not available, app may not work properly")
209
+
210
+ # Create Gradio interface
211
  demo = gr.Interface(
212
  fn=detect_ai_audio,
213
  inputs=gr.Audio(
 
224
  <div style="text-align: center; font-size: 1.2em; color: #555; margin: 20px 0;">
225
  <p><strong>Advanced AI technology</strong> to accurately detect whether uploaded audio was generated by AI!</p>
226
  <p>Supported formats: MP3, WAV, M4A, FLAC and various audio formats</p>
227
+ <p>Powered by Fusion Segment Transformer (FST) - ISMIR 2025</p>
228
+ <p style="font-size: 0.9em; color: #777;">🔬 Research-grade accuracy with MERT-768 backbone</p>
229
  </div>
230
  """,
231
  examples=[
requirements.txt CHANGED
@@ -9,4 +9,5 @@ numpy>=1.24.0
9
  scipy>=1.10.0
10
  soundfile>=0.12.0
11
  datasets>=2.0.0
12
- accelerate>=0.20.0
 
 
9
  scipy>=1.10.0
10
  soundfile>=0.12.0
11
  datasets>=2.0.0
12
+ accelerate>=0.20.0
13
+ spaces