varunkul committed on
Commit
6ecef58
·
verified ·
1 Parent(s): 3632e7a

Upload 8 files

Browse files
Files changed (8) hide show
  1. .env +2 -0
  2. .gitattributes +3 -35
  3. README.md +9 -17
  4. gen_clips.py +282 -0
  5. index.html +440 -0
  6. packages.txt +1 -0
  7. requirements.txt +28 -3
  8. streamlit_app.py +132 -0
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ELEVEN_API_KEY=<your-elevenlabs-api-key>
2
+ ELEVEN_VOICE_ID=
.gitattributes CHANGED
@@ -1,35 +1,3 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.pth filter=lfs diff=lfs merge=lfs -text
2
+ *.pt filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,20 +1,12 @@
1
- ---
2
- title: Voice Guard
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Streamlit template space
12
- license: apache-2.0
13
- ---
14
 
15
- # Welcome to Streamlit!
16
 
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
 
19
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
+ # AI Voice Detector — Human vs AI Speech (Hack-Ready)
3
 
4
+ **Goal:** Detect whether an audio clip is **AI-generated or human** in real time, with an **explainable heatmap** over the spectrogram and **(optional) provenance check** via ElevenLabs API.
5
 
6
+ ## Quickstart
7
+
8
+ ```bash
9
+ python -m venv .venv && source .venv/bin/activate # Windows: .venv\Scripts\activate
10
+ pip install -r requirements.txt
11
+ python app/app.py
12
+ ```
gen_clips.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ from app.elevenlabs_tools import generate_tts_dataset
5
+ from app.utils.convert_mp3_to_wav import mp3_to_wav16k
6
+
7
+ # --- Load API key ---
8
+ load_dotenv()
9
+ if not os.getenv("ELEVEN_API_KEY"):
10
+ raise ValueError("ELEVEN_API_KEY not found in .env file")
11
+
12
+ # --- Your ElevenLabs voices ---
13
+ VOICES = {
14
+ "Adam": "pNInz6obpgDQGcFmaJgB",
15
+ "Alice": "Xb7hH8MSUJpSbSDYk0k2",
16
+ "Aria": "9BWtsMINqrJLrRacOk9x",
17
+ "Brian": "nPczCjzI2devNBz1zQrb",
18
+ "Bill": "pqHfZKP75CvOlQylNhV4",
19
+ "Charlotte": "XB0fDUnXU5powFXDhCwa",
20
+ "Clyde": "2EiwWnXFnvU5JabPnv8n",
21
+ "Drew": "29vD33N1CtxCmqQRPOHJ",
22
+ "Freya": "jsCqWAovK2LkecY7zXl4",
23
+ "Gigi": "jBpfuIE2acCO8z3wKNLl",
24
+ }
25
+
26
+ MP3_ROOT = "data/raw/ai_mp3"
27
+ WAV_ROOT = "data/raw/ai"
28
+
29
+ # --- 200 sentences (20 per voice) ---
30
+ SENTS = {
31
+ # Alice = News Anchor (F) — 20
32
+ "Alice": [
33
+ "Good evening. Here are today's top stories from campus and around Arlington.",
34
+ "City officials approved new bike lanes, citing safety and climate benefits.",
35
+ "Temperatures rise tomorrow, with scattered showers likely late in the afternoon.",
36
+ "The Mavericks sealed a comeback win after a tense fourth quarter tonight.",
37
+ "Economists project steady growth this quarter despite lingering supply constraints.",
38
+ "Construction on Cooper Street resumes Monday; expect delays and posted detours.",
39
+ "A UTA research team announced a battery recycling breakthrough this morning.",
40
+ "Flights at DFW remain on schedule, with only minor delays reported.",
41
+ "Early voting saw record turnout across several precincts, officials confirmed.",
42
+ "That is the latest update; we will return with more at eleven.",
43
+ "Transit officials unveiled a pilot for free weekend rides across the city.",
44
+ "State health leaders reported declining flu cases headed into next week.",
45
+ "Energy prices dipped slightly today amid forecasts for milder temperatures.",
46
+ "The council passed a balanced budget, prioritizing schools and road repairs.",
47
+ "Local firefighters rescued a hiker after an overnight search near the lake.",
48
+ "A new scholarship program will support first-generation students beginning this fall.",
49
+ "Sports headlines: the women's team advances to the regional semifinals tomorrow.",
50
+ "Expect strong winds overnight; secure loose items and check travel advisories.",
51
+ "Police announced an amnesty weekend to safely turn in prohibited fireworks.",
52
+ "That wraps the hour; for breaking updates, follow our digital live blog.",
53
+ ],
54
+ # Adam = Friendly Conversational (M) — 20
55
+ "Adam": [
56
+ "Hey, I grabbed coffee already. Want me to save you a seat?",
57
+ "I couldn't find your charger, but I left a spare cable on the desk.",
58
+ "Traffic is heavy near the stadium; let's park early and walk together.",
59
+ "Your demo looked great. The UI felt clean, fast, and friendly.",
60
+ "Let's split the grocery list: produce for you, pantry items for me.",
61
+ "I'll ping the group chat once I reach the venue, no worries.",
62
+ "Your slides are solid; add a quick metric slide before the demo.",
63
+ "The new cafe downtown has almond croissants that blew my mind.",
64
+ "I'm heading out now; text me if you need anything from Target.",
65
+ "Thanks again for yesterday. You genuinely saved our timeline.",
66
+ "I booked the study room at six; bring markers and sticky notes.",
67
+ "We can pair on tests tonight, then merge before midnight.",
68
+ "I'll water the plants while you're away; just leave the key.",
69
+ "Your playlist slapped; share it so I can loop it tomorrow.",
70
+ "The package arrived early, so I'll drop it off after class.",
71
+ "Let's meal prep Sunday afternoon and avoid takeout next week.",
72
+ "I left comments in the doc; happy to chat through suggestions.",
73
+ "Shall we run by the lake at seven and grab smoothies after?",
74
+ "I set the reminder; we'll check results first thing in the morning.",
75
+ "Great news: the refund cleared, and the receipt is in your email.",
76
+ ],
77
+ # Clyde = British Formal (M) — 20
78
+ "Clyde": [
79
+ "Kindly ensure the documentation is reviewed before the committee convenes Thursday afternoon.",
80
+ "Your reservation is confirmed; a private room will be prepared upon your arrival.",
81
+ "Please submit the revised manuscript, adhering to the journal's formatting guidelines.",
82
+ "The seminar commences at nine precisely; late admittance may not be accommodated.",
83
+ "We appreciate your patience while maintenance completes the scheduled electrical inspection.",
84
+ "Do verify the figures; precision remains paramount in this investigation.",
85
+ "The board welcomes your proposal and invites a concise presentation next week.",
86
+ "Do accept my apologies; the courier appears to have misplaced the parcel.",
87
+ "The contract shall be executed once both parties acknowledge the amended clause.",
88
+ "I trust the arrangements meet your expectations; advise if alterations are required.",
89
+ "Minutes from the previous meeting are circulated for your timely acknowledgment.",
90
+ "Your membership will be renewed upon completion of the enclosed application.",
91
+ "The gallery preview opens at six; appropriate attire is kindly requested.",
92
+ "Please confer with procurement before engaging additional external suppliers.",
93
+ "We remain grateful for your counsel and continuing professional partnership.",
94
+ "The timetable reflects minor adjustments to accommodate laboratory availability.",
95
+ "Kindly return the archive keys to reception at the close of business.",
96
+ "A modest reception will follow the lecture in the Great Hall foyer.",
97
+ "Your diligence is noted; the supervisory panel commends your progress.",
98
+ "Should difficulties arise, do not hesitate to contact the department secretary.",
99
+ ],
100
+ # Charlotte = Energetic Young (F) — 20
101
+ "Charlotte": [
102
+ "Let's go team, hack time! Push that commit and ship the killer feature.",
103
+ "I'm hyped for finals; caffeine plus playlists equals unstoppable study mode.",
104
+ "Your reel looked amazing; post it now before the algorithm naps.",
105
+ "The new sneakers dropped today, and the colors are ridiculously clean.",
106
+ "We are sprinting to the finish; grab snacks and let's smash these tasks.",
107
+ "Quick check-in: are we vibing with blue accents or neon gradients?",
108
+ "That trailer went hard; I'm watching the premiere on night one.",
109
+ "Toss me the aux; I have the perfect focus track for crunch.",
110
+ "Class got canceled; brunch and brainstorming at ten sound perfect.",
111
+ "Big win, everyone! Screenshots, gifs, and celebratory donuts for the squad.",
112
+ "Mic check at five, lights at six, and we go live at seven.",
113
+ "I updated the banner; the new glow makes the title pop.",
114
+ "Can we swap the hero image? The neon skyline absolutely slaps.",
115
+ "Tiny bug spotted; I'm patching it now and pushing a hotfix.",
116
+ "The vibe is immaculate; let's ride the momentum and overdeliver.",
117
+ "Okay, squad goals: demo flawless, judges smiling, trophy secured.",
118
+ "I queued the soundtrack; it builds perfectly into the reveal moment.",
119
+ "Let's loop the b-roll while we talk through the metrics slide.",
120
+ "The confetti emoji is ready; I am saving it for the finale.",
121
+ "Final stretch energy: deep breath, big smile, and hit deploy.",
122
+ ],
123
+ # Freya = Calm Meditation (F) — 20
124
+ "Freya": [
125
+ "Breathe in gently, noticing cool air filling your chest and shoulders.",
126
+ "Exhale slowly, allowing the tension around your eyes to soften.",
127
+ "Let your attention rest on the rhythm of your breath, steady and quiet.",
128
+ "Imagine warm sunlight touching your face, inviting ease into your morning.",
129
+ "Release today's concerns; your body knows how to return to balance.",
130
+ "Sense the ground beneath you, steady, supportive, and completely reliable.",
131
+ "With each inhale, welcome spaciousness; with each exhale, welcome calm.",
132
+ "Thank your busy mind for its effort, and invite it to rest.",
133
+ "Notice your heartbeat, patient and gentle, guiding you toward presence.",
134
+ "Carry this softness forward; you are grounded, clear, and ready.",
135
+ "Let your shoulders drop slightly, as if set down from a kind weight.",
136
+ "Picture a wide horizon; there is time to move with kindness.",
137
+ "Let stray thoughts pass like clouds, changing shape and drifting away.",
138
+ "Soften the jaw; let the tongue rest, calm and unhurried.",
139
+ "Feel the breath arrive, then leave, like waves returning to sea.",
140
+ "Offer gratitude to this moment, exactly as it is appearing.",
141
+ "Invite quiet where worry stood; let steady breath fill that space.",
142
+ "Imagine your spine lengthening, lifting you gently into balance.",
143
+ "Hold kindness in the chest; exhale and share it outward.",
144
+ "Return to the breath whenever the mind asks for a handhold.",
145
+ ],
146
+ # Bill = Elderly Storyteller (M) — 20
147
+ "Bill": [
148
+ "When summer storms rolled in, we counted seconds between lightning and thunder.",
149
+ "Your grandmother kept recipes on cards, stained with sweet berry memories.",
150
+ "We built radios from kits, chasing distant stations after sundown.",
151
+ "The library smelled of paper and varnish, refuge on rainy afternoons.",
152
+ "I learned patience fixing bicycles, one stubborn bolt at a time.",
153
+ "We mapped the night sky, tracing stories across cold constellations.",
154
+ "A firm handshake once sealed agreements stronger than signed paper.",
155
+ "I still hear that tune drifting from open windows each spring.",
156
+ "The river taught respect; quiet water can hide a heavy current.",
157
+ "Keep your curiosity; it carries farther than cleverness alone.",
158
+ "We patched leaky roofs with laughter and tar on summer mornings.",
159
+ "Neighbors traded tools, stories, and peaches over the backyard fence.",
160
+ "A pocketknife and twine solved more problems than any fancy kit.",
161
+ "The best advice I got was simple: listen longer than you speak.",
162
+ "We saved bottle caps for games that lasted until the porch lights.",
163
+ "Patience is a bridge you build before the flood ever arrives.",
164
+ "The kindest teachers led with questions, not with thunder.",
165
+ "I kept a notebook of firsts: first snowfall, first bicycle, first apology.",
166
+ "Luck visits briefly; preparation invites it to stay for tea.",
167
+ "If you tend your friendships, they will flower even in winter.",
168
+ ],
169
+ # Brian = Tech Presenter (M) — 20
170
+ "Brian": [
171
+ "Today we will deploy a tiny model to the edge with real-time inference.",
172
+ "Our pipeline standardizes audio at sixteen kilohertz for consistent features.",
173
+ "We log predictions and latencies, then visualize drift on weekly dashboards.",
174
+ "Feature store versioning prevents training-serving skew across environments.",
175
+ "We will run A B tests, tracking equal error rate and calibration.",
176
+ "The container image stays under two hundred megabytes for minimal cold starts.",
177
+ "Webhooks post verdicts to Slack, enabling rapid human review.",
178
+ "Augmentation simulates noise, speed changes, and codec artifacts during training.",
179
+ "Grad CAM highlights mel regions influencing final predictions the most.",
180
+ "We export reports as CSV and HTML for compliance and audits.",
181
+ "A rolling window monitors precision and recall across recent deployments.",
182
+ "Canary releases protect users while we validate new thresholds in production.",
183
+ "We encrypt artifacts at rest and rotate keys on a fixed cadence.",
184
+ "Offline evaluation includes ablations to isolate the contribution of features.",
185
+ "A retraining job triggers automatically when drift exceeds our alert budget.",
186
+ "We tag datasets with immutable hashes to ensure reproducibility.",
187
+ "Telemetry includes device model, operating system, and inference time buckets.",
188
+ "A fallback heuristic keeps the product usable if models misbehave.",
189
+ "Dashboards display confidence histograms to surface calibration issues.",
190
+ "We close with a demo and share the public notebook for transparency.",
191
+ ],
192
+ # Gigi = Audiobook Warm (F) — 20
193
+ "Gigi": [
194
+ "She folded the letter carefully, as if gentleness might change its meaning.",
195
+ "The lighthouse turned, patient and steady, casting silver across the harbor.",
196
+ "He packed the last box, breathing dust, cedar, and something like courage.",
197
+ "Morning arrived with rain and hibiscus, petals bright against the fence.",
198
+ "The attic kept summers in jars, peaches, sunlight, and untold stories.",
199
+ "She traced the map's worn edges, wondering where the river begins.",
200
+ "He laughed softly, warm as cinnamon and autumn kitchens.",
201
+ "The train hummed northward, carrying secrets and a pocket of hopes.",
202
+ "Night gathered gently, a shawl of stars over the sleeping town.",
203
+ "She realized beginnings often wear the same shoes as endings.",
204
+ "Wind braided through the pines, whispering names they thought forgotten.",
205
+ "He watched the porch light flicker, a heartbeat for the quiet house.",
206
+ "They shared strawberries on the curb, red thumbs and easy grins.",
207
+ "She kept a seashell on the desk to remember the tides.",
208
+ "He learned patience from bread dough, rising in its own time.",
209
+ "The street woke slowly, clinking bottles and morning radios.",
210
+ "She carried a postcard everywhere, proof that distance could be kind.",
211
+ "Rain wrote cursive on the window, a lesson in soft persistence.",
212
+ "He folded the map again, trusting the road would teach directions.",
213
+ "They left the lamp lit, so tomorrow could find its way back.",
214
+ ],
215
+ # Drew = Sports Commentator (M) — 20
216
+ "Drew": [
217
+ "He fires from deep, nothing but net, and the crowd erupts again!",
218
+ "The relay exchange was flawless, shaving precious milliseconds off the record.",
219
+ "She clears the bar with ease; that is a season best.",
220
+ "The keeper guesses right, stretches wide, and palms it away brilliantly.",
221
+ "They are pressing high now, forcing turnovers and controlling the tempo.",
222
+ "Off the corner, a thunderous header rockets into the upper ninety.",
223
+ "With two laps remaining, strategy and patience decide this championship.",
224
+ "The rookie delivers under pressure, a clutch performance in overtime.",
225
+ "He splits the defense, step backs, and drills a cold dagger.",
226
+ "The stadium is shaking; fans know they are witnessing something special.",
227
+ "A perfect pick frees the shooter, and he nails the mid-range.",
228
+ "She accelerates on the back stretch, pulling clear of the pack.",
229
+ "Defense rotates quickly and denies the easy layup at the rim.",
230
+ "The captain rallies the bench, demanding focus for the final minutes.",
231
+ "A crafty nutmeg draws gasps from the away section.",
232
+ "He reads the screen, jumps the passing lane, and steals it clean.",
233
+ "The serve kisses the line; challenge confirms an inch to spare.",
234
+ "She nails the dismount, and the judges reward the precision.",
235
+ "A booming kick flips field position and buys valuable time.",
236
+ "Timeout here; the next possession will write tonight's headline.",
237
+ ],
238
+ # Aria = Childlike Curious (Neutral/F) — 20
239
+ "Aria": [
240
+ "Do clouds ever get tired from floating and making so many shapes?",
241
+ "If shadows are quiet, do they still count as parts of sunlight?",
242
+ "How many raindrops fit on a ladybug's back before it tickles?",
243
+ "Why do cats blink slowly, like they are telling secrets with eyelids?",
244
+ "If trees could vote, would they choose longer springs or louder birds?",
245
+ "Do stars practice shining, or are they born already busy and bright?",
246
+ "What happens to sound after it stops; does it nap somewhere cozy?",
247
+ "Can a thought be heavy enough to pull socks from drawers?",
248
+ "If books could taste words, would poems be chocolate or strawberries tonight?",
249
+ "When the moon hides, does it giggle behind clouds or play peekaboo?",
250
+ "Do puddles remember the sky they borrowed for a little while?",
251
+ "If a kite lets go, does the wind promise to bring it back?",
252
+ "Are whispers just brave words that prefer smaller adventures?",
253
+ "Can colors be friends, or do they argue over favorite sunsets?",
254
+ "If time wore shoes, would it sprint weekdays and stroll Sundays?",
255
+ "Do fireflies save their glow for midnight parties in the grass?",
256
+ "If snowflakes could vote, would they pick twirls or soft landings?",
257
+ "Where does a yawn travel to after everyone catches it?",
258
+ "Can a memory wave hello when you pass the same corner?",
259
+ "If a dream goes missing, does it send postcards from tomorrow?",
260
+ ],
261
+ }
262
+
263
def main():
    """Generate the TTS clips for every voice and convert them to WAV.

    For each entry in ``SENTS`` the sentences are passed to
    ``generate_tts_dataset`` with the matching ID from ``VOICES``; the MP3s
    are written to ``MP3_ROOT/<voice name>``. That directory is then handed
    to ``mp3_to_wav16k`` with ``WAV_ROOT`` as the destination.

    Raises:
        KeyError: if a voice listed in ``SENTS`` has no ID in ``VOICES``.
            This is checked before any clip is generated, so a
            misconfigured voice table does not abort a partially
            completed run.
    """
    # Validate the voice table first: discovering a missing ID halfway
    # through would leave earlier voices already generated.
    missing = [name for name in SENTS if name not in VOICES]
    if missing:
        raise KeyError(
            f"No voice ID configured in VOICES for: {', '.join(missing)}"
        )

    os.makedirs(MP3_ROOT, exist_ok=True)
    os.makedirs(WAV_ROOT, exist_ok=True)

    total = 0
    for voice_name, lines in SENTS.items():
        voice_id = VOICES[voice_name]
        out_mp3 = os.path.join(MP3_ROOT, voice_name)
        print(f"\nGenerating {len(lines)} clips for {voice_name} -> {out_mp3}")
        generate_tts_dataset(texts=lines, voice_id=voice_id, out_dir=out_mp3)
        # NOTE(review): every voice converts into the same flat WAV_ROOT.
        # If generate_tts_dataset reuses file names across voices, later
        # voices would overwrite earlier WAVs -- confirm the naming scheme.
        print(f"Converting MP3 -> WAV(16k mono) into {WAV_ROOT} ...")
        mp3_to_wav16k(src_dir=out_mp3, dst_dir=WAV_ROOT)
        # Counts the sentences requested, not the files actually written.
        total += len(lines)

    print(f"\nDone. Generated {total} clips total.")
    print(f"MP3s in: {MP3_ROOT}")
    print(f"WAVs in: {WAV_ROOT}")
280
+
281
+ if __name__ == "__main__":
282
+ main()
index.html ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width,initial-scale=1" />
6
+ <title>Voice Guard — AI Voice Detector</title>
7
+
8
+ <!-- Tailwind (CDN) -->
9
+ <script src="https://cdn.tailwindcss.com"></script>
10
+ <script>
11
+ tailwind.config = {
12
+ theme: {
13
+ extend: {
14
+ fontFamily: { sans: ['Inter','ui-sans-serif','system-ui'] },
15
+ colors: {
16
+ brand: { 400:'#ff8e34', 500:'#ff6a00' }
17
+ },
18
+ boxShadow: { glass: '0 10px 30px rgba(0,0,0,.35), inset 0 1px 0 rgba(255,255,255,.05)' }
19
+ }
20
+ }
21
+ }
22
+ </script>
23
+
24
+ <!-- Optional config file that sets window.BACKEND_URL -->
25
+ <script src="config.js"></script>
26
+
27
+ <!-- Inter font -->
28
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&display=swap" rel="stylesheet">
29
+
30
+ <style>
31
+ .glass{
32
+ background: rgba(28,28,33,.55);
33
+ border: 1px solid rgba(255,255,255,.06);
34
+ box-shadow: var(--glass, 0 10px 30px rgba(0,0,0,.35)), inset 0 1px 0 rgba(255,255,255,.05);
35
+ backdrop-filter: blur(10px);
36
+ }
37
+ .donut{
38
+ --val:.34; --col:#ff6a00;
39
+ background: conic-gradient(var(--col) calc(var(--val)*360deg), #2c2c2c 0);
40
+ mask: radial-gradient(farthest-side, #0000 62%, #000 63%);
41
+ -webkit-mask: radial-gradient(farthest-side, #0000 62%, #000 63%);
42
+ transition: background .35s ease;
43
+ }
44
+ ::-webkit-scrollbar { width: 10px; height: 10px; }
45
+ ::-webkit-scrollbar-thumb { background: #23242a; border-radius: 999px; }
46
+ </style>
47
+ </head>
48
+ <body class="bg-[#0C0D10] text-white font-sans">
49
+
50
+ <!-- Soft gradient background -->
51
+ <div class="fixed inset-0 -z-10">
52
+ <div class="absolute -top-24 -right-24 w-[600px] h-[600px] rounded-full blur-3xl opacity-30"
53
+ style="background: radial-gradient(closest-side,#ff6a00,transparent 70%);"></div>
54
+ <div class="absolute -bottom-24 -left-20 w-[500px] h-[500px] rounded-full blur-3xl opacity-20"
55
+ style="background: radial-gradient(closest-side,#5eead4,transparent 70%);"></div>
56
+ </div>
57
+
58
+ <!-- Header -->
59
+ <header class="mx-auto max-w-7xl px-6 py-4">
60
+ <div class="flex items-center justify-between rounded-2xl glass px-4 py-3">
61
+ <div class="flex items-center gap-3">
62
+ <div class="h-9 w-9 rounded-xl bg-brand-500/10 ring-1 ring-brand-500/40 grid place-content-center">
63
+ <!-- Shield waveform icon -->
64
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none">
65
+ <path d="M12 2l7 3v6c0 5.25-3.5 9.75-7 11-3.5-1.25-7-5.75-7-11V5l7-3Z" stroke="#ff6a00" stroke-width="1.5"/>
66
+ <path d="M7 12h2l1-4 2 8 1-6 1 3h3" stroke="#fff" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
67
+ </svg>
68
+ </div>
69
+ <div>
70
+ <h1 class="text-lg font-semibold">Voice Guard</h1>
71
+ <p class="text-xs text-white/50 -mt-0.5">Human vs AI Speech</p>
72
+ </div>
73
+ </div>
74
+
75
+ <button id="analyzeBtn"
76
+ class="rounded-xl px-4 py-2.5 text-sm font-semibold bg-brand-500 hover:bg-brand-400 text-white shadow-lg">
77
+ Analyze
78
+ </button>
79
+ </div>
80
+ </header>
81
+
82
+ <!-- Main -->
83
+ <main class="mx-auto max-w-7xl px-6 pb-24">
84
+ <div class="grid grid-cols-12 gap-6">
85
+
86
+ <!-- LEFT: Inputs + status -->
87
+ <section class="col-span-4 space-y-6">
88
+ <div class="glass rounded-2xl p-5">
89
+ <div class="flex items-center justify-between">
90
+ <h2 class="text-sm font-semibold text-white/80">Input</h2>
91
+ <span class="text-xs px-2 py-1 rounded-full bg-white/5 border border-white/10">3–7s</span>
92
+ </div>
93
+
94
+ <div class="mt-4 grid grid-cols-2 gap-2">
95
+ <button id="tabMic" class="w-full rounded-xl border border-white/10 bg-white/5 px-3 py-2 text-sm font-medium">
96
+ Microphone
97
+ </button>
98
+ <button id="tabUpload" class="w-full rounded-xl border border-white/10 bg-transparent px-3 py-2 text-sm font-medium hover:bg-white/5">
99
+ Upload
100
+ </button>
101
+ </div>
102
+
103
+ <!-- Mic panel -->
104
+ <div id="micPanel" class="mt-4 space-y-4">
105
+ <div class="flex items-center gap-3">
106
+ <button id="recBtn" class="rounded-lg bg-white/10 hover:bg-white/20 px-3 py-2 text-sm border border-white/10">● Record</button>
107
+ <span id="recStatus" class="text-xs text-white/60">Idle</span>
108
+ </div>
109
+ <div class="rounded-xl border border-white/10 bg-black/30 h-24 overflow-hidden">
110
+ <canvas id="meter" class="w-full h-full"></canvas>
111
+ </div>
112
+ </div>
113
+
114
+ <!-- Upload panel -->
115
+ <div id="uploadPanel" class="mt-4 hidden space-y-3">
116
+ <label class="block text-sm text-white/70">Choose audio (.wav/.mp3)</label>
117
+ <input id="fileInput" type="file" accept="audio/*"
118
+ class="w-full rounded-xl bg-black/30 border border-white/10 p-3 text-sm file:mr-4 file:rounded-lg file:border-0 file:bg-brand-500 file:px-4 file:py-2 file:text-white file:text-sm hover:file:bg-brand-400"/>
119
+ <p id="fileName" class="text-xs text-white/50"></p>
120
+ </div>
121
+ </div>
122
+
123
+ <div class="grid grid-cols-2 gap-4">
124
+ <div class="glass rounded-2xl p-4">
125
+ <p class="text-xs text-white/60">Source</p>
126
+ <p id="srcLabel" class="mt-1 text-lg font-semibold">Microphone</p>
127
+ </div>
128
+ <div class="glass rounded-2xl p-4">
129
+ <p class="text-xs text-white/60">Latency</p>
130
+ <p id="latency" class="mt-1 text-lg font-semibold">—</p>
131
+ </div>
132
+ </div>
133
+
134
+ <div class="glass rounded-2xl p-5">
135
+ <div class="flex items-center justify-between">
136
+ <h3 class="text-sm font-semibold text-white/80">Recent</h3>
137
+ <button id="clearRecent" class="text-xs text-white/50 hover:text-white/80">Clear</button>
138
+ </div>
139
+ <ul id="recentList" class="mt-3 space-y-2 max-h-60 overflow-auto"></ul>
140
+ </div>
141
+ </section>
142
+
143
+ <!-- RIGHT: Heatmap + donuts + label -->
144
+ <section class="col-span-8 space-y-6">
145
+ <div class="glass rounded-2xl p-5">
146
+ <div class="flex items-center justify-between">
147
+ <h2 class="text-sm font-semibold text-white/80">Explanation Heatmap</h2>
148
+ <div class="text-xs text-white/50">Spectrogram importance</div>
149
+ </div>
150
+
151
+ <div class="mt-4 h-[340px] rounded-xl border border-white/10 overflow-hidden bg-black/30 grid place-items-center">
152
+ <img id="heatmapImg" class="w-full h-full object-contain" alt="Heatmap"/>
153
+ <span id="heatmapPlaceholder" class="text-white/50 text-sm">No analysis yet</span>
154
+ </div>
155
+ </div>
156
+
157
+ <div class="grid grid-cols-3 gap-6">
158
+ <div class="glass rounded-2xl p-5">
159
+ <div class="flex items-center justify-between">
160
+ <p class="text-sm text-white/70">Human</p>
161
+ <span class="text-xs rounded-full bg-emerald-400/15 text-emerald-300 px-2 py-0.5 border border-emerald-400/30">Class 0</span>
162
+ </div>
163
+ <div class="mt-4 flex items-center gap-6">
164
+ <div class="donut size-28 rounded-full" id="donutHuman" style="--val:.50; --col:#34d399"></div>
165
+ <div>
166
+ <p class="text-3xl font-extrabold"><span id="humanPct">50</span>%</p>
167
+ <p class="text-xs text-white/60 mt-1">Likelihood</p>
168
+ </div>
169
+ </div>
170
+ </div>
171
+
172
+ <div class="glass rounded-2xl p-5">
173
+ <div class="flex items-center justify-between">
174
+ <p class="text-sm text-white/70">AI</p>
175
+ <span class="text-xs rounded-full bg-rose-400/15 text-rose-300 px-2 py-0.5 border border-rose-400/30">Class 1</span>
176
+ </div>
177
+ <div class="mt-4 flex items-center gap-6">
178
+ <div class="donut size-28 rounded-full" id="donutAI" style="--val:.50; --col:#fb7185"></div>
179
+ <div>
180
+ <p class="text-3xl font-extrabold"><span id="aiPct">50</span>%</p>
181
+ <p class="text-xs text-white/60 mt-1">Likelihood</p>
182
+ </div>
183
+ </div>
184
+ </div>
185
+
186
+ <div class="glass rounded-2xl p-5">
187
+ <p class="text-sm text-white/70">Final Label</p>
188
+ <div class="mt-3 flex items-center gap-3">
189
+ <div id="badgeLabel"
190
+ class="px-3 py-1.5 text-sm font-semibold rounded-xl border border-emerald-400/30 bg-emerald-400/15 text-emerald-300">
191
+ HUMAN
192
+ </div>
193
+ <span id="threshold" class="text-xs text-white/50">thr 0.60</span>
194
+ </div>
195
+ <!-- NEW: why line -->
196
+ <p id="whyText" class="text-xs text-white/60 mt-2"></p>
197
+ <p class="text-xs text-white/60 mt-3 leading-relaxed">
198
+ Click Analyze to send audio to the API and render the real heatmap.
199
+ </p>
200
+ </div>
201
+ </div>
202
+ </section>
203
+
204
+ </div>
205
+ </main>
206
+
207
+ <script>
208
+ // ===== Config =====
209
+ const BACKEND_URL = window.BACKEND_URL || "http://127.0.0.1:8000/analyze";
210
+
211
+ // ===== Elements =====
212
+ const tabMic = document.getElementById('tabMic');
213
+ const tabUpload = document.getElementById('tabUpload');
214
+ const micPanel = document.getElementById('micPanel');
215
+ const uploadPanel = document.getElementById('uploadPanel');
216
+ const srcLabel = document.getElementById('srcLabel');
217
+
218
+ const recBtn = document.getElementById('recBtn');
219
+ const recStatus = document.getElementById('recStatus');
220
+
221
+ const analyzeBtn = document.getElementById('analyzeBtn');
222
+ const latency = document.getElementById('latency');
223
+
224
+ const heatmapImg = document.getElementById('heatmapImg');
225
+ const heatmapPlaceholder = document.getElementById('heatmapPlaceholder');
226
+
227
+ const donutHuman = document.getElementById('donutHuman');
228
+ const donutAI = document.getElementById('donutAI');
229
+ const humanPct = document.getElementById('humanPct');
230
+ const aiPct = document.getElementById('aiPct');
231
+ const badge = document.getElementById('badgeLabel');
232
+ const thresholdEl = document.getElementById('threshold');
233
+ const whyText = document.getElementById('whyText');
234
+
235
+ const fileInput = document.getElementById('fileInput');
236
+ const fileName = document.getElementById('fileName');
237
+ const recentList = document.getElementById('recentList');
238
+ const clearRecent = document.getElementById('clearRecent');
239
+
240
+ // ===== Tabs =====
241
/**
 * Show either the microphone panel or the upload panel.
 * Any value other than 'mic' selects the upload tab.
 * Also highlights the active tab button and updates the source label.
 */
function setTab(which){
  const micActive = which === 'mic';
  // The active tab gets the highlight class; the other one loses it.
  tabMic.classList.toggle('bg-white/5', micActive);
  tabUpload.classList.toggle('bg-white/5', !micActive);
  // Exactly one panel is visible at a time.
  micPanel.classList.toggle('hidden', !micActive);
  uploadPanel.classList.toggle('hidden', micActive);
  srcLabel.textContent = micActive ? 'Microphone' : 'Upload';
}
256
+ tabMic.onclick = ()=> setTab('mic');
257
+ tabUpload.onclick = ()=> setTab('upload');
258
+ setTab('mic');
259
+
260
+ // ===== Upload label =====
261
+ fileInput.onchange = ()=> fileName.textContent = fileInput.files?.[0]?.name || '';
262
+
263
+ // ===== Mic + meter =====
264
+ const meterCanvas = document.getElementById('meter');
265
+ const mctx = meterCanvas.getContext('2d');
266
+ const resizeMeter = ()=>{ meterCanvas.width = meterCanvas.clientWidth; meterCanvas.height = meterCanvas.clientHeight; };
267
+ resizeMeter(); addEventListener('resize', resizeMeter);
268
+
269
+ let mediaRecorder, chunks=[], micStream=null, audioCtx=null, analyser=null, raf=null, lastRecordedBlob=null;
270
+
271
/**
 * Start the level-meter animation: on every animation frame, read the
 * analyser's byte frequency data and draw the first 48 bins as bars.
 * The pending frame id is stored in `raf` so the loop can be cancelled.
 */
function loopMeter(){
  const data = new Uint8Array(analyser.frequencyBinCount);
  const bars = 48;
  const draw = ()=>{
    // Read the canvas size every frame: resizeMeter() may change it while
    // the loop is running, and cached values would draw at the old size.
    const w = meterCanvas.width, h = meterCanvas.height;
    const barW = w/bars;
    analyser.getByteFrequencyData(data);
    mctx.fillStyle = '#0b0b0f';
    mctx.fillRect(0,0,w,h);
    for (let i=0;i<bars;i++){
      const v = data[i]/255;          // normalise bin magnitude to 0..1
      const bh = v*h*0.9;             // leave 10% headroom at the top
      const x = i*barW+2, y = h-bh;
      mctx.fillStyle = `rgba(255,106,0,${0.35+0.65*v})`;
      mctx.fillRect(x,y,barW-4,bh);
    }
    raf = requestAnimationFrame(draw);
  };
  draw();
}
287
+
288
/**
 * Ask for microphone access, start a MediaRecorder on the stream and start
 * the level meter. Does nothing if a recording is already in progress.
 * On stop, the recorded chunks are combined into `lastRecordedBlob`.
 */
async function startRecording(){
  if(micStream) return;

  let stream;
  try{
    stream = await navigator.mediaDevices.getUserMedia({audio:true});
  }catch(err){
    // Permission denied or no input device: report it instead of leaving
    // an unhandled promise rejection.
    console.error(err);
    recStatus.textContent = 'Microphone unavailable';
    return;
  }
  // A second click may have started a recording while we were waiting.
  if(micStream){ stream.getTracks().forEach(t => t.stop()); return; }

  // Not every browser can record audio/webm; pick the first supported
  // container and otherwise let the browser choose its default.
  const candidates = ['audio/webm', 'audio/mp4', 'audio/ogg'];
  const mimeType = candidates.find(t => MediaRecorder.isTypeSupported(t));
  let recorder;
  try{
    recorder = mimeType ? new MediaRecorder(stream, {mimeType}) : new MediaRecorder(stream);
  }catch(err){
    console.error(err);
    stream.getTracks().forEach(t => t.stop());   // release the microphone
    recStatus.textContent = 'Microphone unavailable';
    return;
  }

  micStream = stream;
  mediaRecorder = recorder;
  chunks = [];   // drop leftovers from any earlier, interrupted recording
  recorder.ondataavailable = e => { if(e.data.size>0) chunks.push(e.data); };
  recorder.onstop = () => {
    lastRecordedBlob = new Blob(chunks, {type: recorder.mimeType || 'audio/webm'});
    chunks = [];
    recStatus.textContent='Recorded';
  };
  recorder.start();
  recStatus.textContent = 'Recording…';
  recBtn.textContent = '■ Stop';

  // Feed the same stream into an analyser for the live meter.
  audioCtx = new (window.AudioContext||window.webkitAudioContext)();
  const source = audioCtx.createMediaStreamSource(micStream);
  analyser = audioCtx.createAnalyser();
  analyser.fftSize = 1024;
  source.connect(analyser);
  loopMeter();
}
303
/**
 * Stop the active recording, if any: stop the recorder, release every
 * microphone track, cancel the meter animation and close the audio context,
 * then reset the button and status text.
 */
function stopRecording(){
  if(!micStream) return;

  if(mediaRecorder != null) mediaRecorder.stop();
  for (const track of micStream.getTracks()) track.stop();
  micStream = null;

  cancelAnimationFrame(raf);
  audioCtx.close();

  recBtn.textContent = '● Record';
  recStatus.textContent = 'Idle';
}
311
+ recBtn.onclick = ()=> micStream ? stopRecording() : startRecording();
312
+
313
+ // ===== Audio helpers: decode -> resample(16k mono) -> PCM16 -> WAV =====
314
/**
 * Decode an audio Blob/File into mono Float32 samples.
 * When the decoded buffer has more than one channel, the first two
 * channels are averaged.
 * @returns {Promise<{pcm: Float32Array, sr: number}>} the samples and the
 *          sample rate of the decoded buffer.
 * Rejects if the data cannot be decoded.
 */
async function blobToPCM(blob){
  const arr = await blob.arrayBuffer();
  const ctx = new (window.AudioContext||window.webkitAudioContext)();
  try{
    const buf = await ctx.decodeAudioData(arr);
    let pcm = buf.getChannelData(0);
    if (buf.numberOfChannels>1){
      const r = buf.getChannelData(1);
      const n = Math.min(pcm.length, r.length);
      const m = new Float32Array(n);
      for (let i=0;i<n;i++) m[i] = 0.5*(pcm[i]+r[i]);
      pcm = m;
    }
    return {pcm, sr: buf.sampleRate};
  } finally {
    // Always release the context, including when decoding fails; otherwise
    // every failed attempt leaves an open AudioContext behind.
    await ctx.close();
  }
}
329
/**
 * Resample `pcm` from `fromSr` to `toSr` using linear interpolation.
 * Returns the input array itself when the two rates are equal; otherwise a
 * new Float32Array of Math.round(pcm.length * toSr / fromSr) samples.
 */
function resampleLinear(pcm, fromSr, toSr=16000){
  if (fromSr===toSr) return pcm;

  const scale = toSr/fromSr;
  const outLen = Math.round(pcm.length*scale);
  const lastIdx = pcm.length-1;

  return Float32Array.from({length: outLen}, (_, k) => {
    const pos = k/scale;                      // position in the source signal
    const lo = Math.floor(pos);
    const hi = Math.min(lo+1, lastIdx);       // clamp at the final sample
    const frac = pos-lo;
    return (1-frac)*pcm[lo]+frac*pcm[hi];
  });
}
338
/**
 * Convert float samples to signed 16-bit PCM.
 * Each sample is clamped to [-1, 1]; negatives are scaled by 0x8000 and
 * non-negatives by 0x7fff, so both ends of the int16 range are reachable.
 */
function floatTo16(pcm){
  return Int16Array.from(pcm, sample => {
    const clamped = Math.max(-1, Math.min(1, sample));
    return clamped < 0 ? clamped*0x8000 : clamped*0x7fff;
  });
}
343
/**
 * Wrap 16-bit PCM samples in a 44-byte RIFF/WAVE header.
 * @param {Int16Array} int16  interleaved samples
 * @param {number} sampleRate samples per second written to the header
 * @param {number} numChannels channel count written to the header
 * @returns {Blob} a Blob of type 'audio/wav'
 */
function wavEncodePCM16(int16, sampleRate=16000, numChannels=1){
  const bytesPerSample = 2;
  const dataBytes = int16.length*bytesPerSample;
  const blockAlign = numChannels*bytesPerSample;
  const byteRate = sampleRate*blockAlign;

  const buffer = new ArrayBuffer(44 + dataBytes);
  const view = new DataView(buffer);
  let off = 0;
  const writeTag = s => { for (let i=0;i<s.length;i++) view.setUint8(off++, s.charCodeAt(i)); };
  const writeU32 = v => { view.setUint32(off, v, true); off += 4; };
  const writeU16 = v => { view.setUint16(off, v, true); off += 2; };

  // RIFF chunk descriptor
  writeTag('RIFF'); writeU32(36 + dataBytes); writeTag('WAVE');
  // fmt sub-chunk: 16-byte body, format 1 (PCM), 16 bits per sample
  writeTag('fmt '); writeU32(16);
  writeU16(1); writeU16(numChannels); writeU32(sampleRate);
  writeU32(byteRate); writeU16(blockAlign); writeU16(16);
  // data sub-chunk
  writeTag('data'); writeU32(dataBytes);

  // Write samples explicitly as little-endian. Copying through an
  // Int16Array view would use the host's byte order, which is not
  // guaranteed to match what the WAV format requires.
  for (let i=0;i<int16.length;i++){ view.setInt16(off, int16[i], true); off += 2; }

  return new Blob([buffer], {type:'audio/wav'});
}
354
+
355
+ // ===== UI helpers =====
356
/**
 * Update the final-label badge from the backend response.
 * A label equal to "ai" (case-insensitive) renders the rose "AI" badge;
 * anything else, including a missing label, renders the emerald "HUMAN" badge.
 */
function setBadgeFromBackend(out){
  const BADGE = {
    ai: {
      text: 'AI',
      cls: "px-3 py-1.5 text-sm font-semibold rounded-xl border border-rose-400/30 bg-rose-400/15 text-rose-300",
    },
    human: {
      text: 'HUMAN',
      cls: "px-3 py-1.5 text-sm font-semibold rounded-xl border border-emerald-400/30 bg-emerald-400/15 text-emerald-300",
    },
  };
  const key = (out.label || '').toLowerCase() === 'ai' ? 'ai' : 'human';
  badge.textContent = BADGE[key].text;
  badge.className = BADGE[key].cls;
}
363
/**
 * Render the one-line explanation under the badge: the decision rule, the
 * AI probability, the threshold with its source, the margin (ai - threshold)
 * and the replay score ("—" when the score is not a number).
 */
function setWhyLine(out){
  const thr = out.threshold;

  let rs = '—';
  if (typeof out.replay_score === 'number') rs = out.replay_score.toFixed(2);

  const dec = out.decision || 'threshold';
  const src = out.threshold_source || '—';
  const aiP = (out.ai*100).toFixed(1);
  const thrPct = (thr*100).toFixed(0);
  const margin = (out.ai - thr).toFixed(2);

  whyText.textContent = `Decision: ${dec} | AI=${aiP}% | thr(${src})=${thrPct}% | margin=${margin} | replay=${rs}`;
}
373
/**
 * Prepend an entry to the "recent" list.
 * @param {{src: string, label: string, ph: number, pa: number}} entry
 *   src   - 'Mic' gets the indigo chip, anything else the amber chip
 *   label - 'AI' gets the rose chip, anything else the emerald chip
 *   ph/pa - human / AI probabilities, shown as rounded percentages
 *
 * The entry is built from DOM nodes with textContent rather than an
 * innerHTML template, because `label` comes from the backend response and
 * must not be parsed as markup.
 */
function addRecent({src,label,ph,pa}){
  const make = (tag, className, text) => {
    const el = document.createElement(tag);
    el.className = className;
    if (text !== undefined) el.textContent = text;
    return el;
  };

  const srcTone = src==='Mic'
    ? 'border-indigo-400/40 bg-indigo-400/15 text-indigo-300'
    : 'border-amber-400/40 bg-amber-400/15 text-amber-300';
  const labelTone = label==='AI'
    ? 'bg-rose-400/15 text-rose-300 border border-rose-400/30'
    : 'bg-emerald-400/15 text-emerald-300 border border-emerald-400/30';

  const li = make('li', "flex items-center justify-between rounded-xl border border-white/10 bg-white/5 px-3 py-2");
  const left = make('div', 'flex items-center gap-3');
  left.append(
    make('span', `text-xs px-2 py-0.5 rounded-full border ${srcTone}`, src),
    make('span', 'text-sm', `${Math.round(ph*100)}% human / ${Math.round(pa*100)}% AI`)
  );
  li.append(left, make('span', `text-xs px-2 py-0.5 rounded-lg ${labelTone}`, label));

  recentList.prepend(li);
}
385
+ clearRecent.onclick = ()=> { recentList.innerHTML=''; };
386
+
387
+ // ===== Analyze =====
388
/**
 * Analyze handler: take the last recording or the chosen file, convert it
 * to 16 kHz mono PCM16 WAV in the browser, POST it to BACKEND_URL and render
 * the response (heatmap, probabilities, threshold, final label, latency,
 * recent-list entry).
 */
analyzeBtn.onclick = async ()=>{
  const t0 = performance.now();
  const isMic = !micPanel.classList.contains('hidden');

  let blob = null;
  if (isMic){
    if(!lastRecordedBlob){ alert('Record 3–7 seconds first.'); return; }
    blob = lastRecordedBlob;
  } else {
    if(!fileInput.files?.length){ alert('Choose an audio file.'); return; }
    blob = fileInput.files[0];
  }

  // Block repeat clicks while a request is running so the same audio is
  // not submitted twice and responses cannot render out of order.
  analyzeBtn.disabled = true;
  try{
    // Decode -> resample -> WAV
    const {pcm, sr} = await blobToPCM(blob);
    const pcm16k = resampleLinear(pcm, sr, 16000);
    const int16 = floatTo16(pcm16k);
    const wavBlob = wavEncodePCM16(int16, 16000, 1);

    const form = new FormData();
    form.append('file', new File([wavBlob], 'audio.wav', {type:'audio/wav'}));
    form.append('source_hint', isMic ? 'microphone' : 'upload');

    const res = await fetch(BACKEND_URL, { method:'POST', body: form });
    if(!res.ok) throw new Error(`API ${res.status}`);
    const out = await res.json();

    // Heatmap
    heatmapImg.src = out.heatmap_b64;
    heatmapPlaceholder.style.display = 'none';

    // Donuts & numbers
    const ph = out.human, pa = out.ai, thr = out.threshold;
    humanPct.textContent = Math.round(ph*100);
    aiPct.textContent = Math.round(pa*100);
    donutHuman.style.setProperty('--val', ph.toFixed(3));
    donutAI.style.setProperty('--val', pa.toFixed(3));
    thresholdEl.textContent = `thr ${thr.toFixed(2)}`;

    // Final label: TRUST BACKEND
    setBadgeFromBackend(out);
    setWhyLine(out);

    latency.textContent = `${Math.round(performance.now()-t0)} ms`;
    // Use the same two-way rule as the badge, so a missing or unexpected
    // label cannot throw here or disagree with what the badge shows.
    const finalLabel = (out.label || '').toLowerCase() === 'ai' ? 'AI' : 'HUMAN';
    addRecent({src: isMic?'Mic':'Upload', label: finalLabel, ph, pa});
  }catch(err){
    console.error(err);
    alert('Analyze failed. Check console & backend URL in config.js');
  }finally{
    analyzeBtn.disabled = false;
  }
};
438
+ </script>
439
+ </body>
440
+ </html>
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ libsndfile1
requirements.txt CHANGED
@@ -1,3 +1,28 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch>=2.2.0
2
+ torchaudio>=2.2.0
3
+ numpy>=1.26.4
4
+ scipy>=1.11.4
5
+ librosa>=0.10.1
6
+ soundfile>=0.12.1
7
+ audiomentations>=0.37.0
8
+ gradio>=4.44.0
9
+ fastapi>=0.111.0
10
+ uvicorn>=0.30.0
11
+ python-dotenv>=1.0.1
12
+ pydantic>=2.6.4
13
+ requests>=2.32.0
14
+ matplotlib>=3.8.4
15
+ # faster-whisper
16
+ # speechbrain
17
+ black>=24.4.2
18
+ fastapi
19
+ uvicorn[standard]
20
+ python-multipart
21
+ pillow
22
+ matplotlib
23
+ transformers
24
+ torchaudio
25
+ soundfile
26
+ audiomentations
27
+ streamlit>=1.33
28
+ audio-recorder-streamlit==0.0.8
streamlit_app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # streamlit_app.py
2
+ import os, io, base64, urllib.request, pathlib
3
+ import numpy as np
4
+ import streamlit as st
5
+ from PIL import Image
6
+ from matplotlib import cm
7
+
8
+ # ------- wiring to your detector -------
9
+ # We prefer the wav2vec2 detector; fall back to the CNN one if needed.
10
# Candidate detector modules, in order of preference. The first one that
# imports and exposes a ``Detector`` attribute is used.
BACKENDS_TRY = ["app.inference_wav2vec", "app.inference"]
Detector = None
err = None  # last import failure, kept so it can be shown to the user
for mod in BACKENDS_TRY:
    try:
        Detector = __import__(mod, fromlist=["Detector"]).Detector
        break
    except Exception as e:
        # Any failure while importing a backend (missing module, missing
        # dependency, missing attribute) means "try the next candidate".
        err = e
if Detector is None:
    st.error("Could not import Detector from app/. Make sure your repo contains app/inference_wav2vec.py (or app/inference.py).")
    if err is not None:
        # Show the underlying cause; without it a broken dependency looks
        # the same as a missing file.
        st.exception(err)
    st.stop()
23
+
24
+ # ------- config / weights -------
25
def _get_setting(name: str, default: str) -> str:
    """Return setting *name* from the environment, else st.secrets, else *default*."""
    value = os.environ.get(name)
    if value is not None:
        return value
    try:
        return st.secrets.get(name, default)
    except Exception:
        # Reading st.secrets can raise when no secrets file is configured,
        # and the exception type differs between Streamlit versions. A
        # missing secrets store just means "use the default".
        return default


def ensure_weights():
    """Return the model weights path, downloading the file first if needed.

    The path comes from ``MODEL_WEIGHTS_PATH`` and the optional download URL
    from ``MODEL_WEIGHTS_URL``; each is read from the environment first and
    from Streamlit secrets second. A download happens only when a URL is set
    and nothing exists at the path yet.

    Returns:
        The weights path as a string. The file is not guaranteed to exist
        when no URL is configured.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises if the download fails.
    """
    wp = _get_setting("MODEL_WEIGHTS_PATH", "app/models/weights/wav2vec2_classifier.pth")
    url = _get_setting("MODEL_WEIGHTS_URL", "")

    if url and not os.path.exists(wp):
        target = pathlib.Path(wp)
        target.parent.mkdir(parents=True, exist_ok=True)
        # Download next to the target and rename on success. An interrupted
        # download must not leave a truncated file at ``wp``, because the
        # exists-check above would then skip the download on every later run.
        partial = target.with_name(target.name + ".part")
        with st.spinner("Downloading model weights…"):
            try:
                urllib.request.urlretrieve(url, partial)
                os.replace(partial, target)
            finally:
                partial.unlink(missing_ok=True)
    return wp
34
+
35
@st.cache_resource
def load_detector():
    """Build the detector from the resolved weights path.

    Wrapped in ``st.cache_resource`` so the returned instance is cached by
    Streamlit instead of being rebuilt on each call.
    """
    return Detector(weights_path=ensure_weights())
40
+
41
+ det = load_detector()
42
+
43
+ # ------- helpers -------
44
def cam_to_png_bytes(cam: np.ndarray) -> bytes:
    """Render an importance map as PNG bytes using the magma colormap.

    Values are converted to float32 and clipped to [0, 1] before colouring;
    the colormap's alpha channel is dropped.
    """
    clipped = np.clip(np.array(cam, dtype=np.float32), 0.0, 1.0)
    rgba = cm.magma(clipped)
    pixels = (rgba[..., :3] * 255).astype(np.uint8)
    with io.BytesIO() as out:
        Image.fromarray(pixels).save(out, format="PNG")
        return out.getvalue()
52
+
53
def analyze(wav_bytes: bytes, source_hint: str):
    """Run the detector on one clip.

    Calls ``det.predict_proba`` and then ``det.explain`` with the same audio
    bytes and source hint, and returns both results as a
    ``(proba, explanation)`` tuple.
    """
    return (
        det.predict_proba(wav_bytes, source_hint=source_hint),
        det.explain(wav_bytes, source_hint=source_hint),
    )
57
+
58
+ # ------- UI -------
59
+ st.set_page_config(page_title="Voice Guard", page_icon="🛡️", layout="wide")
60
+ st.title("🛡️ Voice Guard — Human vs AI Speech (Streamlit)")
61
+
62
+ left, right = st.columns([1,2])
63
+
64
+ with left:
65
+ st.subheader("Input")
66
+ tab_rec, tab_up = st.tabs(["🎙️ Microphone", "📁 Upload"])
67
+
68
+ wav_bytes = None
69
+ source_hint = None
70
+
71
+ with tab_rec:
72
+ st.caption("If the recorder component fails on your browser, use Upload.")
73
+ try:
74
+ # light, zero-config recorder component
75
+ from audio_recorder_streamlit import audio_recorder
76
+ audio = audio_recorder(
77
+ text="Record",
78
+ recording_color="#ff6a00",
79
+ neutral_color="#2b2b2b",
80
+ icon_size="2x",
81
+ )
82
+ if audio:
83
+ wav_bytes = audio # component returns WAV bytes
84
+ source_hint = "microphone"
85
+ st.audio(wav_bytes, format="audio/wav")
86
+ except Exception:
87
+ st.info("Recorder component not available; please use the Upload tab.")
88
+
89
+ with tab_up:
90
+ f = st.file_uploader("Upload an audio file (wav/mp3/m4a)", type=["wav","mp3","m4a","aac"])
91
+ if f is not None:
92
+ wav_bytes = f.read()
93
+ source_hint = "upload"
94
+ st.audio(wav_bytes)
95
+
96
+ st.markdown("---")
97
+ run = st.button("🔍 Analyze", use_container_width=True, type="primary", disabled=wav_bytes is None)
98
+
99
+ with right:
100
+ st.subheader("Results")
101
+ placeholder = st.empty()
102
+
103
+ if run and wav_bytes:
104
+ with st.spinner("Analyzing…"):
105
+ proba, exp = analyze(wav_bytes, source_hint or "auto")
106
+
107
+ ph = proba["human"]; pa = proba["ai"]
108
+ label = proba["label"].upper()
109
+ thr = proba.get("threshold", 0.5)
110
+ rule = proba.get("decision", "threshold")
111
+ rscore = proba.get("replay_score", None)
112
+ thr_src = proba.get("threshold_source", "—")
113
+
114
+ col1, col2, col3 = st.columns(3)
115
+ with col1:
116
+ st.metric("Human", f"{ph*100:.1f} %")
117
+ with col2:
118
+ st.metric("AI", f"{pa*100:.1f} %")
119
+ with col3:
120
+ color = "#22c55e" if label=="HUMAN" else "#fb7185"
121
+ st.markdown(f"**Final Label:** <span style='color:{color}'>{label}</span>", unsafe_allow_html=True)
122
+ st.caption(f"thr({thr_src})={thr:.2f} • rule={rule} • replay={('-' if rscore is None else f'{rscore:.2f}')}")
123
+
124
+ st.markdown("##### Explanation Heatmap")
125
+ cam = np.array(exp["cam"], dtype=np.float32)
126
+ st.image(cam_to_png_bytes(cam), caption="Spectrogram importance", use_column_width=True)
127
+
128
+ st.markdown("---")
129
+ with st.expander("Raw JSON (debug)"):
130
+ st.json({"proba": proba, "explain": {"cam_shape": list(cam.shape)}})
131
+
132
+ st.caption("Tip: If the mic recorder fails, upload a short 3–7s clip instead.")