Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files- .env +2 -0
- .gitattributes +3 -35
- README.md +9 -17
- gen_clips.py +282 -0
- index.html +440 -0
- packages.txt +1 -0
- requirements.txt +28 -3
- streamlit_app.py +132 -0
.env
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
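# SECURITY: a real API key was committed on this line. Revoke/rotate that key and supply the value via a local, untracked .env or a Space secret instead.
ELEVEN_API_KEY=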
|
| 2 |
+
ELEVEN_VOICE_ID=
|
.gitattributes
CHANGED
|
@@ -1,35 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,20 +1,12 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Voice Guard
|
| 3 |
-
emoji: 🚀
|
| 4 |
-
colorFrom: red
|
| 5 |
-
colorTo: red
|
| 6 |
-
sdk: docker
|
| 7 |
-
app_port: 8501
|
| 8 |
-
tags:
|
| 9 |
-
- streamlit
|
| 10 |
-
pinned: false
|
| 11 |
-
short_description: Streamlit template space
|
| 12 |
-
license: apache-2.0
|
| 13 |
-
---
|
| 14 |
|
| 15 |
-
#
|
| 16 |
|
| 17 |
-
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
|
| 2 |
+
# AI Voice Detector — Human vs AI Speech (Hack-Ready)
|
| 3 |
|
| 4 |
+
**Goal:** Detect whether an audio clip is **AI-generated or human** in real time, with an **explainable heatmap** over the spectrogram and **(optional) provenance check** via ElevenLabs API.
|
| 5 |
|
| 6 |
+
## Quickstart
|
| 7 |
+
|
| 8 |
+
```bash
|
| 9 |
+
python -m venv .venv && source .venv/bin/activate # Windows: .venv\Scripts\activate
|
| 10 |
+
pip install -r requirements.txt
|
| 11 |
+
python app/app.py
|
| 12 |
+
```
|
gen_clips.py
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
from app.elevenlabs_tools import generate_tts_dataset
|
| 5 |
+
from app.utils.convert_mp3_to_wav import mp3_to_wav16k
|
| 6 |
+
|
| 7 |
+
# --- Load API key ---
# Run load_dotenv() first, then abort at import time when ELEVEN_API_KEY is
# missing or empty in the process environment, so no TTS request is attempted
# without credentials.
load_dotenv()
if not os.environ.get("ELEVEN_API_KEY"):
    raise ValueError("ELEVEN_API_KEY not found in .env file")
|
| 11 |
+
|
| 12 |
+
# --- Your ElevenLabs voices ---
|
| 13 |
+
VOICES = {
|
| 14 |
+
"Adam": "pNInz6obpgDQGcFmaJgB",
|
| 15 |
+
"Alice": "Xb7hH8MSUJpSbSDYk0k2",
|
| 16 |
+
"Aria": "9BWtsMINqrJLrRacOk9x",
|
| 17 |
+
"Brian": "nPczCjzI2devNBz1zQrb",
|
| 18 |
+
"Bill": "pqHfZKP75CvOlQylNhV4",
|
| 19 |
+
"Charlotte": "XB0fDUnXU5powFXDhCwa",
|
| 20 |
+
"Clyde": "2EiwWnXFnvU5JabPnv8n",
|
| 21 |
+
"Drew": "29vD33N1CtxCmqQRPOHJ",
|
| 22 |
+
"Freya": "jsCqWAovK2LkecY7zXl4",
|
| 23 |
+
"Gigi": "jBpfuIE2acCO8z3wKNLl",
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
MP3_ROOT = "data/raw/ai_mp3"
|
| 27 |
+
WAV_ROOT = "data/raw/ai"
|
| 28 |
+
|
| 29 |
+
# --- 200 sentences (20 per voice) ---
|
| 30 |
+
SENTS = {
|
| 31 |
+
# Alice = News Anchor (F) — 20
|
| 32 |
+
"Alice": [
|
| 33 |
+
"Good evening. Here are today's top stories from campus and around Arlington.",
|
| 34 |
+
"City officials approved new bike lanes, citing safety and climate benefits.",
|
| 35 |
+
"Temperatures rise tomorrow, with scattered showers likely late in the afternoon.",
|
| 36 |
+
"The Mavericks sealed a comeback win after a tense fourth quarter tonight.",
|
| 37 |
+
"Economists project steady growth this quarter despite lingering supply constraints.",
|
| 38 |
+
"Construction on Cooper Street resumes Monday; expect delays and posted detours.",
|
| 39 |
+
"A UTA research team announced a battery recycling breakthrough this morning.",
|
| 40 |
+
"Flights at DFW remain on schedule, with only minor delays reported.",
|
| 41 |
+
"Early voting saw record turnout across several precincts, officials confirmed.",
|
| 42 |
+
"That is the latest update; we will return with more at eleven.",
|
| 43 |
+
"Transit officials unveiled a pilot for free weekend rides across the city.",
|
| 44 |
+
"State health leaders reported declining flu cases headed into next week.",
|
| 45 |
+
"Energy prices dipped slightly today amid forecasts for milder temperatures.",
|
| 46 |
+
"The council passed a balanced budget, prioritizing schools and road repairs.",
|
| 47 |
+
"Local firefighters rescued a hiker after an overnight search near the lake.",
|
| 48 |
+
"A new scholarship program will support first-generation students beginning this fall.",
|
| 49 |
+
"Sports headlines: the women's team advances to the regional semifinals tomorrow.",
|
| 50 |
+
"Expect strong winds overnight; secure loose items and check travel advisories.",
|
| 51 |
+
"Police announced an amnesty weekend to safely turn in prohibited fireworks.",
|
| 52 |
+
"That wraps the hour; for breaking updates, follow our digital live blog.",
|
| 53 |
+
],
|
| 54 |
+
# Adam = Friendly Conversational (M) — 20
|
| 55 |
+
"Adam": [
|
| 56 |
+
"Hey, I grabbed coffee already. Want me to save you a seat?",
|
| 57 |
+
"I couldn't find your charger, but I left a spare cable on the desk.",
|
| 58 |
+
"Traffic is heavy near the stadium; let's park early and walk together.",
|
| 59 |
+
"Your demo looked great. The UI felt clean, fast, and friendly.",
|
| 60 |
+
"Let's split the grocery list: produce for you, pantry items for me.",
|
| 61 |
+
"I'll ping the group chat once I reach the venue, no worries.",
|
| 62 |
+
"Your slides are solid; add a quick metric slide before the demo.",
|
| 63 |
+
"The new cafe downtown has almond croissants that blew my mind.",
|
| 64 |
+
"I'm heading out now; text me if you need anything from Target.",
|
| 65 |
+
"Thanks again for yesterday. You genuinely saved our timeline.",
|
| 66 |
+
"I booked the study room at six; bring markers and sticky notes.",
|
| 67 |
+
"We can pair on tests tonight, then merge before midnight.",
|
| 68 |
+
"I'll water the plants while you're away; just leave the key.",
|
| 69 |
+
"Your playlist slapped; share it so I can loop it tomorrow.",
|
| 70 |
+
"The package arrived early, so I'll drop it off after class.",
|
| 71 |
+
"Let's meal prep Sunday afternoon and avoid takeout next week.",
|
| 72 |
+
"I left comments in the doc; happy to chat through suggestions.",
|
| 73 |
+
"Shall we run by the lake at seven and grab smoothies after?",
|
| 74 |
+
"I set the reminder; we'll check results first thing in the morning.",
|
| 75 |
+
"Great news: the refund cleared, and the receipt is in your email.",
|
| 76 |
+
],
|
| 77 |
+
# Clyde = British Formal (M) — 20
|
| 78 |
+
"Clyde": [
|
| 79 |
+
"Kindly ensure the documentation is reviewed before the committee convenes Thursday afternoon.",
|
| 80 |
+
"Your reservation is confirmed; a private room will be prepared upon your arrival.",
|
| 81 |
+
"Please submit the revised manuscript, adhering to the journal's formatting guidelines.",
|
| 82 |
+
"The seminar commences at nine precisely; late admittance may not be accommodated.",
|
| 83 |
+
"We appreciate your patience while maintenance completes the scheduled electrical inspection.",
|
| 84 |
+
"Do verify the figures; precision remains paramount in this investigation.",
|
| 85 |
+
"The board welcomes your proposal and invites a concise presentation next week.",
|
| 86 |
+
"Do accept my apologies; the courier appears to have misplaced the parcel.",
|
| 87 |
+
"The contract shall be executed once both parties acknowledge the amended clause.",
|
| 88 |
+
"I trust the arrangements meet your expectations; advise if alterations are required.",
|
| 89 |
+
"Minutes from the previous meeting are circulated for your timely acknowledgment.",
|
| 90 |
+
"Your membership will be renewed upon completion of the enclosed application.",
|
| 91 |
+
"The gallery preview opens at six; appropriate attire is kindly requested.",
|
| 92 |
+
"Please confer with procurement before engaging additional external suppliers.",
|
| 93 |
+
"We remain grateful for your counsel and continuing professional partnership.",
|
| 94 |
+
"The timetable reflects minor adjustments to accommodate laboratory availability.",
|
| 95 |
+
"Kindly return the archive keys to reception at the close of business.",
|
| 96 |
+
"A modest reception will follow the lecture in the Great Hall foyer.",
|
| 97 |
+
"Your diligence is noted; the supervisory panel commends your progress.",
|
| 98 |
+
"Should difficulties arise, do not hesitate to contact the department secretary.",
|
| 99 |
+
],
|
| 100 |
+
# Charlotte = Energetic Young (F) — 20
|
| 101 |
+
"Charlotte": [
|
| 102 |
+
"Let's go team, hack time! Push that commit and ship the killer feature.",
|
| 103 |
+
"I'm hyped for finals; caffeine plus playlists equals unstoppable study mode.",
|
| 104 |
+
"Your reel looked amazing; post it now before the algorithm naps.",
|
| 105 |
+
"The new sneakers dropped today, and the colors are ridiculously clean.",
|
| 106 |
+
"We are sprinting to the finish; grab snacks and let's smash these tasks.",
|
| 107 |
+
"Quick check-in: are we vibing with blue accents or neon gradients?",
|
| 108 |
+
"That trailer went hard; I'm watching the premiere on night one.",
|
| 109 |
+
"Toss me the aux; I have the perfect focus track for crunch.",
|
| 110 |
+
"Class got canceled; brunch and brainstorming at ten sound perfect.",
|
| 111 |
+
"Big win, everyone! Screenshots, gifs, and celebratory donuts for the squad.",
|
| 112 |
+
"Mic check at five, lights at six, and we go live at seven.",
|
| 113 |
+
"I updated the banner; the new glow makes the title pop.",
|
| 114 |
+
"Can we swap the hero image? The neon skyline absolutely slaps.",
|
| 115 |
+
"Tiny bug spotted; I'm patching it now and pushing a hotfix.",
|
| 116 |
+
"The vibe is immaculate; let's ride the momentum and overdeliver.",
|
| 117 |
+
"Okay, squad goals: demo flawless, judges smiling, trophy secured.",
|
| 118 |
+
"I queued the soundtrack; it builds perfectly into the reveal moment.",
|
| 119 |
+
"Let's loop the b-roll while we talk through the metrics slide.",
|
| 120 |
+
"The confetti emoji is ready; I am saving it for the finale.",
|
| 121 |
+
"Final stretch energy: deep breath, big smile, and hit deploy.",
|
| 122 |
+
],
|
| 123 |
+
# Freya = Calm Meditation (F) — 20
|
| 124 |
+
"Freya": [
|
| 125 |
+
"Breathe in gently, noticing cool air filling your chest and shoulders.",
|
| 126 |
+
"Exhale slowly, allowing the tension around your eyes to soften.",
|
| 127 |
+
"Let your attention rest on the rhythm of your breath, steady and quiet.",
|
| 128 |
+
"Imagine warm sunlight touching your face, inviting ease into your morning.",
|
| 129 |
+
"Release today's concerns; your body knows how to return to balance.",
|
| 130 |
+
"Sense the ground beneath you, steady, supportive, and completely reliable.",
|
| 131 |
+
"With each inhale, welcome spaciousness; with each exhale, welcome calm.",
|
| 132 |
+
"Thank your busy mind for its effort, and invite it to rest.",
|
| 133 |
+
"Notice your heartbeat, patient and gentle, guiding you toward presence.",
|
| 134 |
+
"Carry this softness forward; you are grounded, clear, and ready.",
|
| 135 |
+
"Let your shoulders drop slightly, as if set down from a kind weight.",
|
| 136 |
+
"Picture a wide horizon; there is time to move with kindness.",
|
| 137 |
+
"Let stray thoughts pass like clouds, changing shape and drifting away.",
|
| 138 |
+
"Soften the jaw; let the tongue rest, calm and unhurried.",
|
| 139 |
+
"Feel the breath arrive, then leave, like waves returning to sea.",
|
| 140 |
+
"Offer gratitude to this moment, exactly as it is appearing.",
|
| 141 |
+
"Invite quiet where worry stood; let steady breath fill that space.",
|
| 142 |
+
"Imagine your spine lengthening, lifting you gently into balance.",
|
| 143 |
+
"Hold kindness in the chest; exhale and share it outward.",
|
| 144 |
+
"Return to the breath whenever the mind asks for a handhold.",
|
| 145 |
+
],
|
| 146 |
+
# Bill = Elderly Storyteller (M) — 20
|
| 147 |
+
"Bill": [
|
| 148 |
+
"When summer storms rolled in, we counted seconds between lightning and thunder.",
|
| 149 |
+
"Your grandmother kept recipes on cards, stained with sweet berry memories.",
|
| 150 |
+
"We built radios from kits, chasing distant stations after sundown.",
|
| 151 |
+
"The library smelled of paper and varnish, refuge on rainy afternoons.",
|
| 152 |
+
"I learned patience fixing bicycles, one stubborn bolt at a time.",
|
| 153 |
+
"We mapped the night sky, tracing stories across cold constellations.",
|
| 154 |
+
"A firm handshake once sealed agreements stronger than signed paper.",
|
| 155 |
+
"I still hear that tune drifting from open windows each spring.",
|
| 156 |
+
"The river taught respect; quiet water can hide a heavy current.",
|
| 157 |
+
"Keep your curiosity; it carries farther than cleverness alone.",
|
| 158 |
+
"We patched leaky roofs with laughter and tar on summer mornings.",
|
| 159 |
+
"Neighbors traded tools, stories, and peaches over the backyard fence.",
|
| 160 |
+
"A pocketknife and twine solved more problems than any fancy kit.",
|
| 161 |
+
"The best advice I got was simple: listen longer than you speak.",
|
| 162 |
+
"We saved bottle caps for games that lasted until the porch lights.",
|
| 163 |
+
"Patience is a bridge you build before the flood ever arrives.",
|
| 164 |
+
"The kindest teachers led with questions, not with thunder.",
|
| 165 |
+
"I kept a notebook of firsts: first snowfall, first bicycle, first apology.",
|
| 166 |
+
"Luck visits briefly; preparation invites it to stay for tea.",
|
| 167 |
+
"If you tend your friendships, they will flower even in winter.",
|
| 168 |
+
],
|
| 169 |
+
# Brian = Tech Presenter (M) — 20
|
| 170 |
+
"Brian": [
|
| 171 |
+
"Today we will deploy a tiny model to the edge with real-time inference.",
|
| 172 |
+
"Our pipeline standardizes audio at sixteen kilohertz for consistent features.",
|
| 173 |
+
"We log predictions and latencies, then visualize drift on weekly dashboards.",
|
| 174 |
+
"Feature store versioning prevents training-serving skew across environments.",
|
| 175 |
+
"We will run A B tests, tracking equal error rate and calibration.",
|
| 176 |
+
"The container image stays under two hundred megabytes for minimal cold starts.",
|
| 177 |
+
"Webhooks post verdicts to Slack, enabling rapid human review.",
|
| 178 |
+
"Augmentation simulates noise, speed changes, and codec artifacts during training.",
|
| 179 |
+
"Grad CAM highlights mel regions influencing final predictions the most.",
|
| 180 |
+
"We export reports as CSV and HTML for compliance and audits.",
|
| 181 |
+
"A rolling window monitors precision and recall across recent deployments.",
|
| 182 |
+
"Canary releases protect users while we validate new thresholds in production.",
|
| 183 |
+
"We encrypt artifacts at rest and rotate keys on a fixed cadence.",
|
| 184 |
+
"Offline evaluation includes ablations to isolate the contribution of features.",
|
| 185 |
+
"A retraining job triggers automatically when drift exceeds our alert budget.",
|
| 186 |
+
"We tag datasets with immutable hashes to ensure reproducibility.",
|
| 187 |
+
"Telemetry includes device model, operating system, and inference time buckets.",
|
| 188 |
+
"A fallback heuristic keeps the product usable if models misbehave.",
|
| 189 |
+
"Dashboards display confidence histograms to surface calibration issues.",
|
| 190 |
+
"We close with a demo and share the public notebook for transparency.",
|
| 191 |
+
],
|
| 192 |
+
# Gigi = Audiobook Warm (F) — 20
|
| 193 |
+
"Gigi": [
|
| 194 |
+
"She folded the letter carefully, as if gentleness might change its meaning.",
|
| 195 |
+
"The lighthouse turned, patient and steady, casting silver across the harbor.",
|
| 196 |
+
"He packed the last box, breathing dust, cedar, and something like courage.",
|
| 197 |
+
"Morning arrived with rain and hibiscus, petals bright against the fence.",
|
| 198 |
+
"The attic kept summers in jars, peaches, sunlight, and untold stories.",
|
| 199 |
+
"She traced the map's worn edges, wondering where the river begins.",
|
| 200 |
+
"He laughed softly, warm as cinnamon and autumn kitchens.",
|
| 201 |
+
"The train hummed northward, carrying secrets and a pocket of hopes.",
|
| 202 |
+
"Night gathered gently, a shawl of stars over the sleeping town.",
|
| 203 |
+
"She realized beginnings often wear the same shoes as endings.",
|
| 204 |
+
"Wind braided through the pines, whispering names they thought forgotten.",
|
| 205 |
+
"He watched the porch light flicker, a heartbeat for the quiet house.",
|
| 206 |
+
"They shared strawberries on the curb, red thumbs and easy grins.",
|
| 207 |
+
"She kept a seashell on the desk to remember the tides.",
|
| 208 |
+
"He learned patience from bread dough, rising in its own time.",
|
| 209 |
+
"The street woke slowly, clinking bottles and morning radios.",
|
| 210 |
+
"She carried a postcard everywhere, proof that distance could be kind.",
|
| 211 |
+
"Rain wrote cursive on the window, a lesson in soft persistence.",
|
| 212 |
+
"He folded the map again, trusting the road would teach directions.",
|
| 213 |
+
"They left the lamp lit, so tomorrow could find its way back.",
|
| 214 |
+
],
|
| 215 |
+
# Drew = Sports Commentator (M) — 20
|
| 216 |
+
"Drew": [
|
| 217 |
+
"He fires from deep, nothing but net, and the crowd erupts again!",
|
| 218 |
+
"The relay exchange was flawless, shaving precious milliseconds off the record.",
|
| 219 |
+
"She clears the bar with ease; that is a season best.",
|
| 220 |
+
"The keeper guesses right, stretches wide, and palms it away brilliantly.",
|
| 221 |
+
"They are pressing high now, forcing turnovers and controlling the tempo.",
|
| 222 |
+
"Off the corner, a thunderous header rockets into the upper ninety.",
|
| 223 |
+
"With two laps remaining, strategy and patience decide this championship.",
|
| 224 |
+
"The rookie delivers under pressure, a clutch performance in overtime.",
|
| 225 |
+
"He splits the defense, step backs, and drills a cold dagger.",
|
| 226 |
+
"The stadium is shaking; fans know they are witnessing something special.",
|
| 227 |
+
"A perfect pick frees the shooter, and he nails the mid-range.",
|
| 228 |
+
"She accelerates on the back stretch, pulling clear of the pack.",
|
| 229 |
+
"Defense rotates quickly and denies the easy layup at the rim.",
|
| 230 |
+
"The captain rallies the bench, demanding focus for the final minutes.",
|
| 231 |
+
"A crafty nutmeg draws gasps from the away section.",
|
| 232 |
+
"He reads the screen, jumps the passing lane, and steals it clean.",
|
| 233 |
+
"The serve kisses the line; challenge confirms an inch to spare.",
|
| 234 |
+
"She nails the dismount, and the judges reward the precision.",
|
| 235 |
+
"A booming kick flips field position and buys valuable time.",
|
| 236 |
+
"Timeout here; the next possession will write tonight's headline.",
|
| 237 |
+
],
|
| 238 |
+
# Aria = Childlike Curious (Neutral/F) — 20
|
| 239 |
+
"Aria": [
|
| 240 |
+
"Do clouds ever get tired from floating and making so many shapes?",
|
| 241 |
+
"If shadows are quiet, do they still count as parts of sunlight?",
|
| 242 |
+
"How many raindrops fit on a ladybug's back before it tickles?",
|
| 243 |
+
"Why do cats blink slowly, like they are telling secrets with eyelids?",
|
| 244 |
+
"If trees could vote, would they choose longer springs or louder birds?",
|
| 245 |
+
"Do stars practice shining, or are they born already busy and bright?",
|
| 246 |
+
"What happens to sound after it stops; does it nap somewhere cozy?",
|
| 247 |
+
"Can a thought be heavy enough to pull socks from drawers?",
|
| 248 |
+
"If books could taste words, would poems be chocolate or strawberries tonight?",
|
| 249 |
+
"When the moon hides, does it giggle behind clouds or play peekaboo?",
|
| 250 |
+
"Do puddles remember the sky they borrowed for a little while?",
|
| 251 |
+
"If a kite lets go, does the wind promise to bring it back?",
|
| 252 |
+
"Are whispers just brave words that prefer smaller adventures?",
|
| 253 |
+
"Can colors be friends, or do they argue over favorite sunsets?",
|
| 254 |
+
"If time wore shoes, would it sprint weekdays and stroll Sundays?",
|
| 255 |
+
"Do fireflies save their glow for midnight parties in the grass?",
|
| 256 |
+
"If snowflakes could vote, would they pick twirls or soft landings?",
|
| 257 |
+
"Where does a yawn travel to after everyone catches it?",
|
| 258 |
+
"Can a memory wave hello when you pass the same corner?",
|
| 259 |
+
"If a dream goes missing, does it send postcards from tomorrow?",
|
| 260 |
+
],
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
def main():
    """Generate the TTS clips for every voice in SENTS and convert them to WAV.

    For each voice the sentences are passed to ``generate_tts_dataset`` with a
    per-voice output folder under ``MP3_ROOT``; that folder is then handed to
    ``mp3_to_wav16k`` with ``WAV_ROOT`` as the destination. A summary with the
    number of requested clips and both root folders is printed at the end.

    Raises:
        KeyError: if a voice name in ``SENTS`` has no entry in ``VOICES``.
    """
    # Make sure both output roots exist before any clip is written.
    for root_dir in (MP3_ROOT, WAV_ROOT):
        os.makedirs(root_dir, exist_ok=True)

    clip_count = 0
    for voice_name in SENTS:
        sentences = SENTS[voice_name]
        voice_id = VOICES[voice_name]
        mp3_dir = os.path.join(MP3_ROOT, voice_name)

        print(f"\nGenerating {len(sentences)} clips for {voice_name} -> {mp3_dir}")
        generate_tts_dataset(texts=sentences, voice_id=voice_id, out_dir=mp3_dir)

        # NOTE(review): every voice is converted into the same flat WAV_ROOT;
        # confirm the helpers produce per-voice-unique file names so WAVs from
        # different voices cannot overwrite each other.
        print(f"Converting MP3 -> WAV(16k mono) into {WAV_ROOT} ...")
        mp3_to_wav16k(src_dir=mp3_dir, dst_dir=WAV_ROOT)

        # Counts sentences requested, not files verified on disk.
        clip_count += len(sentences)

    print(f"\nDone. Generated {clip_count} clips total.")
    print(f"MP3s in: {MP3_ROOT}")
    print(f"WAVs in: {WAV_ROOT}")
|
| 280 |
+
|
| 281 |
+
if __name__ == "__main__":
|
| 282 |
+
main()
|
index.html
ADDED
|
@@ -0,0 +1,440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
| 6 |
+
<title>Voice Guard — AI Voice Detector</title>
|
| 7 |
+
|
| 8 |
+
<!-- Tailwind (CDN) -->
|
| 9 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 10 |
+
<script>
|
| 11 |
+
tailwind.config = {
|
| 12 |
+
theme: {
|
| 13 |
+
extend: {
|
| 14 |
+
fontFamily: { sans: ['Inter','ui-sans-serif','system-ui'] },
|
| 15 |
+
colors: {
|
| 16 |
+
brand: { 400:'#ff8e34', 500:'#ff6a00' }
|
| 17 |
+
},
|
| 18 |
+
boxShadow: { glass: '0 10px 30px rgba(0,0,0,.35), inset 0 1px 0 rgba(255,255,255,.05)' }
|
| 19 |
+
}
|
| 20 |
+
}
|
| 21 |
+
}
|
| 22 |
+
</script>
|
| 23 |
+
|
| 24 |
+
<!-- Optional config file that sets window.BACKEND_URL -->
|
| 25 |
+
<script src="config.js"></script>
|
| 26 |
+
|
| 27 |
+
<!-- Inter font -->
|
| 28 |
+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&display=swap" rel="stylesheet">
|
| 29 |
+
|
| 30 |
+
<style>
|
| 31 |
+
.glass{
|
| 32 |
+
background: rgba(28,28,33,.55);
|
| 33 |
+
border: 1px solid rgba(255,255,255,.06);
|
| 34 |
+
box-shadow: var(--glass, 0 10px 30px rgba(0,0,0,.35)), inset 0 1px 0 rgba(255,255,255,.05);
|
| 35 |
+
backdrop-filter: blur(10px);
|
| 36 |
+
}
|
| 37 |
+
.donut{
|
| 38 |
+
--val:.34; --col:#ff6a00;
|
| 39 |
+
background: conic-gradient(var(--col) calc(var(--val)*360deg), #2c2c2c 0);
|
| 40 |
+
mask: radial-gradient(farthest-side, #0000 62%, #000 63%);
|
| 41 |
+
-webkit-mask: radial-gradient(farthest-side, #0000 62%, #000 63%);
|
| 42 |
+
transition: background .35s ease;
|
| 43 |
+
}
|
| 44 |
+
::-webkit-scrollbar { width: 10px; height: 10px; }
|
| 45 |
+
::-webkit-scrollbar-thumb { background: #23242a; border-radius: 999px; }
|
| 46 |
+
</style>
|
| 47 |
+
</head>
|
| 48 |
+
<body class="bg-[#0C0D10] text-white font-sans">
|
| 49 |
+
|
| 50 |
+
<!-- Soft gradient background -->
|
| 51 |
+
<div class="fixed inset-0 -z-10">
|
| 52 |
+
<div class="absolute -top-24 -right-24 w-[600px] h-[600px] rounded-full blur-3xl opacity-30"
|
| 53 |
+
style="background: radial-gradient(closest-side,#ff6a00,transparent 70%);"></div>
|
| 54 |
+
<div class="absolute -bottom-24 -left-20 w-[500px] h-[500px] rounded-full blur-3xl opacity-20"
|
| 55 |
+
style="background: radial-gradient(closest-side,#5eead4,transparent 70%);"></div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<!-- Header -->
|
| 59 |
+
<header class="mx-auto max-w-7xl px-6 py-4">
|
| 60 |
+
<div class="flex items-center justify-between rounded-2xl glass px-4 py-3">
|
| 61 |
+
<div class="flex items-center gap-3">
|
| 62 |
+
<div class="h-9 w-9 rounded-xl bg-brand-500/10 ring-1 ring-brand-500/40 grid place-content-center">
|
| 63 |
+
<!-- Shield waveform icon -->
|
| 64 |
+
<svg width="20" height="20" viewBox="0 0 24 24" fill="none">
|
| 65 |
+
<path d="M12 2l7 3v6c0 5.25-3.5 9.75-7 11-3.5-1.25-7-5.75-7-11V5l7-3Z" stroke="#ff6a00" stroke-width="1.5"/>
|
| 66 |
+
<path d="M7 12h2l1-4 2 8 1-6 1 3h3" stroke="#fff" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
|
| 67 |
+
</svg>
|
| 68 |
+
</div>
|
| 69 |
+
<div>
|
| 70 |
+
<h1 class="text-lg font-semibold">Voice Guard</h1>
|
| 71 |
+
<p class="text-xs text-white/50 -mt-0.5">Human vs AI Speech</p>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
|
| 75 |
+
<button id="analyzeBtn"
|
| 76 |
+
class="rounded-xl px-4 py-2.5 text-sm font-semibold bg-brand-500 hover:bg-brand-400 text-white shadow-lg">
|
| 77 |
+
Analyze
|
| 78 |
+
</button>
|
| 79 |
+
</div>
|
| 80 |
+
</header>
|
| 81 |
+
|
| 82 |
+
<!-- Main -->
|
| 83 |
+
<main class="mx-auto max-w-7xl px-6 pb-24">
|
| 84 |
+
<div class="grid grid-cols-12 gap-6">
|
| 85 |
+
|
| 86 |
+
<!-- LEFT: Inputs + status -->
|
| 87 |
+
<section class="col-span-4 space-y-6">
|
| 88 |
+
<div class="glass rounded-2xl p-5">
|
| 89 |
+
<div class="flex items-center justify-between">
|
| 90 |
+
<h2 class="text-sm font-semibold text-white/80">Input</h2>
|
| 91 |
+
<span class="text-xs px-2 py-1 rounded-full bg-white/5 border border-white/10">3–7s</span>
|
| 92 |
+
</div>
|
| 93 |
+
|
| 94 |
+
<div class="mt-4 grid grid-cols-2 gap-2">
|
| 95 |
+
<button id="tabMic" class="w-full rounded-xl border border-white/10 bg-white/5 px-3 py-2 text-sm font-medium">
|
| 96 |
+
Microphone
|
| 97 |
+
</button>
|
| 98 |
+
<button id="tabUpload" class="w-full rounded-xl border border-white/10 bg-transparent px-3 py-2 text-sm font-medium hover:bg-white/5">
|
| 99 |
+
Upload
|
| 100 |
+
</button>
|
| 101 |
+
</div>
|
| 102 |
+
|
| 103 |
+
<!-- Mic panel -->
|
| 104 |
+
<div id="micPanel" class="mt-4 space-y-4">
|
| 105 |
+
<div class="flex items-center gap-3">
|
| 106 |
+
<button id="recBtn" class="rounded-lg bg-white/10 hover:bg-white/20 px-3 py-2 text-sm border border-white/10">● Record</button>
|
| 107 |
+
<span id="recStatus" class="text-xs text-white/60">Idle</span>
|
| 108 |
+
</div>
|
| 109 |
+
<div class="rounded-xl border border-white/10 bg-black/30 h-24 overflow-hidden">
|
| 110 |
+
<canvas id="meter" class="w-full h-full"></canvas>
|
| 111 |
+
</div>
|
| 112 |
+
</div>
|
| 113 |
+
|
| 114 |
+
<!-- Upload panel -->
|
| 115 |
+
<div id="uploadPanel" class="mt-4 hidden space-y-3">
|
| 116 |
+
<label class="block text-sm text-white/70">Choose audio (.wav/.mp3)</label>
|
| 117 |
+
<input id="fileInput" type="file" accept="audio/*"
|
| 118 |
+
class="w-full rounded-xl bg-black/30 border border-white/10 p-3 text-sm file:mr-4 file:rounded-lg file:border-0 file:bg-brand-500 file:px-4 file:py-2 file:text-white file:text-sm hover:file:bg-brand-400"/>
|
| 119 |
+
<p id="fileName" class="text-xs text-white/50"></p>
|
| 120 |
+
</div>
|
| 121 |
+
</div>
|
| 122 |
+
|
| 123 |
+
<div class="grid grid-cols-2 gap-4">
|
| 124 |
+
<div class="glass rounded-2xl p-4">
|
| 125 |
+
<p class="text-xs text-white/60">Source</p>
|
| 126 |
+
<p id="srcLabel" class="mt-1 text-lg font-semibold">Microphone</p>
|
| 127 |
+
</div>
|
| 128 |
+
<div class="glass rounded-2xl p-4">
|
| 129 |
+
<p class="text-xs text-white/60">Latency</p>
|
| 130 |
+
<p id="latency" class="mt-1 text-lg font-semibold">—</p>
|
| 131 |
+
</div>
|
| 132 |
+
</div>
|
| 133 |
+
|
| 134 |
+
<div class="glass rounded-2xl p-5">
|
| 135 |
+
<div class="flex items-center justify-between">
|
| 136 |
+
<h3 class="text-sm font-semibold text-white/80">Recent</h3>
|
| 137 |
+
<button id="clearRecent" class="text-xs text-white/50 hover:text-white/80">Clear</button>
|
| 138 |
+
</div>
|
| 139 |
+
<ul id="recentList" class="mt-3 space-y-2 max-h-60 overflow-auto"></ul>
|
| 140 |
+
</div>
|
| 141 |
+
</section>
|
| 142 |
+
|
| 143 |
+
<!-- RIGHT: Heatmap + donuts + label -->
|
| 144 |
+
<section class="col-span-8 space-y-6">
|
| 145 |
+
<div class="glass rounded-2xl p-5">
|
| 146 |
+
<div class="flex items-center justify-between">
|
| 147 |
+
<h2 class="text-sm font-semibold text-white/80">Explanation Heatmap</h2>
|
| 148 |
+
<div class="text-xs text-white/50">Spectrogram importance</div>
|
| 149 |
+
</div>
|
| 150 |
+
|
| 151 |
+
<div class="mt-4 h-[340px] rounded-xl border border-white/10 overflow-hidden bg-black/30 grid place-items-center">
|
| 152 |
+
<img id="heatmapImg" class="w-full h-full object-contain" alt="Heatmap"/>
|
| 153 |
+
<span id="heatmapPlaceholder" class="text-white/50 text-sm">No analysis yet</span>
|
| 154 |
+
</div>
|
| 155 |
+
</div>
|
| 156 |
+
|
| 157 |
+
<div class="grid grid-cols-3 gap-6">
|
| 158 |
+
<div class="glass rounded-2xl p-5">
|
| 159 |
+
<div class="flex items-center justify-between">
|
| 160 |
+
<p class="text-sm text-white/70">Human</p>
|
| 161 |
+
<span class="text-xs rounded-full bg-emerald-400/15 text-emerald-300 px-2 py-0.5 border border-emerald-400/30">Class 0</span>
|
| 162 |
+
</div>
|
| 163 |
+
<div class="mt-4 flex items-center gap-6">
|
| 164 |
+
<div class="donut size-28 rounded-full" id="donutHuman" style="--val:.50; --col:#34d399"></div>
|
| 165 |
+
<div>
|
| 166 |
+
<p class="text-3xl font-extrabold"><span id="humanPct">50</span>%</p>
|
| 167 |
+
<p class="text-xs text-white/60 mt-1">Likelihood</p>
|
| 168 |
+
</div>
|
| 169 |
+
</div>
|
| 170 |
+
</div>
|
| 171 |
+
|
| 172 |
+
<div class="glass rounded-2xl p-5">
|
| 173 |
+
<div class="flex items-center justify-between">
|
| 174 |
+
<p class="text-sm text-white/70">AI</p>
|
| 175 |
+
<span class="text-xs rounded-full bg-rose-400/15 text-rose-300 px-2 py-0.5 border border-rose-400/30">Class 1</span>
|
| 176 |
+
</div>
|
| 177 |
+
<div class="mt-4 flex items-center gap-6">
|
| 178 |
+
<div class="donut size-28 rounded-full" id="donutAI" style="--val:.50; --col:#fb7185"></div>
|
| 179 |
+
<div>
|
| 180 |
+
<p class="text-3xl font-extrabold"><span id="aiPct">50</span>%</p>
|
| 181 |
+
<p class="text-xs text-white/60 mt-1">Likelihood</p>
|
| 182 |
+
</div>
|
| 183 |
+
</div>
|
| 184 |
+
</div>
|
| 185 |
+
|
| 186 |
+
<div class="glass rounded-2xl p-5">
|
| 187 |
+
<p class="text-sm text-white/70">Final Label</p>
|
| 188 |
+
<div class="mt-3 flex items-center gap-3">
|
| 189 |
+
<div id="badgeLabel"
|
| 190 |
+
class="px-3 py-1.5 text-sm font-semibold rounded-xl border border-emerald-400/30 bg-emerald-400/15 text-emerald-300">
|
| 191 |
+
HUMAN
|
| 192 |
+
</div>
|
| 193 |
+
<span id="threshold" class="text-xs text-white/50">thr 0.60</span>
|
| 194 |
+
</div>
|
| 195 |
+
<!-- NEW: why line -->
|
| 196 |
+
<p id="whyText" class="text-xs text-white/60 mt-2"></p>
|
| 197 |
+
<p class="text-xs text-white/60 mt-3 leading-relaxed">
|
| 198 |
+
Click Analyze to send audio to the API and render the real heatmap.
|
| 199 |
+
</p>
|
| 200 |
+
</div>
|
| 201 |
+
</div>
|
| 202 |
+
</section>
|
| 203 |
+
|
| 204 |
+
</div>
|
| 205 |
+
</main>
|
| 206 |
+
|
| 207 |
+
<script>
|
| 208 |
+
// ===== Config =====
|
| 209 |
+
// Endpoint the Analyze button POSTs audio to. A page may set window.BACKEND_URL
// before this script runs to override the local development default.
const BACKEND_URL = window.BACKEND_URL || "http://127.0.0.1:8000/analyze";
|
| 210 |
+
|
| 211 |
+
// ===== Elements =====
|
| 212 |
+
// Shorthand for the id lookups below.
const byId = id => document.getElementById(id);

// Input card: tab buttons, their panels, and the "Source" tile.
const tabMic = byId('tabMic');
const tabUpload = byId('tabUpload');
const micPanel = byId('micPanel');
const uploadPanel = byId('uploadPanel');
const srcLabel = byId('srcLabel');

// Recorder controls.
const recBtn = byId('recBtn');
const recStatus = byId('recStatus');

// Header action and the "Latency" tile.
const analyzeBtn = byId('analyzeBtn');
const latency = byId('latency');

// Heatmap image and the text shown before the first analysis.
const heatmapImg = byId('heatmapImg');
const heatmapPlaceholder = byId('heatmapPlaceholder');

// Result widgets: donuts, percentages, final badge, threshold and the "why" line.
const donutHuman = byId('donutHuman');
const donutAI = byId('donutAI');
const humanPct = byId('humanPct');
const aiPct = byId('aiPct');
const badge = byId('badgeLabel');
const thresholdEl = byId('threshold');
const whyText = byId('whyText');

// Upload picker and the "Recent" list.
const fileInput = byId('fileInput');
const fileName = byId('fileName');
const recentList = byId('recentList');
const clearRecent = byId('clearRecent');
|
| 239 |
+
|
| 240 |
+
// ===== Tabs =====
|
| 241 |
+
// Switch the input card between the microphone and upload panels.
// `which` is 'mic' for the microphone tab; any other value selects upload.
function setTab(which){
  const micActive = which === 'mic';

  // Highlight the active tab button and clear the highlight on the other one.
  tabMic.classList.toggle('bg-white/5', micActive);
  tabUpload.classList.toggle('bg-white/5', !micActive);

  // Exactly one panel is visible at a time.
  micPanel.classList.toggle('hidden', !micActive);
  uploadPanel.classList.toggle('hidden', micActive);

  srcLabel.textContent = micActive ? 'Microphone' : 'Upload';
}

// Wire the two tab buttons and start on the microphone tab.
tabMic.onclick = function(){ setTab('mic'); };
tabUpload.onclick = function(){ setTab('upload'); };
setTab('mic');
|
| 259 |
+
|
| 260 |
+
// ===== Upload label =====
|
| 261 |
+
// Show the chosen file's name under the picker (empty string when nothing is selected).
fileInput.onchange = ()=> fileName.textContent = fileInput.files?.[0]?.name || '';
|
| 262 |
+
|
| 263 |
+
// ===== Mic + meter =====
|
| 264 |
+
// Level-meter canvas and its 2D drawing context.
const meterCanvas = document.getElementById('meter');
const mctx = meterCanvas.getContext('2d');

// Keep the canvas bitmap the same size as its CSS box.
const resizeMeter = ()=>{
  meterCanvas.width = meterCanvas.clientWidth;
  meterCanvas.height = meterCanvas.clientHeight;
};
resizeMeter();
window.addEventListener('resize', resizeMeter);

// Recorder state shared by the functions below.
let mediaRecorder;            // active MediaRecorder (undefined until the first recording)
let chunks = [];              // data chunks collected while recording
let micStream = null;         // open microphone MediaStream, or null when idle
let audioCtx = null;          // AudioContext feeding the level meter
let analyser = null;          // AnalyserNode read by loopMeter()
let raf = null;               // requestAnimationFrame handle of the meter loop
let lastRecordedBlob = null;  // most recent finished recording
|
| 270 |
+
|
| 271 |
+
// Draw the live frequency meter: 48 bars taken from the first analyser bins,
// repainted on every animation frame until the frame request stored in `raf`
// is cancelled.
function loopMeter(){
  const data = new Uint8Array(analyser.frequencyBinCount);
  const bars = 48;
  const draw = ()=>{
    // Read the canvas size on every frame: resizeMeter() changes it when the
    // window is resized, and dimensions captured once would go stale.
    const w = meterCanvas.width, h = meterCanvas.height;
    const barW = w/bars;
    analyser.getByteFrequencyData(data);
    mctx.fillStyle = '#0b0b0f'; mctx.fillRect(0,0,w,h);
    for (let i=0;i<bars;i++){
      // v is the bin magnitude scaled to 0..1; it drives both height and opacity.
      const v=data[i]/255, bh=v*h*0.9, x=i*barW+2, y=h-bh;
      mctx.fillStyle = `rgba(255,106,0,${0.35+0.65*v})`;
      mctx.fillRect(x,y,barW-4,bh);
    }
    raf = requestAnimationFrame(draw);
  };
  draw();
}
|
| 287 |
+
|
| 288 |
+
// True while a getUserMedia() request is in flight, so a second click cannot
// open a second microphone stream before the first one is stored.
let recStarting = false;

// Ask for microphone access, start recording and start the level meter.
// Does nothing if a recording is already running or starting. On failure
// (permission denied, no device, unsupported recorder) every partially
// acquired resource is released and the status line reports the problem.
async function startRecording(){
  if (micStream || recStarting) return;
  recStarting = true;
  let stream = null, recorder = null, ctx = null;
  try{
    stream = await navigator.mediaDevices.getUserMedia({audio:true});

    // Not every browser can record WebM; fall back to the recorder's default container.
    const webmOk = typeof MediaRecorder.isTypeSupported === 'function'
      && MediaRecorder.isTypeSupported('audio/webm');
    recorder = webmOk ? new MediaRecorder(stream, {mimeType:'audio/webm'}) : new MediaRecorder(stream);

    chunks = [];
    recorder.ondataavailable = e => { if(e.data.size>0) chunks.push(e.data); };
    recorder.onstop = () => {
      // Label the blob with the container the recorder actually produced.
      lastRecordedBlob = new Blob(chunks, {type: recorder.mimeType || 'audio/webm'});
      chunks = [];
      recStatus.textContent='Recorded';
    };

    // Build the meter graph before recording starts so a failure here leaves no running recorder.
    ctx = new (window.AudioContext||window.webkitAudioContext)();
    const source = ctx.createMediaStreamSource(stream);
    const node = ctx.createAnalyser(); node.fftSize = 1024;
    source.connect(node);

    recorder.start();

    // Publish the shared state only once everything has succeeded.
    mediaRecorder = recorder;
    micStream = stream;
    audioCtx = ctx;
    analyser = node;
    recStatus.textContent = 'Recording…';
    recBtn.textContent = '■ Stop';
    loopMeter();
  }catch(err){
    console.error(err);
    if (recorder) recorder.onstop = null;   // do not report a failed start as "Recorded"
    stream?.getTracks().forEach(t => t.stop());
    ctx?.close();
    micStream = null; audioCtx = null; analyser = null;
    recBtn.textContent = '● Record';
    recStatus.textContent = 'Microphone unavailable';
  }finally{
    recStarting = false;
  }
}
|
| 303 |
+
// Stop the current recording and release the microphone and the meter.
// Does nothing when no stream is open. The finished blob is delivered
// asynchronously by the recorder's onstop handler.
function stopRecording(){
  if(!micStream) return;
  // stop() throws on a recorder that is already inactive (e.g. the track ended by itself).
  if (mediaRecorder && mediaRecorder.state !== 'inactive') mediaRecorder.stop();
  micStream.getTracks().forEach(t => t.stop());
  micStream=null;
  if (raf !== null) cancelAnimationFrame(raf);
  raf = null;
  // The context is missing if recording failed half-way through start-up;
  // guarding here lets the button always reset instead of throwing.
  audioCtx?.close();
  audioCtx = null;
  recBtn.textContent='● Record'; recStatus.textContent='Idle';
}
|
| 311 |
+
// One button toggles recording: an open mic stream means a recording is running, so stop it; otherwise start one.
recBtn.onclick = ()=> micStream ? stopRecording() : startRecording();
|
| 312 |
+
|
| 313 |
+
// ===== Audio helpers: decode -> resample(16k mono) -> PCM16 -> WAV =====
|
| 314 |
+
// Decode an audio Blob/File into mono float samples.
// Returns {pcm, sr}: a Float32Array of samples and the decoded sample rate.
// When the audio has more than one channel, the first two are averaged.
// Rejects if the browser cannot decode the data.
async function blobToPCM(blob){
  const arr = await blob.arrayBuffer();
  const ctx = new (window.AudioContext||window.webkitAudioContext)();
  try{
    const buf = await ctx.decodeAudioData(arr);
    let pcm = buf.getChannelData(0);
    if (buf.numberOfChannels>1){
      const r = buf.getChannelData(1);
      const n = Math.min(pcm.length, r.length);
      const m = new Float32Array(n);
      for (let i=0;i<n;i++) m[i] = 0.5*(pcm[i]+r[i]);
      pcm = m;
    }
    return {pcm, sr: buf.sampleRate};
  }finally{
    // Always release the context, including when decoding fails; otherwise
    // each failed attempt leaves an open AudioContext behind.
    await ctx.close();
  }
}
|
| 329 |
+
// Resample `pcm` from `fromSr` to `toSr` by linear interpolation between
// neighbouring input samples. Returns the input array itself when the two
// rates are equal, otherwise a new Float32Array.
function resampleLinear(pcm, fromSr, toSr=16000){
  if (fromSr === toSr) {
    return pcm;
  }
  const ratio = toSr / fromSr;
  const outLen = Math.round(pcm.length * ratio);
  const lastIdx = pcm.length - 1;
  const out = new Float32Array(outLen);
  for (let k = 0; k < outLen; k++){
    const pos = k / ratio;                  // fractional position in the input
    const lo = Math.floor(pos);
    const hi = Math.min(lo + 1, lastIdx);   // clamp at the final sample
    const frac = pos - lo;
    out[k] = (1 - frac) * pcm[lo] + frac * pcm[hi];
  }
  return out;
}
|
| 338 |
+
// Convert float samples to signed 16-bit PCM. Each sample is clamped to
// [-1, 1]; negatives are scaled by 32768 and non-negatives by 32767.
function floatTo16(pcm){
  return Int16Array.from(pcm, sample => {
    const clamped = Math.max(-1, Math.min(1, sample));
    return clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
  });
}
|
| 343 |
+
// Wrap 16-bit PCM samples in a 44-byte RIFF/WAVE header and return the result
// as an 'audio/wav' Blob.
//   int16       - the samples (interleaved when numChannels > 1)
//   sampleRate  - samples per second per channel
//   numChannels - channel count written to the header
function wavEncodePCM16(int16, sampleRate=16000, numChannels=1){
  const bytesPerSample = 2;
  const dataBytes = int16.length * bytesPerSample;
  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;

  const buffer = new ArrayBuffer(44 + dataBytes);
  const view = new DataView(buffer);
  let off = 0;
  const writeTag = s => { for (let i=0;i<s.length;i++) view.setUint8(off++, s.charCodeAt(i)); };
  const writeU32 = v => { view.setUint32(off, v, true); off += 4; };
  const writeU16 = v => { view.setUint16(off, v, true); off += 2; };

  writeTag('RIFF'); writeU32(36 + dataBytes); writeTag('WAVE');
  writeTag('fmt '); writeU32(16);             // size of the PCM fmt chunk
  writeU16(1);                                // audio format 1 = uncompressed PCM
  writeU16(numChannels); writeU32(sampleRate); writeU32(byteRate);
  writeU16(blockAlign); writeU16(16);         // bits per sample
  writeTag('data'); writeU32(dataBytes);

  // Write samples explicitly little-endian, as the header fields are; copying
  // through an Int16Array view would use the host byte order instead.
  for (let i=0;i<int16.length;i++){ view.setInt16(off, int16[i], true); off += 2; }

  return new Blob([buffer], {type:'audio/wav'});
}
|
| 354 |
+
|
| 355 |
+
// ===== UI helpers =====
|
| 356 |
+
// Paint the "Final Label" badge from the backend response: the rose "AI"
// badge when out.label equals "ai" (case-insensitive), the emerald "HUMAN"
// badge for anything else, including a missing label.
function setBadgeFromBackend(out){
  const verdict = (out.label || '').toLowerCase();
  if (verdict === 'ai') {
    badge.textContent = 'AI';
    badge.className = "px-3 py-1.5 text-sm font-semibold rounded-xl border border-rose-400/30 bg-rose-400/15 text-rose-300";
  } else {
    badge.textContent = 'HUMAN';
    badge.className = "px-3 py-1.5 text-sm font-semibold rounded-xl border border-emerald-400/30 bg-emerald-400/15 text-emerald-300";
  }
}
|
| 363 |
+
// Fill the one-line explanation under the badge: decision rule, AI
// probability, threshold (with its source), margin above the threshold and
// replay score. Missing optional fields are shown as '—' (or 'threshold' for
// the decision rule).
function setWhyLine(out){
  const decision = out.decision || 'threshold';
  const thrSource = out.threshold_source || '—';
  const aiPercent = (out.ai * 100).toFixed(1);
  const thrPercent = (out.threshold * 100).toFixed(0);
  const margin = (out.ai - out.threshold).toFixed(2);

  let replay = '—';
  if (typeof out.replay_score === 'number') {
    replay = out.replay_score.toFixed(2);
  }

  whyText.textContent = `Decision: ${decision} | AI=${aiPercent}% | thr(${thrSource})=${thrPercent}% | margin=${margin} | replay=${replay}`;
}
|
| 373 |
+
// Prepend one entry to the "Recent" list.
//   src    - 'Mic' or any other source name (other names get the amber chip)
//   label  - final label text; 'AI' gets the rose chip, anything else emerald
//   ph, pa - human / AI probabilities in 0..1, shown as rounded percentages
// The entry is built from DOM nodes with textContent rather than an innerHTML
// template, so a label coming from the backend response can never be
// interpreted as markup.
function addRecent({src,label,ph,pa}){
  const li = document.createElement('li');
  li.className = "flex items-center justify-between rounded-xl border border-white/10 bg-white/5 px-3 py-2";

  const left = document.createElement('div');
  left.className = "flex items-center gap-3";

  const srcChip = document.createElement('span');
  srcChip.className = `text-xs px-2 py-0.5 rounded-full border ${src==='Mic'?'border-indigo-400/40 bg-indigo-400/15 text-indigo-300':'border-amber-400/40 bg-amber-400/15 text-amber-300'}`;
  srcChip.textContent = src;

  const pct = document.createElement('span');
  pct.className = "text-sm";
  pct.textContent = `${Math.round(ph*100)}% human / ${Math.round(pa*100)}% AI`;

  const labelChip = document.createElement('span');
  labelChip.className = `text-xs px-2 py-0.5 rounded-lg ${label==='AI'?'bg-rose-400/15 text-rose-300 border border-rose-400/30':'bg-emerald-400/15 text-emerald-300 border border-emerald-400/30'}`;
  labelChip.textContent = label;

  left.append(srcChip, pct);
  li.append(left, labelChip);
  recentList.prepend(li);
}
|
| 385 |
+
// "Clear" empties the Recent list.
clearRecent.onclick = ()=> { recentList.innerHTML=''; };
|
| 386 |
+
|
| 387 |
+
// ===== Analyze =====
|
| 388 |
+
// Analyze: take the recorded or uploaded audio, convert it to 16 kHz mono
// 16-bit WAV in the browser, POST it to BACKEND_URL and render the response
// (heatmap, probabilities, threshold, final label, latency, recent entry).
// The button is disabled while a request is running so repeated clicks cannot
// start overlapping requests whose results would overwrite each other.
analyzeBtn.onclick = async ()=>{
  const t0 = performance.now();
  analyzeBtn.disabled = true;
  try{
    // The visible panel decides which source is used.
    const isMic = !micPanel.classList.contains('hidden');
    let blob = null;
    if (isMic){
      if(!lastRecordedBlob){ alert('Record 3–7 seconds first.'); return; }
      blob = lastRecordedBlob;
    } else {
      if(!fileInput.files?.length){ alert('Choose an audio file.'); return; }
      blob = fileInput.files[0];
    }

    // Decode -> resample -> WAV
    const {pcm, sr} = await blobToPCM(blob);
    const pcm16k = resampleLinear(pcm, sr, 16000);
    const int16 = floatTo16(pcm16k);
    const wavBlob = wavEncodePCM16(int16, 16000, 1);

    const form = new FormData();
    form.append('file', new File([wavBlob], 'audio.wav', {type:'audio/wav'}));
    form.append('source_hint', isMic ? 'microphone' : 'upload');

    const res = await fetch(BACKEND_URL, { method:'POST', body: form });
    if(!res.ok) throw new Error(`API ${res.status}`);
    const out = await res.json();

    // Heatmap: the value is used directly as the image URL (presumably a
    // data: URL — confirm against the backend). Keep the placeholder when the
    // response carries none instead of requesting the URL "undefined".
    if (out.heatmap_b64){
      heatmapImg.src = out.heatmap_b64;
      heatmapPlaceholder.style.display = 'none';
    }

    // Donuts & numbers
    const ph = out.human, pa = out.ai, thr = out.threshold;
    humanPct.textContent = Math.round(ph*100);
    aiPct.textContent = Math.round(pa*100);
    donutHuman.style.setProperty('--val', ph.toFixed(3));
    donutAI.style.setProperty('--val', pa.toFixed(3));
    thresholdEl.textContent = `thr ${thr.toFixed(2)}`;

    // Final label: TRUST BACKEND
    setBadgeFromBackend(out);
    setWhyLine(out);

    latency.textContent = `${Math.round(performance.now()-t0)} ms`;

    // Use the same rule as the badge so the recent entry always agrees with it
    // and a response without a label does not throw after the UI was updated.
    const recentLabel = (out.label || '').toLowerCase() === 'ai' ? 'AI' : 'HUMAN';
    addRecent({src: isMic?'Mic':'Upload', label: recentLabel, ph, pa});
  }catch(err){
    console.error(err);
    alert('Analyze failed. Check console & backend URL in config.js');
  }finally{
    // Runs on success, on failure and on the early returns above.
    analyzeBtn.disabled = false;
  }
};
|
| 438 |
+
</script>
|
| 439 |
+
</body>
|
| 440 |
+
</html>
|
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libsndfile1
|
requirements.txt
CHANGED
|
@@ -1,3 +1,28 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=2.2.0
|
| 2 |
+
torchaudio>=2.2.0
|
| 3 |
+
numpy>=1.26.4
|
| 4 |
+
scipy>=1.11.4
|
| 5 |
+
librosa>=0.10.1
|
| 6 |
+
soundfile>=0.12.1
|
| 7 |
+
audiomentations>=0.37.0
|
| 8 |
+
gradio>=4.44.0
|
| 9 |
+
fastapi>=0.111.0
|
| 10 |
+
uvicorn>=0.30.0
|
| 11 |
+
python-dotenv>=1.0.1
|
| 12 |
+
pydantic>=2.6.4
|
| 13 |
+
requests>=2.32.0
|
| 14 |
+
matplotlib>=3.8.4
|
| 15 |
+
# faster-whisper
|
| 16 |
+
# speechbrain
|
| 17 |
+
black>=24.4.2
|
| 18 |
+
fastapi
|
| 19 |
+
uvicorn[standard]
|
| 20 |
+
python-multipart
|
| 21 |
+
pillow
|
| 22 |
+
matplotlib
|
| 23 |
+
transformers
|
| 24 |
+
torchaudio
|
| 25 |
+
soundfile
|
| 26 |
+
audiomentations
|
| 27 |
+
streamlit>=1.33
|
| 28 |
+
audio-recorder-streamlit==0.0.8
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# streamlit_app.py
|
| 2 |
+
import os, io, base64, urllib.request, pathlib
|
| 3 |
+
import numpy as np
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from PIL import Image
|
| 6 |
+
from matplotlib import cm
|
| 7 |
+
|
| 8 |
+
# ------- wiring to your detector -------
|
| 9 |
+
# We prefer the wav2vec2 detector; fall back to the CNN one if needed.
|
| 10 |
+
# Candidate detector modules, in order of preference. The first one that
# imports cleanly and exposes a ``Detector`` attribute is used.
BACKENDS_TRY = ["app.inference_wav2vec", "app.inference"]
Detector = None
err = None  # last import failure, shown to the user if every backend fails
for mod in BACKENDS_TRY:
    try:
        Detector = __import__(mod, fromlist=["Detector"]).Detector
    except Exception as e:
        # Deliberately broad: any failure while importing a backend (missing
        # module, missing attribute, error raised at import time) means
        # "try the next candidate".
        err = e
    else:
        break
if Detector is None:
    st.error("Could not import Detector from app/. Make sure your repo contains app/inference_wav2vec.py (or app/inference.py).")
    if err is not None:
        # Surface the underlying failure instead of discarding it.
        st.exception(err)
    st.stop()
|
| 23 |
+
|
| 24 |
+
# ------- config / weights -------
|
| 25 |
+
def ensure_weights():
    """Return the local path of the model weights, downloading them if needed.

    ``MODEL_WEIGHTS_PATH`` and ``MODEL_WEIGHTS_URL`` are read from the
    environment first and from ``st.secrets`` second. When a URL is configured
    and nothing exists at the weights path, the file is downloaded.

    Returns:
        The weights path as a string. The file is not guaranteed to exist
        when no URL is configured.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises if the download fails.
    """

    def _setting(name, default):
        # Only consult st.secrets when the environment variable is absent:
        # reading st.secrets raises when no secrets file exists, which must
        # not break a deployment configured purely through the environment.
        value = os.environ.get(name)
        if value is not None:
            return value
        try:
            return st.secrets.get(name, default)
        except FileNotFoundError:
            return default

    wp = _setting("MODEL_WEIGHTS_PATH", "app/models/weights/wav2vec2_classifier.pth")
    url = _setting("MODEL_WEIGHTS_URL", "")

    if url and not os.path.exists(wp):
        target = pathlib.Path(wp)
        target.parent.mkdir(parents=True, exist_ok=True)
        # Download next to the target and rename on success, so an interrupted
        # download never leaves a truncated file that a later run would accept.
        partial = target.with_name(target.name + ".part")
        with st.spinner("Downloading model weights…"):
            try:
                urllib.request.urlretrieve(url, str(partial))
                os.replace(partial, target)
            finally:
                if partial.exists():
                    partial.unlink()
    return wp
|
| 34 |
+
|
| 35 |
+
@st.cache_resource
def load_detector():
    """Build the detector from the resolved weights path.

    Wrapped in ``st.cache_resource``; weights are resolved (and downloaded if
    necessary) by ``ensure_weights`` before the detector is constructed.
    """
    return Detector(weights_path=ensure_weights())
|
| 40 |
+
|
| 41 |
+
# Module-level detector instance used by analyze(); load_detector() is wrapped in st.cache_resource.
det = load_detector()
|
| 42 |
+
|
| 43 |
+
# ------- helpers -------
|
| 44 |
+
def cam_to_png_bytes(cam: np.ndarray) -> bytes:
    """Render a heatmap array as PNG bytes using matplotlib's magma colormap.

    The input is copied to float32 and clipped to [0, 1] before colour
    mapping. Only the first three colour channels are kept and scaled to
    8-bit values.
    """
    clipped = np.clip(np.array(cam, dtype=np.float32), 0.0, 1.0)
    colored = cm.magma(clipped)
    rgb = (colored[..., :3] * 255).astype(np.uint8)
    out = io.BytesIO()
    Image.fromarray(rgb).save(out, format="PNG")
    return out.getvalue()
|
| 52 |
+
|
| 53 |
+
def analyze(wav_bytes: bytes, source_hint: str):
    """Run the detector on one clip and return ``(proba, exp)``.

    ``proba`` is the result of ``det.predict_proba`` and ``exp`` the result
    of ``det.explain``; both receive the same audio bytes and source hint.
    """
    return (
        det.predict_proba(wav_bytes, source_hint=source_hint),
        det.explain(wav_bytes, source_hint=source_hint),
    )
|
| 57 |
+
|
| 58 |
+
# ------- UI -------
|
| 59 |
+
st.set_page_config(page_title="Voice Guard", page_icon="🛡️", layout="wide")
st.title("🛡️ Voice Guard — Human vs AI Speech (Streamlit)")

left, right = st.columns([1,2])

# Left column: choose an audio source, then trigger the analysis.
with left:
    st.subheader("Input")
    tab_rec, tab_up = st.tabs(["🎙️ Microphone", "📁 Upload"])

    # Set by whichever tab provides audio; the upload tab runs last and
    # therefore takes precedence when both have data.
    wav_bytes = None
    source_hint = None

    with tab_rec:
        st.caption("If the recorder component fails on your browser, use Upload.")
        try:
            # Optional third-party recorder component; imported here so the
            # app still works through Upload when it is unavailable.
            from audio_recorder_streamlit import audio_recorder

            recorded = audio_recorder(
                text="Record",
                recording_color="#ff6a00",
                neutral_color="#2b2b2b",
                icon_size="2x",
            )
            if recorded:
                # Per the original author's note, the component returns WAV bytes.
                wav_bytes, source_hint = recorded, "microphone"
                st.audio(wav_bytes, format="audio/wav")
        except Exception:
            st.info("Recorder component not available; please use the Upload tab.")

    with tab_up:
        uploaded = st.file_uploader("Upload an audio file (wav/mp3/m4a)", type=["wav","mp3","m4a","aac"])
        if uploaded is not None:
            wav_bytes, source_hint = uploaded.read(), "upload"
            st.audio(wav_bytes)

    st.markdown("---")
    # The button stays disabled until one of the tabs has produced audio.
    run = st.button("🔍 Analyze", use_container_width=True, type="primary", disabled=wav_bytes is None)

# Right column: probabilities, final label and the explanation heatmap.
with right:
    st.subheader("Results")
    placeholder = st.empty()

    if run and wav_bytes:
        with st.spinner("Analyzing…"):
            proba, exp = analyze(wav_bytes, source_hint or "auto")

        ph = proba["human"]
        pa = proba["ai"]
        label = proba["label"].upper()
        # Optional fields fall back to display defaults.
        thr = proba.get("threshold", 0.5)
        rule = proba.get("decision", "threshold")
        rscore = proba.get("replay_score", None)
        thr_src = proba.get("threshold_source", "—")

        col1, col2, col3 = st.columns(3)
        col1.metric("Human", f"{ph*100:.1f} %")
        col2.metric("AI", f"{pa*100:.1f} %")

        # Green for HUMAN, rose for any other label.
        color = "#22c55e" if label=="HUMAN" else "#fb7185"
        replay_txt = "-" if rscore is None else f"{rscore:.2f}"
        col3.markdown(f"**Final Label:** <span style='color:{color}'>{label}</span>", unsafe_allow_html=True)
        col3.caption(f"thr({thr_src})={thr:.2f} • rule={rule} • replay={replay_txt}")

        st.markdown("##### Explanation Heatmap")
        cam = np.array(exp["cam"], dtype=np.float32)
        st.image(cam_to_png_bytes(cam), caption="Spectrogram importance", use_column_width=True)

        st.markdown("---")
        with st.expander("Raw JSON (debug)"):
            st.json({"proba": proba, "explain": {"cam_shape": list(cam.shape)}})
|
| 131 |
+
|
| 132 |
+
st.caption("Tip: If the mic recorder fails, upload a short 3–7s clip instead.")
|