Spaces:

varunkul
/

Voice-guard

Sleeping

App Files Files Community

varunkul committed on Oct 5

Commit

6ecef58

verified ·

1 Parent(s): 3632e7a

Upload 8 files

Browse files

Files changed (8) hide show

.env +2 -0
.gitattributes +3 -35
README.md +9 -17
gen_clips.py +282 -0
index.html +440 -0
packages.txt +1 -0
requirements.txt +28 -3
streamlit_app.py +132 -0

.env ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ ELEVEN_API_KEY=sk_a32c9d8914a5550267df4c3df132619cab9e4dbfb1de3e0c
2	+ ELEVEN_VOICE_ID=

.gitattributes CHANGED Viewed

@@ -1,35 +1,3 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+# Track model weights with Git LFS; the leading * is required to match by extension.
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,20 +1,12 @@
----
-title: Voice Guard
-emoji: 🚀
-colorFrom: red
-colorTo: red
-sdk: docker
-app_port: 8501
-tags:
-- streamlit
-pinned: false
-short_description: Streamlit template space
-license: apache-2.0
----
-# Welcome to Streamlit!
-Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).

+# AI Voice Detector — Human vs AI Speech (Hack-Ready)
+**Goal:** Detect whether an audio clip is **AI-generated or human** in real time, with an **explainable heatmap** over the spectrogram and an **optional provenance check** via the ElevenLabs API.
+## Quickstart
+```bash
+python -m venv .venv && source .venv/bin/activate      # Windows: .venv\Scripts\activate
+pip install -r requirements.txt
+python app/app.py
+```

gen_clips.py ADDED Viewed

	@@ -0,0 +1,282 @@

+import os
+from dotenv import load_dotenv
+from app.elevenlabs_tools import generate_tts_dataset
+from app.utils.convert_mp3_to_wav import mp3_to_wav16k
+# --- Load API key: fail at import time if ELEVEN_API_KEY is unset or empty ---
+load_dotenv()  # presumably loads variables from a local .env file (python-dotenv) — confirm
+if not os.getenv("ELEVEN_API_KEY"):
+    raise ValueError("ELEVEN_API_KEY not found in .env file")
+# --- ElevenLabs voices: display name -> voice ID; main() looks up every SENTS key here ---
+VOICES = {
+    "Adam":      "pNInz6obpgDQGcFmaJgB",
+    "Alice":     "Xb7hH8MSUJpSbSDYk0k2",
+    "Aria":      "9BWtsMINqrJLrRacOk9x",
+    "Brian":     "nPczCjzI2devNBz1zQrb",
+    "Bill":      "pqHfZKP75CvOlQylNhV4",
+    "Charlotte": "XB0fDUnXU5powFXDhCwa",
+    "Clyde":     "2EiwWnXFnvU5JabPnv8n",
+    "Drew":      "29vD33N1CtxCmqQRPOHJ",
+    "Freya":     "jsCqWAovK2LkecY7zXl4",
+    "Gigi":      "jBpfuIE2acCO8z3wKNLl",
+}
+MP3_ROOT = "data/raw/ai_mp3"  # main() uses one sub-folder per voice under this directory as out_dir
+WAV_ROOT = "data/raw/ai"  # passed to mp3_to_wav16k as dst_dir for every voice
+# --- 200 sentences (20 per voice) ---
+SENTS = {
+    # Alice = News Anchor (F) — 20
+    "Alice": [
+        "Good evening. Here are today's top stories from campus and around Arlington.",
+        "City officials approved new bike lanes, citing safety and climate benefits.",
+        "Temperatures rise tomorrow, with scattered showers likely late in the afternoon.",
+        "The Mavericks sealed a comeback win after a tense fourth quarter tonight.",
+        "Economists project steady growth this quarter despite lingering supply constraints.",
+        "Construction on Cooper Street resumes Monday; expect delays and posted detours.",
+        "A UTA research team announced a battery recycling breakthrough this morning.",
+        "Flights at DFW remain on schedule, with only minor delays reported.",
+        "Early voting saw record turnout across several precincts, officials confirmed.",
+        "That is the latest update; we will return with more at eleven.",
+        "Transit officials unveiled a pilot for free weekend rides across the city.",
+        "State health leaders reported declining flu cases headed into next week.",
+        "Energy prices dipped slightly today amid forecasts for milder temperatures.",
+        "The council passed a balanced budget, prioritizing schools and road repairs.",
+        "Local firefighters rescued a hiker after an overnight search near the lake.",
+        "A new scholarship program will support first-generation students beginning this fall.",
+        "Sports headlines: the women's team advances to the regional semifinals tomorrow.",
+        "Expect strong winds overnight; secure loose items and check travel advisories.",
+        "Police announced an amnesty weekend to safely turn in prohibited fireworks.",
+        "That wraps the hour; for breaking updates, follow our digital live blog.",
+    ],
+    # Adam = Friendly Conversational (M) — 20
+    "Adam": [
+        "Hey, I grabbed coffee already. Want me to save you a seat?",
+        "I couldn't find your charger, but I left a spare cable on the desk.",
+        "Traffic is heavy near the stadium; let's park early and walk together.",
+        "Your demo looked great. The UI felt clean, fast, and friendly.",
+        "Let's split the grocery list: produce for you, pantry items for me.",
+        "I'll ping the group chat once I reach the venue, no worries.",
+        "Your slides are solid; add a quick metric slide before the demo.",
+        "The new cafe downtown has almond croissants that blew my mind.",
+        "I'm heading out now; text me if you need anything from Target.",
+        "Thanks again for yesterday. You genuinely saved our timeline.",
+        "I booked the study room at six; bring markers and sticky notes.",
+        "We can pair on tests tonight, then merge before midnight.",
+        "I'll water the plants while you're away; just leave the key.",
+        "Your playlist slapped; share it so I can loop it tomorrow.",
+        "The package arrived early, so I'll drop it off after class.",
+        "Let's meal prep Sunday afternoon and avoid takeout next week.",
+        "I left comments in the doc; happy to chat through suggestions.",
+        "Shall we run by the lake at seven and grab smoothies after?",
+        "I set the reminder; we'll check results first thing in the morning.",
+        "Great news: the refund cleared, and the receipt is in your email.",
+    ],
+    # Clyde = British Formal (M) — 20
+    "Clyde": [
+        "Kindly ensure the documentation is reviewed before the committee convenes Thursday afternoon.",
+        "Your reservation is confirmed; a private room will be prepared upon your arrival.",
+        "Please submit the revised manuscript, adhering to the journal's formatting guidelines.",
+        "The seminar commences at nine precisely; late admittance may not be accommodated.",
+        "We appreciate your patience while maintenance completes the scheduled electrical inspection.",
+        "Do verify the figures; precision remains paramount in this investigation.",
+        "The board welcomes your proposal and invites a concise presentation next week.",
+        "Do accept my apologies; the courier appears to have misplaced the parcel.",
+        "The contract shall be executed once both parties acknowledge the amended clause.",
+        "I trust the arrangements meet your expectations; advise if alterations are required.",
+        "Minutes from the previous meeting are circulated for your timely acknowledgment.",
+        "Your membership will be renewed upon completion of the enclosed application.",
+        "The gallery preview opens at six; appropriate attire is kindly requested.",
+        "Please confer with procurement before engaging additional external suppliers.",
+        "We remain grateful for your counsel and continuing professional partnership.",
+        "The timetable reflects minor adjustments to accommodate laboratory availability.",
+        "Kindly return the archive keys to reception at the close of business.",
+        "A modest reception will follow the lecture in the Great Hall foyer.",
+        "Your diligence is noted; the supervisory panel commends your progress.",
+        "Should difficulties arise, do not hesitate to contact the department secretary.",
+    ],
+    # Charlotte = Energetic Young (F) — 20
+    "Charlotte": [
+        "Let's go team, hack time! Push that commit and ship the killer feature.",
+        "I'm hyped for finals; caffeine plus playlists equals unstoppable study mode.",
+        "Your reel looked amazing; post it now before the algorithm naps.",
+        "The new sneakers dropped today, and the colors are ridiculously clean.",
+        "We are sprinting to the finish; grab snacks and let's smash these tasks.",
+        "Quick check-in: are we vibing with blue accents or neon gradients?",
+        "That trailer went hard; I'm watching the premiere on night one.",
+        "Toss me the aux; I have the perfect focus track for crunch.",
+        "Class got canceled; brunch and brainstorming at ten sound perfect.",
+        "Big win, everyone! Screenshots, gifs, and celebratory donuts for the squad.",
+        "Mic check at five, lights at six, and we go live at seven.",
+        "I updated the banner; the new glow makes the title pop.",
+        "Can we swap the hero image? The neon skyline absolutely slaps.",
+        "Tiny bug spotted; I'm patching it now and pushing a hotfix.",
+        "The vibe is immaculate; let's ride the momentum and overdeliver.",
+        "Okay, squad goals: demo flawless, judges smiling, trophy secured.",
+        "I queued the soundtrack; it builds perfectly into the reveal moment.",
+        "Let's loop the b-roll while we talk through the metrics slide.",
+        "The confetti emoji is ready; I am saving it for the finale.",
+        "Final stretch energy: deep breath, big smile, and hit deploy.",
+    ],
+    # Freya = Calm Meditation (F) — 20
+    "Freya": [
+        "Breathe in gently, noticing cool air filling your chest and shoulders.",
+        "Exhale slowly, allowing the tension around your eyes to soften.",
+        "Let your attention rest on the rhythm of your breath, steady and quiet.",
+        "Imagine warm sunlight touching your face, inviting ease into your morning.",
+        "Release today's concerns; your body knows how to return to balance.",
+        "Sense the ground beneath you, steady, supportive, and completely reliable.",
+        "With each inhale, welcome spaciousness; with each exhale, welcome calm.",
+        "Thank your busy mind for its effort, and invite it to rest.",
+        "Notice your heartbeat, patient and gentle, guiding you toward presence.",
+        "Carry this softness forward; you are grounded, clear, and ready.",
+        "Let your shoulders drop slightly, as if set down from a kind weight.",
+        "Picture a wide horizon; there is time to move with kindness.",
+        "Let stray thoughts pass like clouds, changing shape and drifting away.",
+        "Soften the jaw; let the tongue rest, calm and unhurried.",
+        "Feel the breath arrive, then leave, like waves returning to sea.",
+        "Offer gratitude to this moment, exactly as it is appearing.",
+        "Invite quiet where worry stood; let steady breath fill that space.",
+        "Imagine your spine lengthening, lifting you gently into balance.",
+        "Hold kindness in the chest; exhale and share it outward.",
+        "Return to the breath whenever the mind asks for a handhold.",
+    ],
+    # Bill = Elderly Storyteller (M) — 20
+    "Bill": [
+        "When summer storms rolled in, we counted seconds between lightning and thunder.",
+        "Your grandmother kept recipes on cards, stained with sweet berry memories.",
+        "We built radios from kits, chasing distant stations after sundown.",
+        "The library smelled of paper and varnish, refuge on rainy afternoons.",
+        "I learned patience fixing bicycles, one stubborn bolt at a time.",
+        "We mapped the night sky, tracing stories across cold constellations.",
+        "A firm handshake once sealed agreements stronger than signed paper.",
+        "I still hear that tune drifting from open windows each spring.",
+        "The river taught respect; quiet water can hide a heavy current.",
+        "Keep your curiosity; it carries farther than cleverness alone.",
+        "We patched leaky roofs with laughter and tar on summer mornings.",
+        "Neighbors traded tools, stories, and peaches over the backyard fence.",
+        "A pocketknife and twine solved more problems than any fancy kit.",
+        "The best advice I got was simple: listen longer than you speak.",
+        "We saved bottle caps for games that lasted until the porch lights.",
+        "Patience is a bridge you build before the flood ever arrives.",
+        "The kindest teachers led with questions, not with thunder.",
+        "I kept a notebook of firsts: first snowfall, first bicycle, first apology.",
+        "Luck visits briefly; preparation invites it to stay for tea.",
+        "If you tend your friendships, they will flower even in winter.",
+    ],
+    # Brian = Tech Presenter (M) — 20
+    "Brian": [
+        "Today we will deploy a tiny model to the edge with real-time inference.",
+        "Our pipeline standardizes audio at sixteen kilohertz for consistent features.",
+        "We log predictions and latencies, then visualize drift on weekly dashboards.",
+        "Feature store versioning prevents training-serving skew across environments.",
+        "We will run A B tests, tracking equal error rate and calibration.",
+        "The container image stays under two hundred megabytes for minimal cold starts.",
+        "Webhooks post verdicts to Slack, enabling rapid human review.",
+        "Augmentation simulates noise, speed changes, and codec artifacts during training.",
+        "Grad CAM highlights mel regions influencing final predictions the most.",
+        "We export reports as CSV and HTML for compliance and audits.",
+        "A rolling window monitors precision and recall across recent deployments.",
+        "Canary releases protect users while we validate new thresholds in production.",
+        "We encrypt artifacts at rest and rotate keys on a fixed cadence.",
+        "Offline evaluation includes ablations to isolate the contribution of features.",
+        "A retraining job triggers automatically when drift exceeds our alert budget.",
+        "We tag datasets with immutable hashes to ensure reproducibility.",
+        "Telemetry includes device model, operating system, and inference time buckets.",
+        "A fallback heuristic keeps the product usable if models misbehave.",
+        "Dashboards display confidence histograms to surface calibration issues.",
+        "We close with a demo and share the public notebook for transparency.",
+    ],
+    # Gigi = Audiobook Warm (F) — 20
+    "Gigi": [
+        "She folded the letter carefully, as if gentleness might change its meaning.",
+        "The lighthouse turned, patient and steady, casting silver across the harbor.",
+        "He packed the last box, breathing dust, cedar, and something like courage.",
+        "Morning arrived with rain and hibiscus, petals bright against the fence.",
+        "The attic kept summers in jars, peaches, sunlight, and untold stories.",
+        "She traced the map's worn edges, wondering where the river begins.",
+        "He laughed softly, warm as cinnamon and autumn kitchens.",
+        "The train hummed northward, carrying secrets and a pocket of hopes.",
+        "Night gathered gently, a shawl of stars over the sleeping town.",
+        "She realized beginnings often wear the same shoes as endings.",
+        "Wind braided through the pines, whispering names they thought forgotten.",
+        "He watched the porch light flicker, a heartbeat for the quiet house.",
+        "They shared strawberries on the curb, red thumbs and easy grins.",
+        "She kept a seashell on the desk to remember the tides.",
+        "He learned patience from bread dough, rising in its own time.",
+        "The street woke slowly, clinking bottles and morning radios.",
+        "She carried a postcard everywhere, proof that distance could be kind.",
+        "Rain wrote cursive on the window, a lesson in soft persistence.",
+        "He folded the map again, trusting the road would teach directions.",
+        "They left the lamp lit, so tomorrow could find its way back.",
+    ],
+    # Drew = Sports Commentator (M) — 20
+    "Drew": [
+        "He fires from deep, nothing but net, and the crowd erupts again!",
+        "The relay exchange was flawless, shaving precious milliseconds off the record.",
+        "She clears the bar with ease; that is a season best.",
+        "The keeper guesses right, stretches wide, and palms it away brilliantly.",
+        "They are pressing high now, forcing turnovers and controlling the tempo.",
+        "Off the corner, a thunderous header rockets into the upper ninety.",
+        "With two laps remaining, strategy and patience decide this championship.",
+        "The rookie delivers under pressure, a clutch performance in overtime.",
+        "He splits the defense, step backs, and drills a cold dagger.",
+        "The stadium is shaking; fans know they are witnessing something special.",
+        "A perfect pick frees the shooter, and he nails the mid-range.",
+        "She accelerates on the back stretch, pulling clear of the pack.",
+        "Defense rotates quickly and denies the easy layup at the rim.",
+        "The captain rallies the bench, demanding focus for the final minutes.",
+        "A crafty nutmeg draws gasps from the away section.",
+        "He reads the screen, jumps the passing lane, and steals it clean.",
+        "The serve kisses the line; challenge confirms an inch to spare.",
+        "She nails the dismount, and the judges reward the precision.",
+        "A booming kick flips field position and buys valuable time.",
+        "Timeout here; the next possession will write tonight's headline.",
+    ],
+    # Aria = Childlike Curious (Neutral/F) — 20
+    "Aria": [
+        "Do clouds ever get tired from floating and making so many shapes?",
+        "If shadows are quiet, do they still count as parts of sunlight?",
+        "How many raindrops fit on a ladybug's back before it tickles?",
+        "Why do cats blink slowly, like they are telling secrets with eyelids?",
+        "If trees could vote, would they choose longer springs or louder birds?",
+        "Do stars practice shining, or are they born already busy and bright?",
+        "What happens to sound after it stops; does it nap somewhere cozy?",
+        "Can a thought be heavy enough to pull socks from drawers?",
+        "If books could taste words, would poems be chocolate or strawberries tonight?",
+        "When the moon hides, does it giggle behind clouds or play peekaboo?",
+        "Do puddles remember the sky they borrowed for a little while?",
+        "If a kite lets go, does the wind promise to bring it back?",
+        "Are whispers just brave words that prefer smaller adventures?",
+        "Can colors be friends, or do they argue over favorite sunsets?",
+        "If time wore shoes, would it sprint weekdays and stroll Sundays?",
+        "Do fireflies save their glow for midnight parties in the grass?",
+        "If snowflakes could vote, would they pick twirls or soft landings?",
+        "Where does a yawn travel to after everyone catches it?",
+        "Can a memory wave hello when you pass the same corner?",
+        "If a dream goes missing, does it send postcards from tomorrow?",
+    ],
+}
+def main():
+    os.makedirs(MP3_ROOT, exist_ok=True)
+    os.makedirs(WAV_ROOT, exist_ok=True)
+    total = 0
+    for voice_name, lines in SENTS.items():
+        voice_id = VOICES[voice_name]
+        out_mp3 = os.path.join(MP3_ROOT, voice_name)
+        print(f"\nGenerating {len(lines)} clips for {voice_name} -> {out_mp3}")
+        generate_tts_dataset(texts=lines, voice_id=voice_id, out_dir=out_mp3)
+        print(f"Converting MP3 -> WAV(16k mono) into {WAV_ROOT} ...")
+        mp3_to_wav16k(src_dir=out_mp3, dst_dir=WAV_ROOT)
+        total += len(lines)
+    print(f"\nDone. Generated {total} clips total.")
+    print(f"MP3s in: {MP3_ROOT}")
+    print(f"WAVs in: {WAV_ROOT}")
+if __name__ == "__main__":
+    main()

index.html ADDED Viewed

	@@ -0,0 +1,440 @@

+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width,initial-scale=1" />
+  <title>Voice Guard — AI Voice Detector</title>
+  <!-- Tailwind (CDN) -->
+  <script src="https://cdn.tailwindcss.com"></script>
+  <script>
+    tailwind.config = {
+      theme: {
+        extend: {
+          fontFamily: { sans: ['Inter','ui-sans-serif','system-ui'] },
+          colors: {
+            brand: { 400:'#ff8e34', 500:'#ff6a00' }
+          },
+          boxShadow: { glass: '0 10px 30px rgba(0,0,0,.35), inset 0 1px 0 rgba(255,255,255,.05)' }
+        }
+      }
+    }
+  </script>
+  <!-- Optional config file that sets window.BACKEND_URL -->
+  <script src="config.js"></script>
+  <!-- Inter font -->
+  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&display=swap" rel="stylesheet">
+  <style>
+    .glass{
+      background: rgba(28,28,33,.55);
+      border: 1px solid rgba(255,255,255,.06);
+      box-shadow: var(--glass, 0 10px 30px rgba(0,0,0,.35)), inset 0 1px 0 rgba(255,255,255,.05);
+      backdrop-filter: blur(10px);
+    }
+    .donut{
+      --val:.34; --col:#ff6a00;
+      background: conic-gradient(var(--col) calc(var(--val)*360deg), #2c2c2c 0);
+      mask: radial-gradient(farthest-side, #0000 62%, #000 63%);
+      -webkit-mask: radial-gradient(farthest-side, #0000 62%, #000 63%);
+      transition: background .35s ease;
+    }
+    ::-webkit-scrollbar { width: 10px; height: 10px; }
+    ::-webkit-scrollbar-thumb { background: #23242a; border-radius: 999px; }
+  </style>
+</head>
+<body class="bg-[#0C0D10] text-white font-sans">
+  <!-- Soft gradient background -->
+  <div class="fixed inset-0 -z-10">
+    <div class="absolute -top-24 -right-24 w-[600px] h-[600px] rounded-full blur-3xl opacity-30"
+         style="background: radial-gradient(closest-side,#ff6a00,transparent 70%);"></div>
+    <div class="absolute -bottom-24 -left-20 w-[500px] h-[500px] rounded-full blur-3xl opacity-20"
+         style="background: radial-gradient(closest-side,#5eead4,transparent 70%);"></div>
+  </div>
+  <!-- Header -->
+  <header class="mx-auto max-w-7xl px-6 py-4">
+    <div class="flex items-center justify-between rounded-2xl glass px-4 py-3">
+      <div class="flex items-center gap-3">
+        <div class="h-9 w-9 rounded-xl bg-brand-500/10 ring-1 ring-brand-500/40 grid place-content-center">
+          <!-- Shield waveform icon -->
+          <svg width="20" height="20" viewBox="0 0 24 24" fill="none">
+            <path d="M12 2l7 3v6c0 5.25-3.5 9.75-7 11-3.5-1.25-7-5.75-7-11V5l7-3Z" stroke="#ff6a00" stroke-width="1.5"/>
+            <path d="M7 12h2l1-4 2 8 1-6 1 3h3" stroke="#fff" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+          </svg>
+        </div>
+        <div>
+          <h1 class="text-lg font-semibold">Voice Guard</h1>
+          <p class="text-xs text-white/50 -mt-0.5">Human vs AI Speech</p>
+        </div>
+      </div>
+      <button id="analyzeBtn"
+              class="rounded-xl px-4 py-2.5 text-sm font-semibold bg-brand-500 hover:bg-brand-400 text-white shadow-lg">
+        Analyze
+      </button>
+    </div>
+  </header>
+  <!-- Main -->
+  <main class="mx-auto max-w-7xl px-6 pb-24">
+    <div class="grid grid-cols-12 gap-6">
+      <!-- LEFT: Inputs + status -->
+      <section class="col-span-4 space-y-6">
+        <div class="glass rounded-2xl p-5">
+          <div class="flex items-center justify-between">
+            <h2 class="text-sm font-semibold text-white/80">Input</h2>
+            <span class="text-xs px-2 py-1 rounded-full bg-white/5 border border-white/10">3–7s</span>
+          </div>
+          <div class="mt-4 grid grid-cols-2 gap-2">
+            <button id="tabMic" class="w-full rounded-xl border border-white/10 bg-white/5 px-3 py-2 text-sm font-medium">
+              Microphone
+            </button>
+            <button id="tabUpload" class="w-full rounded-xl border border-white/10 bg-transparent px-3 py-2 text-sm font-medium hover:bg-white/5">
+              Upload
+            </button>
+          </div>
+          <!-- Mic panel -->
+          <div id="micPanel" class="mt-4 space-y-4">
+            <div class="flex items-center gap-3">
+              <button id="recBtn" class="rounded-lg bg-white/10 hover:bg-white/20 px-3 py-2 text-sm border border-white/10">● Record</button>
+              <span id="recStatus" class="text-xs text-white/60">Idle</span>
+            </div>
+            <div class="rounded-xl border border-white/10 bg-black/30 h-24 overflow-hidden">
+              <canvas id="meter" class="w-full h-full"></canvas>
+            </div>
+          </div>
+          <!-- Upload panel -->
+          <div id="uploadPanel" class="mt-4 hidden space-y-3">
+            <label class="block text-sm text-white/70">Choose audio (.wav/.mp3)</label>
+            <input id="fileInput" type="file" accept="audio/*"
+                   class="w-full rounded-xl bg-black/30 border border-white/10 p-3 text-sm file:mr-4 file:rounded-lg file:border-0 file:bg-brand-500 file:px-4 file:py-2 file:text-white file:text-sm hover:file:bg-brand-400"/>
+            <p id="fileName" class="text-xs text-white/50"></p>
+          </div>
+        </div>
+        <div class="grid grid-cols-2 gap-4">
+          <div class="glass rounded-2xl p-4">
+            <p class="text-xs text-white/60">Source</p>
+            <p id="srcLabel" class="mt-1 text-lg font-semibold">Microphone</p>
+          </div>
+          <div class="glass rounded-2xl p-4">
+            <p class="text-xs text-white/60">Latency</p>
+            <p id="latency" class="mt-1 text-lg font-semibold">—</p>
+          </div>
+        </div>
+        <div class="glass rounded-2xl p-5">
+          <div class="flex items-center justify-between">
+            <h3 class="text-sm font-semibold text-white/80">Recent</h3>
+            <button id="clearRecent" class="text-xs text-white/50 hover:text-white/80">Clear</button>
+          </div>
+          <ul id="recentList" class="mt-3 space-y-2 max-h-60 overflow-auto"></ul>
+        </div>
+      </section>
+      <!-- RIGHT: Heatmap + donuts + label -->
+      <section class="col-span-8 space-y-6">
+        <div class="glass rounded-2xl p-5">
+          <div class="flex items-center justify-between">
+            <h2 class="text-sm font-semibold text-white/80">Explanation Heatmap</h2>
+            <div class="text-xs text-white/50">Spectrogram importance</div>
+          </div>
+          <div class="mt-4 h-[340px] rounded-xl border border-white/10 overflow-hidden bg-black/30 grid place-items-center">
+            <img id="heatmapImg" class="w-full h-full object-contain" alt="Heatmap"/>
+            <span id="heatmapPlaceholder" class="text-white/50 text-sm">No analysis yet</span>
+          </div>
+        </div>
+        <div class="grid grid-cols-3 gap-6">
+          <div class="glass rounded-2xl p-5">
+            <div class="flex items-center justify-between">
+              <p class="text-sm text-white/70">Human</p>
+              <span class="text-xs rounded-full bg-emerald-400/15 text-emerald-300 px-2 py-0.5 border border-emerald-400/30">Class 0</span>
+            </div>
+            <div class="mt-4 flex items-center gap-6">
+              <div class="donut size-28 rounded-full" id="donutHuman" style="--val:.50; --col:#34d399"></div>
+              <div>
+                <p class="text-3xl font-extrabold"><span id="humanPct">50</span>%</p>
+                <p class="text-xs text-white/60 mt-1">Likelihood</p>
+              </div>
+            </div>
+          </div>
+          <div class="glass rounded-2xl p-5">
+            <div class="flex items-center justify-between">
+              <p class="text-sm text-white/70">AI</p>
+              <span class="text-xs rounded-full bg-rose-400/15 text-rose-300 px-2 py-0.5 border border-rose-400/30">Class 1</span>
+            </div>
+            <div class="mt-4 flex items-center gap-6">
+              <div class="donut size-28 rounded-full" id="donutAI" style="--val:.50; --col:#fb7185"></div>
+              <div>
+                <p class="text-3xl font-extrabold"><span id="aiPct">50</span>%</p>
+                <p class="text-xs text-white/60 mt-1">Likelihood</p>
+              </div>
+            </div>
+          </div>
+          <div class="glass rounded-2xl p-5">
+            <p class="text-sm text-white/70">Final Label</p>
+            <div class="mt-3 flex items-center gap-3">
+              <div id="badgeLabel"
+                   class="px-3 py-1.5 text-sm font-semibold rounded-xl border border-emerald-400/30 bg-emerald-400/15 text-emerald-300">
+                HUMAN
+              </div>
+              <span id="threshold" class="text-xs text-white/50">thr 0.60</span>
+            </div>
+            <!-- NEW: why line -->
+            <p id="whyText" class="text-xs text-white/60 mt-2"></p>
+            <p class="text-xs text-white/60 mt-3 leading-relaxed">
+              Click Analyze to send audio to the API and render the real heatmap.
+            </p>
+          </div>
+        </div>
+      </section>
+    </div>
+  </main>
+  <script>
+    // ===== Config =====
+    // Endpoint that receives the multipart POST. A page may define window.BACKEND_URL
+    // before this script runs to override the local development default.
+    const BACKEND_URL = window.BACKEND_URL || "http://127.0.0.1:8000/analyze";
+    // ===== Elements =====
+    // Handles are looked up once at load time. NOTE(review): most of these ids live in
+    // markup above this script; a missing id yields null here and fails on first use.
+    const tabMic = document.getElementById('tabMic');
+    const tabUpload = document.getElementById('tabUpload');
+    const micPanel = document.getElementById('micPanel');
+    const uploadPanel = document.getElementById('uploadPanel');
+    const srcLabel = document.getElementById('srcLabel');
+    const recBtn = document.getElementById('recBtn');
+    const recStatus = document.getElementById('recStatus');
+    const analyzeBtn = document.getElementById('analyzeBtn');
+    const latency = document.getElementById('latency');
+    const heatmapImg = document.getElementById('heatmapImg');
+    const heatmapPlaceholder = document.getElementById('heatmapPlaceholder');
+    const donutHuman = document.getElementById('donutHuman');
+    const donutAI = document.getElementById('donutAI');
+    const humanPct = document.getElementById('humanPct');
+    const aiPct = document.getElementById('aiPct');
+    const badge = document.getElementById('badgeLabel');
+    const thresholdEl = document.getElementById('threshold');
+    const whyText = document.getElementById('whyText');
+    const fileInput = document.getElementById('fileInput');
+    const fileName = document.getElementById('fileName');
+    const recentList = document.getElementById('recentList');
+    const clearRecent = document.getElementById('clearRecent');
+    function setTab(which){
+      if(which==='mic'){
+        tabMic.classList.add('bg-white/5');
+        tabUpload.classList.remove('bg-white/5');
+        micPanel.classList.remove('hidden');
+        uploadPanel.classList.add('hidden');
+        srcLabel.textContent = 'Microphone';
+      }else{
+        tabUpload.classList.add('bg-white/5');
+        tabMic.classList.remove('bg-white/5');
+        uploadPanel.classList.remove('hidden');
+        micPanel.classList.add('hidden');
+        srcLabel.textContent = 'Upload';
+      }
+    }
+    // Wire the two tab buttons and start on the microphone view.
+    tabMic.onclick = ()=> setTab('mic');
+    tabUpload.onclick = ()=> setTab('upload');
+    setTab('mic');
+    // ===== Upload label =====
+    // Mirror the chosen file's name next to the picker; empty string when nothing is selected.
+    fileInput.onchange = ()=> fileName.textContent = fileInput.files?.[0]?.name || '';
+    // ===== Mic + meter =====
+    const meterCanvas = document.getElementById('meter');
+    const mctx = meterCanvas.getContext('2d');
+    // Keep the canvas backing store the same size as its CSS box (assigning width/height also clears it).
+    const resizeMeter = ()=>{ meterCanvas.width = meterCanvas.clientWidth; meterCanvas.height = meterCanvas.clientHeight; };
+    resizeMeter(); addEventListener('resize', resizeMeter);
+    // Shared recording state: micStream doubles as the "currently recording" flag,
+    // raf holds the pending animation-frame id, lastRecordedBlob is what Analyze sends in mic mode.
+    let mediaRecorder, chunks=[], micStream=null, audioCtx=null, analyser=null, raf=null, lastRecordedBlob=null;
+    function loopMeter(){
+      const w=meterCanvas.width, h=meterCanvas.height;
+      const data = new Uint8Array(analyser.frequencyBinCount);
+      const draw = ()=>{
+        analyser.getByteFrequencyData(data);
+        mctx.fillStyle = '#0b0b0f'; mctx.fillRect(0,0,w,h);
+        const bars = 48, barW = w/bars;
+        for (let i=0;i<bars;i++){
+          const v=data[i]/255, bh=v*h*0.9, x=i*barW+2, y=h-bh;
+          mctx.fillStyle = `rgba(255,106,0,${0.35+0.65*v})`;
+          mctx.fillRect(x,y,barW-4,bh);
+        }
+        raf = requestAnimationFrame(draw);
+      };
+      draw();
+    }
+    async function startRecording(){
+      if(micStream) return;
+      micStream = await navigator.mediaDevices.getUserMedia({audio:true});
+      mediaRecorder = new MediaRecorder(micStream, {mimeType: 'audio/webm'});
+      mediaRecorder.ondataavailable = e => { if(e.data.size>0) chunks.push(e.data); };
+      mediaRecorder.onstop = () => { lastRecordedBlob = new Blob(chunks, {type:'audio/webm'}); chunks = []; recStatus.textContent='Recorded'; };
+      mediaRecorder.start();
+      recStatus.textContent = 'Recording…';
+      recBtn.textContent = '■ Stop';
+      audioCtx = new (window.AudioContext||window.webkitAudioContext)();
+      const source = audioCtx.createMediaStreamSource(micStream);
+      analyser = audioCtx.createAnalyser(); analyser.fftSize = 1024;
+      source.connect(analyser); loopMeter();
+    }
+    function stopRecording(){
+      if(!micStream) return;
+      mediaRecorder?.stop();
+      micStream.getTracks().forEach(t => t.stop());
+      micStream=null;
+      cancelAnimationFrame(raf); audioCtx.close();
+      recBtn.textContent='● Record'; recStatus.textContent='Idle';
+    }
+    recBtn.onclick = ()=> micStream ? stopRecording() : startRecording();
+    // ===== Audio helpers: decode -> resample(16k mono) -> PCM16 -> WAV =====
+    async function blobToPCM(blob){
+      const arr = await blob.arrayBuffer();
+      const ctx = new (window.AudioContext||window.webkitAudioContext)();
+      const buf = await ctx.decodeAudioData(arr);
+      let pcm = buf.getChannelData(0);
+      if (buf.numberOfChannels>1){
+        const r = buf.getChannelData(1);
+        const n = Math.min(pcm.length, r.length);
+        const m = new Float32Array(n);
+        for (let i=0;i<n;i++) m[i] = 0.5*(pcm[i]+r[i]);
+        pcm = m;
+      }
+      await ctx.close();
+      return {pcm, sr: buf.sampleRate};
+    }
+    function resampleLinear(pcm, fromSr, toSr=16000){
+      if (fromSr===toSr) return pcm;
+      const ratio=toSr/fromSr, n=Math.round(pcm.length*ratio), out=new Float32Array(n);
+      for (let i=0;i<n;i++){
+        const x=i/ratio, i0=Math.floor(x), i1=Math.min(i0+1, pcm.length-1), t=x-i0;
+        out[i]=(1-t)*pcm[i0]+t*pcm[i1];
+      }
+      return out;
+    }
+    function floatTo16(pcm){
+      const out = new Int16Array(pcm.length);
+      for (let i=0;i<pcm.length;i++){ let s=Math.max(-1,Math.min(1,pcm[i])); out[i]=s<0?s*0x8000:s*0x7fff; }
+      return out;
+    }
+    function wavEncodePCM16(int16, sampleRate=16000, numChannels=1){
+      const byteRate=sampleRate*numChannels*2, blockAlign=numChannels*2;
+      const buffer=new ArrayBuffer(44 + int16.length*2), view=new DataView(buffer); let off=0;
+      const WU8=s=>{for(let i=0;i<s.length;i++) view.setUint8(off++, s.charCodeAt(i));}
+      const W32=v=>{view.setUint32(off,v,true); off+=4}, W16=v=>{view.setUint16(off,v,true); off+=2}
+      WU8('RIFF'); W32(36+int16.length*2); WU8('WAVE'); WU8('fmt '); W32(16);
+      W16(1); W16(numChannels); W32(sampleRate); W32(byteRate); W16(blockAlign); W16(16);
+      WU8('data'); W32(int16.length*2);
+      new Int16Array(buffer,44).set(int16);
+      return new Blob([buffer], {type:'audio/wav'});
+    }
+    // ===== UI helpers =====
+    function setBadgeFromBackend(out){
+      const isAI = (out.label || '').toLowerCase() === 'ai';
+      badge.textContent = isAI ? 'AI' : 'HUMAN';
+      badge.className = isAI
+        ? "px-3 py-1.5 text-sm font-semibold rounded-xl border border-rose-400/30 bg-rose-400/15 text-rose-300"
+        : "px-3 py-1.5 text-sm font-semibold rounded-xl border border-emerald-400/30 bg-emerald-400/15 text-emerald-300";
+    }
+    function setWhyLine(out){
+      const src = out.threshold_source || '—';
+      const dec = out.decision || 'threshold';
+      const rs  = (typeof out.replay_score === 'number') ? out.replay_score.toFixed(2) : '—';
+      const aiP = (out.ai*100).toFixed(1);
+      const thr = out.threshold;
+      const thrPct = (thr*100).toFixed(0);
+      const margin = (out.ai - thr).toFixed(2);
+      whyText.textContent = `Decision: ${dec} | AI=${aiP}% | thr(${src})=${thrPct}% | margin=${margin} | replay=${rs}`;
+    }
+    function addRecent({src,label,ph,pa}){
+      const li = document.createElement('li');
+      li.className = "flex items-center justify-between rounded-xl border border-white/10 bg-white/5 px-3 py-2";
+      li.innerHTML = `
+        <div class="flex items-center gap-3">
+          <span class="text-xs px-2 py-0.5 rounded-full border ${src==='Mic'?'border-indigo-400/40 bg-indigo-400/15 text-indigo-300':'border-amber-400/40 bg-amber-400/15 text-amber-300'}">${src}</span>
+          <span class="text-sm">${Math.round(ph*100)}% human / ${Math.round(pa*100)}% AI</span>
+        </div>
+        <span class="text-xs px-2 py-0.5 rounded-lg ${label==='AI'?'bg-rose-400/15 text-rose-300 border border-rose-400/30':'bg-emerald-400/15 text-emerald-300 border border-emerald-400/30'}">${label}</span>
+      `;
+      recentList.prepend(li);
+    }
+    clearRecent.onclick = ()=> { recentList.innerHTML=''; };
+    // ===== Analyze =====
+    analyzeBtn.onclick = async ()=>{
+      const t0 = performance.now();
+      try{
+        const isMic = !micPanel.classList.contains('hidden');
+        let blob = null;
+        if (isMic){
+          if(!lastRecordedBlob){ alert('Record 3–7 seconds first.'); return; }
+          blob = lastRecordedBlob;
+        } else {
+          if(!fileInput.files?.length){ alert('Choose an audio file.'); return; }
+          blob = fileInput.files[0];
+        }
+        // Decode -> resample -> WAV
+        const {pcm, sr} = await blobToPCM(blob);
+        const pcm16k = resampleLinear(pcm, sr, 16000);
+        const int16 = floatTo16(pcm16k);
+        const wavBlob = wavEncodePCM16(int16, 16000, 1);
+        const form = new FormData();
+        form.append('file', new File([wavBlob], 'audio.wav', {type:'audio/wav'}));
+        form.append('source_hint', isMic ? 'microphone' : 'upload');
+        const res = await fetch(BACKEND_URL, { method:'POST', body: form });
+        if(!res.ok) throw new Error(`API ${res.status}`);
+        const out = await res.json();
+        // Heatmap
+        heatmapImg.src = out.heatmap_b64;
+        heatmapPlaceholder.style.display = 'none';
+        // Donuts & numbers
+        const ph = out.human, pa = out.ai, thr = out.threshold;
+        humanPct.textContent = Math.round(ph*100);
+        aiPct.textContent = Math.round(pa*100);
+        donutHuman.style.setProperty('--val', ph.toFixed(3));
+        donutAI.style.setProperty('--val', pa.toFixed(3));
+        thresholdEl.textContent = `thr ${thr.toFixed(2)}`;
+        // Final label: TRUST BACKEND
+        setBadgeFromBackend(out);
+        setWhyLine(out);
+        latency.textContent = `${Math.round(performance.now()-t0)} ms`;
+        addRecent({src: isMic?'Mic':'Upload', label: out.label.toUpperCase(), ph, pa});
+      }catch(err){
+        console.error(err);
+        alert('Analyze failed. Check console & backend URL in config.js');
+      }
+    };
+  </script>
+</body>
+</html>

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ libsndfile1

requirements.txt CHANGED Viewed

@@ -1,3 +1,28 @@
-altair
-pandas
-streamlit

+torch>=2.2.0
+torchaudio>=2.2.0
+numpy>=1.26.4
+scipy>=1.11.4
+librosa>=0.10.1
+soundfile>=0.12.1
+audiomentations>=0.37.0
+gradio>=4.44.0
+fastapi>=0.111.0
+uvicorn>=0.30.0
+python-dotenv>=1.0.1
+pydantic>=2.6.4
+requests>=2.32.0
+matplotlib>=3.8.4
+# faster-whisper
+# speechbrain
+black>=24.4.2
+fastapi
+uvicorn[standard]
+python-multipart
+pillow
+matplotlib
+transformers
+torchaudio
+soundfile
+audiomentations
+streamlit>=1.33
+audio-recorder-streamlit==0.0.8

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+# streamlit_app.py
+import os, io, base64, urllib.request, pathlib
+import numpy as np
+import streamlit as st
+from PIL import Image
+from matplotlib import cm
+# ------- wiring to your detector -------
+# We prefer the wav2vec2 detector; fall back to the CNN one if needed.
+BACKENDS_TRY = ["app.inference_wav2vec", "app.inference"]
+Detector = None
+err = None
+for mod in BACKENDS_TRY:
+    try:
+        Detector = __import__(mod, fromlist=["Detector"]).Detector
+        BREAK = True
+        break
+    except Exception as e:
+        err = e
+if Detector is None:
+    st.error("Could not import Detector from app/. Make sure your repo contains app/inference_wav2vec.py (or app/inference.py).")
+    st.stop()
+# ------- config / weights -------
+def ensure_weights():
+    wp  = os.environ.get("MODEL_WEIGHTS_PATH", st.secrets.get("MODEL_WEIGHTS_PATH", "app/models/weights/wav2vec2_classifier.pth"))
+    url = os.environ.get("MODEL_WEIGHTS_URL",  st.secrets.get("MODEL_WEIGHTS_URL",  ""))
+    if url and not os.path.exists(wp):
+        pathlib.Path(wp).parent.mkdir(parents=True, exist_ok=True)
+        with st.spinner("Downloading model weights…"):
+            urllib.request.urlretrieve(url, wp)
+    return wp
+@st.cache_resource
+def load_detector():
+    wp = ensure_weights()
+    det = Detector(weights_path=wp)
+    return det
+det = load_detector()
+# ------- helpers -------
+def cam_to_png_bytes(cam: np.ndarray) -> bytes:
+    cam = np.array(cam, dtype=np.float32)
+    cam = np.clip(cam, 0.0, 1.0)
+    rgb = (cm.magma(cam)[..., :3] * 255).astype(np.uint8)
+    im = Image.fromarray(rgb)
+    buf = io.BytesIO()
+    im.save(buf, format="PNG")
+    return buf.getvalue()
+def analyze(wav_bytes: bytes, source_hint: str):
+    proba = det.predict_proba(wav_bytes, source_hint=source_hint)
+    exp   = det.explain(wav_bytes, source_hint=source_hint)
+    return proba, exp
+# ------- UI -------
+# Two-column page: input (microphone or upload) on the left, results on the right.
+# NOTE(review): set_page_config runs after load_detector() above, which may already
+# have issued Streamlit calls (st.spinner / st.error); some Streamlit versions require
+# set_page_config to be the first Streamlit command — confirm on the deployed version.
+st.set_page_config(page_title="Voice Guard", page_icon="🛡️", layout="wide")
+st.title("🛡️ Voice Guard — Human vs AI Speech (Streamlit)")
+left, right = st.columns([1,2])
+with left:
+    st.subheader("Input")
+    tab_rec, tab_up = st.tabs(["🎙️ Microphone", "📁 Upload"])
+    # Both tabs write to the same two variables; the upload tab runs second, so an
+    # uploaded file takes precedence over a recording when both are present.
+    wav_bytes = None
+    source_hint = None
+    with tab_rec:
+        st.caption("If the recorder component fails on your browser, use Upload.")
+        try:
+            # light, zero-config recorder component
+            from audio_recorder_streamlit import audio_recorder
+            audio = audio_recorder(
+                text="Record",
+                recording_color="#ff6a00",
+                neutral_color="#2b2b2b",
+                icon_size="2x",
+            )
+            if audio:
+                wav_bytes = audio  # component returns WAV bytes
+                source_hint = "microphone"
+                st.audio(wav_bytes, format="audio/wav")
+        except Exception:
+            # Covers both a missing package and a failure inside the component call;
+            # the underlying error is not logged.
+            st.info("Recorder component not available; please use the Upload tab.")
+    with tab_up:
+        f = st.file_uploader("Upload an audio file (wav/mp3/m4a)", type=["wav","mp3","m4a","aac"])
+        if f is not None:
+            # Raw file bytes are passed on unchanged, so despite the variable name this
+            # may be mp3/m4a/aac data — presumably the detector decodes it; verify.
+            wav_bytes = f.read()
+            source_hint = "upload"
+            st.audio(wav_bytes)
+    st.markdown("---")
+    # The button stays disabled until one of the tabs has produced audio bytes.
+    run = st.button("🔍 Analyze", use_container_width=True, type="primary", disabled=wav_bytes is None)
+with right:
+    st.subheader("Results")
+    placeholder = st.empty()
+    if run and wav_bytes:
+        with st.spinner("Analyzing…"):
+            proba, exp = analyze(wav_bytes, source_hint or "auto")
+        # "human", "ai" and "label" are required keys; the remaining fields are
+        # optional and fall back to the defaults given here.
+        ph = proba["human"]; pa = proba["ai"]
+        label = proba["label"].upper()
+        thr = proba.get("threshold", 0.5)
+        rule = proba.get("decision", "threshold")
+        rscore = proba.get("replay_score", None)
+        thr_src = proba.get("threshold_source", "—")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.metric("Human", f"{ph*100:.1f} %")
+        with col2:
+            st.metric("AI", f"{pa*100:.1f} %")
+        with col3:
+            # Green for HUMAN, rose for any other label.
+            color = "#22c55e" if label=="HUMAN" else "#fb7185"
+            st.markdown(f"**Final Label:** <span style='color:{color}'>{label}</span>", unsafe_allow_html=True)
+            st.caption(f"thr({thr_src})={thr:.2f} • rule={rule} • replay={('-' if rscore is None else f'{rscore:.2f}')}")
+        st.markdown("##### Explanation Heatmap")
+        # exp["cam"] is converted to a float32 array and rendered through the magma colormap.
+        cam = np.array(exp["cam"], dtype=np.float32)
+        st.image(cam_to_png_bytes(cam), caption="Spectrogram importance", use_column_width=True)
+        st.markdown("---")
+        with st.expander("Raw JSON (debug)"):
+            st.json({"proba": proba, "explain": {"cam_shape": list(cam.shape)}})
+st.caption("Tip: If the mic recorder fails, upload a short 3–7s clip instead.")