"""Streamlit app: English accent detection from video/audio speech.

Downloads (or accepts an upload of) a video, extracts the audio track,
transcribes it with Google Speech Recognition, and classifies the
speaker's English accent via keyword/vocabulary/phrase pattern matching.
"""

import streamlit as st
import requests
import tempfile
import os
import subprocess
import speech_recognition as sr
from pydub import AudioSegment
import re
from typing import Dict, Tuple
import time

# Configure Streamlit page
st.set_page_config(
    page_title="English Accent Detector | REM Waste",
    page_icon="🎤",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# Custom CSS for better styling (placeholder — no rules defined yet)
st.markdown("""
""", unsafe_allow_html=True)


class AccentDetector:
    """Streamlined accent detection for English speech analysis.

    Classifies transcribed speech into one of five English accent groups
    (American, British, Australian, Canadian, South African) by matching
    accent-specific keywords, vocabulary and phrases in the transcript.
    """

    def __init__(self):
        # Marker lists per accent. Weights applied in analyze_patterns():
        # keywords 20 pts, vocabulary 15 pts, phrases 25 pts per match.
        self.accent_patterns = {
            'American': {
                'keywords': ['gonna', 'wanna', 'gotta', 'kinda', 'sorta', 'yeah', 'awesome', 'dude'],
                'vocabulary': ['elevator', 'apartment', 'garbage', 'vacation', 'cookie', 'candy', 'mom', 'color'],
                'phrases': ['you know', 'like totally', 'for sure', 'right now']
            },
            'British': {
                'keywords': ['brilliant', 'lovely', 'quite', 'rather', 'chap', 'bloody', 'bloke', 'cheers'],
                'vocabulary': ['lift', 'flat', 'rubbish', 'holiday', 'biscuit', 'queue', 'mum', 'colour'],
                'phrases': ['i say', 'good heavens', 'how do you do', 'spot on']
            },
            'Australian': {
                'keywords': ['mate', 'bloody', 'crikey', 'reckon', 'fair dinkum', 'bonkers', 'ripper'],
                'vocabulary': ['arvo', 'brekkie', 'servo', 'bottle-o', 'mozzie', 'barbie', 'ute'],
                'phrases': ['no worries', 'good on ya', 'she\'ll be right', 'too right']
            },
            'Canadian': {
                'keywords': ['eh', 'about', 'house', 'out', 'sorry', 'hoser', 'beauty'],
                'vocabulary': ['toque', 'hydro', 'washroom', 'parkade', 'chesterfield', 'serviette'],
                'phrases': ['you bet', 'take off', 'give\'r', 'double double']
            },
            'South African': {
                'keywords': ['ag', 'man', 'hey', 'lekker', 'eish', 'shame', 'howzit'],
                'vocabulary': ['robot', 'bakkie', 'boerewors', 'biltong', 'braai', 'veld'],
                'phrases': ['just now', 'now now', 'is it', 'sharp sharp']
            }
        }

    def download_video(self, url: str) -> str:
        """Download a video/audio stream from *url* into a local temp file.

        Supports YouTube (via yt-dlp; returns a .wav), Loom pages (scrapes
        the .mp4 stream URL out of the page HTML) and direct links.
        Returns the temp-file path; the caller is responsible for deleting it.

        NOTE(review): the original wrapped this in @st.cache_data, but the
        cached value is a temp-file path that main() deletes after every
        run, so a cache hit would hand back a dead path. Caching removed.
        """
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }

            # YouTube support (including Shorts)
            if 'youtube.com' in url or 'youtu.be' in url:
                try:
                    import yt_dlp
                except ImportError:
                    raise Exception("yt-dlp is required for YouTube downloads. Please install with 'pip install yt-dlp'.")

                # Let yt-dlp pick the filename inside a scratch directory.
                tmpdir = tempfile.mkdtemp()
                ydl_opts = {
                    'format': 'bestaudio[ext=m4a]/bestaudio/best',
                    'outtmpl': f'{tmpdir}/%(id)s.%(ext)s',
                    'quiet': True,
                    'noplaylist': True,
                    'postprocessors': [{
                        'key': 'FFmpegExtractAudio',
                        'preferredcodec': 'wav',
                        'preferredquality': '192',
                    }],
                    # The original hard-coded 'ffmpeg_location':
                    # '/opt/homebrew/bin/ffmpeg', which only works with
                    # Homebrew on Apple Silicon; yt-dlp finds ffmpeg on
                    # PATH by itself, so the option is omitted.
                    'overwrites': True,
                }
                try:
                    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                        ydl.extract_info(url, download=True)
                    # Find the resulting .wav and copy it to a persistent
                    # temp file so it outlives the scratch directory.
                    for fname in os.listdir(tmpdir):
                        if fname.endswith('.wav'):
                            final_temp = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
                            final_temp.close()
                            with open(os.path.join(tmpdir, fname), 'rb') as src, open(final_temp.name, 'wb') as dst:
                                dst.write(src.read())
                            return final_temp.name
                    raise Exception("yt-dlp did not produce a valid audio file. Try another video or update yt-dlp/ffmpeg.")
                except Exception as e:
                    raise Exception(f"yt-dlp failed: {str(e)}. Try updating yt-dlp and ffmpeg.")

            # Loom support (fallback: try to extract video from page HTML)
            if 'loom.com' in url:
                resp = requests.get(url, headers=headers, timeout=30)
                if resp.status_code != 200:
                    raise Exception("Failed to fetch Loom page")
                html = resp.text
                match = re.search(r'src="([^"]+\.mp4)"', html)
                if match:
                    url = match.group(1)
                else:
                    match = re.search(r'https://cdn\.loom\.com/sessions/[^"\s]+\.mp4', html)
                    if not match:
                        raise Exception("Could not extract Loom video stream URL from page HTML")
                    # BUGFIX: this pattern has no capture group; the original
                    # called group(1) here, raising IndexError on this path.
                    url = match.group(0)

            # Download video (Loom or direct) in streamed chunks.
            response = requests.get(url, headers=headers, stream=True, timeout=60)
            response.raise_for_status()
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        temp_file.write(chunk)
                return temp_file.name
        except Exception as e:
            raise Exception(f"Download failed: {str(e)}")

    def extract_audio_simple(self, video_path: str) -> str:
        """Extract mono 16 kHz WAV audio (capped at 2 min) from any media file.

        Handles mp3, wav, mp4, etc. via pydub/ffmpeg. Returns the path of
        the exported .wav sitting next to the input file.
        """
        try:
            ext = os.path.splitext(video_path)[1].lower()
            audio_path = video_path.rsplit('.', 1)[0] + '.wav'
            # If already wav, use the dedicated loader directly.
            if ext == '.wav':
                audio = AudioSegment.from_wav(video_path)
            else:
                audio = AudioSegment.from_file(video_path)
            # 16 kHz mono is what the speech recognizer expects.
            audio = audio.set_frame_rate(16000).set_channels(1)
            # Cap at 2 minutes (120,000 ms) to bound processing time.
            if len(audio) > 120000:
                audio = audio[:120000]
            audio.export(audio_path, format="wav")
            return audio_path
        except Exception as e:
            raise Exception(f"Audio extraction failed: {str(e)}")

    def transcribe_audio(self, audio_path: str) -> str:
        """Transcribe a WAV file to lower-cased text via Google Speech Recognition.

        Raises Exception with a user-readable message on unintelligible
        audio, API errors, or any other failure.
        """
        try:
            r = sr.Recognizer()
            r.energy_threshold = 300
            r.dynamic_energy_threshold = True
            with sr.AudioFile(audio_path) as source:
                r.adjust_for_ambient_noise(source, duration=0.5)
                audio_data = r.record(source)
            # Try Google Speech Recognition (free web API, US-English model).
            text = r.recognize_google(audio_data, language='en-US')
            return text.lower()
        except sr.UnknownValueError:
            raise Exception("Could not understand the audio clearly")
        except sr.RequestError as e:
            raise Exception(f"Speech recognition service error: {str(e)}")
        except Exception as e:
            raise Exception(f"Transcription failed: {str(e)}")

    def analyze_patterns(self, text: str) -> Dict[str, float]:
        """Score each accent against *text*; returns {accent: score 5.0–95.0}.

        Keyword hits add 20 pts, vocabulary 15 pts, phrases 25 pts; the sum
        is scaled by match density and clamped. With no hits, a heuristic
        base score from _get_base_score() is used instead.
        """
        scores = {}
        words = text.split()
        word_count = max(len(words), 1)  # avoid division by zero on empty text
        for accent, patterns in self.accent_patterns.items():
            score = 0.0
            total_matches = 0
            # Keywords (high weight)
            for keyword in patterns['keywords']:
                if keyword in text:
                    score += 20.0
                    total_matches += 1
            # Vocabulary (medium weight)
            for vocab in patterns['vocabulary']:
                if vocab in text:
                    score += 15.0
                    total_matches += 1
            # Phrases (high weight)
            for phrase in patterns['phrases']:
                if phrase in text:
                    score += 25.0
                    total_matches += 1
            # Normalize by match density and clamp to 95; otherwise fall
            # back to a base confidence for the accent.
            if total_matches > 0:
                score = min(score * (total_matches / word_count) * 50, 95.0)
            else:
                score = self._get_base_score(text, accent)
            scores[accent] = round(max(score, 5.0), 1)
        return scores

    def _get_base_score(self, text: str, accent: str) -> float:
        """Fallback score when no explicit markers matched, capped at 45.

        Starts from a per-accent prior and bumps British/American scores
        when their characteristic spellings appear in the text.
        """
        base_scores = {
            'American': 30.0,
            'British': 20.0,
            'Australian': 15.0,
            'Canadian': 18.0,
            'South African': 12.0
        }
        score = base_scores.get(accent, 15.0)
        # Spelling adjustments
        if accent == 'British':
            if any(word in text for word in ['colour', 'favour', 'centre', 'theatre']):
                score += 25.0
        elif accent == 'American':
            if any(word in text for word in ['color', 'favor', 'center', 'theater']):
                score += 25.0
        return min(score, 45.0)

    def classify_accent(self, scores: Dict[str, float]) -> Tuple[str, float, str]:
        """Pick the top-scoring accent and build a confidence explanation.

        Returns (accent_name, confidence_score, explanation_text).
        """
        if not scores:
            return "Unknown", 0.0, "No speech detected"
        # Get top result
        top_accent = max(scores.items(), key=lambda x: x[1])
        accent, confidence = top_accent
        # Generate explanation, tiered by confidence band.
        if confidence < 25:
            explanation = "Low confidence - speech patterns are not strongly distinctive"
        elif confidence < 50:
            explanation = f"Moderate confidence in {accent} accent based on some linguistic markers"
        elif confidence < 75:
            explanation = f"Good confidence in {accent} accent with clear characteristic patterns"
        else:
            explanation = f"High confidence in {accent} accent with strong linguistic evidence"
        return accent, confidence, explanation


# Initialize detector once per server process.
@st.cache_resource
def get_detector():
    return AccentDetector()


def main():
    """Render the Streamlit UI and drive the full analysis pipeline."""
    # Header
    st.title("🎤 English Accent Detection Tool")
    st.markdown("**AI-powered accent analysis for English speech | Built for REM Waste**")

    # Description
    with st.expander("ℹ️ How it works", expanded=False):
        st.markdown("""
        1. **Input**: Paste a public video URL (MP4, Loom, YouTube, etc.)
        2. **Processing**: Extract audio → Transcribe speech → Analyze patterns
        3. **Output**: Accent classification + confidence score + explanation

        **Supported Accents**: American, British, Australian, Canadian, South African
        """)

    # Input section
    st.subheader("📹 Video Input")

    # File upload option
    uploaded_file = st.file_uploader(
        "Or upload a local video/audio file (MP4, WAV, MP3, etc.):",
        type=["mp4", "mov", "avi", "wav", "mp3", "m4a", "aac", "ogg"],
        help="Upload a file directly if you can't use a public URL."
    )

    # Sample URLs for testing
    with st.expander("🧪 Test with sample videos"):
        st.markdown("""
        **Sample URLs for testing:**
        - `https://sample-videos.com/zip/10/mp4/SampleVideo_1280x720_1mb.mp4`
        - `https://www.learningcontainer.com/wp-content/uploads/2020/05/sample-mp4-file.mp4`
        - Or any public Loom/YouTube video URL
        """)

    video_url = st.text_input(
        "Enter video URL:",
        placeholder="https://example.com/video.mp4",
        help="Must be a publicly accessible video URL"
    )

    # Process button
    if st.button("🚀 Analyze Accent", type="primary"):
        if not video_url.strip() and not uploaded_file:
            st.error("⚠️ Please enter a video URL or upload a file")
            return
        if video_url and not video_url.startswith(('http://', 'https://')):
            st.error("⚠️ Please enter a valid URL starting with http:// or https://")
            return

        detector = get_detector()
        temp_files = []
        try:
            progress_bar = st.progress(0)
            status_text = st.empty()

            if uploaded_file:
                # Save uploaded file to a temp file, preserving its extension.
                suffix = os.path.splitext(uploaded_file.name)[1]
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
                    f.write(uploaded_file.read())
                    video_path = f.name
                temp_files.append(video_path)
            else:
                status_text.text("📥 Downloading video...")
                progress_bar.progress(20)
                video_path = detector.download_video(video_url)
                temp_files.append(video_path)

            # Step 2: Extract audio
            status_text.text("🎵 Extracting audio...")
            progress_bar.progress(50)
            audio_path = detector.extract_audio_simple(video_path)
            temp_files.append(audio_path)

            # Step 3: Transcribe
            status_text.text("🎤 Transcribing speech...")
            progress_bar.progress(75)
            transcript = detector.transcribe_audio(audio_path)

            # Step 4: Analyze
            status_text.text("🔍 Analyzing accent patterns...")
            progress_bar.progress(90)
            scores = detector.analyze_patterns(transcript)
            accent, confidence, explanation = detector.classify_accent(scores)

            # Complete
            progress_bar.progress(100)
            status_text.text("✅ Analysis complete!")
            time.sleep(0.5)

            # Clear progress indicators
            progress_bar.empty()
            status_text.empty()

            # Display results
            st.success("🎉 **Analysis Complete!**")

            # Main metrics
            col1, col2, col3 = st.columns(3)
            with col1:
                st.markdown(f"""
                🗣️ Detected Accent

                {accent}
                """, unsafe_allow_html=True)
            with col2:
                st.markdown(f"""
                🎯 Confidence

                {confidence}%
                """, unsafe_allow_html=True)
            with col3:
                # Get transcript length for quality indicator
                word_count = len(transcript.split())
                quality = "High" if word_count > 50 else "Medium" if word_count > 20 else "Low"
                st.markdown(f"""
                📊 Data Quality

                {quality}

                {word_count} words
                """, unsafe_allow_html=True)

            st.markdown("---")

            # Explanation
            st.subheader("📝 Analysis Summary")
            st.info(explanation)

            # Transcript
            st.subheader("📄 Transcribed Speech")
            st.text_area(
                "Full transcript:",
                transcript,
                height=120,
                help="This is what the AI heard from the video"
            )

            # Detailed scores
            st.subheader("📊 All Accent Scores")
            # Create a more visual representation
            for accent_name, score in sorted(scores.items(), key=lambda x: x[1], reverse=True):
                # Create progress bar for each accent
                col_name, col_bar, col_score = st.columns([2, 6, 1])
                with col_name:
                    st.write(f"**{accent_name}**")
                with col_bar:
                    st.progress(score / 100)
                with col_score:
                    st.write(f"{score}%")

            # Additional insights
            if confidence > 60:
                st.success(f"🎯 **Strong Detection**: The {accent} accent markers are clearly present in the speech.")
            elif confidence > 40:
                st.warning(f"⚠️ **Moderate Detection**: Some {accent} patterns detected, but results may vary with longer audio.")
            else:
                st.info("💡 **Tip**: Longer speech samples (30+ seconds) generally provide more accurate results.")

        except Exception as e:
            st.error(f"❌ **Processing Error**: {str(e)}")
            st.info("""
            **Troubleshooting Tips:**
            - Ensure the video URL is publicly accessible
            - Try a different video format or shorter video
            - Make sure the video contains clear English speech
            - Check your internet connection
            """)
        finally:
            # Cleanup temp files; best-effort — a locked/missing file is not fatal.
            for temp_file in temp_files:
                try:
                    if os.path.exists(temp_file):
                        os.remove(temp_file)
                except OSError:
                    pass

    # Footer information
    st.markdown("---")
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("""
        **🔧 Technical Details**
        - Audio processing: Up to 2 minutes
        - Speech recognition: Google API
        - Analysis: Pattern matching + linguistics
        - Processing time: ~30-90 seconds
        """)
    with col2:
        st.markdown("""
        **📋 Requirements**
        - Public video URLs only
        - Clear English speech preferred
        - Supports MP4, MOV, AVI formats
        - Works with Loom, YouTube, direct links
        """)

    # API information
    with st.expander("🔗 API Usage"):
        st.code("""
# Python API usage example
from accent_detector import AccentDetector

detector = AccentDetector()
result = detector.process_video("https://your-video.com/file.mp4")
print(f"Accent: {result['accent']}")
print(f"Confidence: {result['confidence']}%")
        """, language="python")

    # About section
    with st.expander("ℹ️ About This Tool"):
        st.markdown("""
        **Built for REM Waste Interview Challenge**

        This accent detection tool analyzes English speech patterns to classify regional accents.
        It's designed for hiring automation systems that need to evaluate spoken English proficiency.

        **Algorithm Overview:**
        - Extracts audio from video files
        - Transcribes speech using Google Speech Recognition
        - Analyzes linguistic patterns, vocabulary, and pronunciation markers
        - Provides confidence scores based on pattern strength

        **Accuracy Notes:**
        - Best results with 30+ seconds of clear speech
        - Confidence scores reflect pattern strength, not absolute accuracy
        - Designed for screening purposes, not definitive classification

        **Privacy & Ethics:**
        - No audio/video data is stored permanently
        - Temporary files are automatically deleted
        - Tool is intended for voluntary language assessment only
        """)


if __name__ == "__main__":
    main()