import streamlit as st
import requests
import tempfile
import os
import subprocess
import re
import speech_recognition as sr
from pydub import AudioSegment
from typing import Dict, Tuple
class AccentDetector:
    """
    Accent detection system that analyzes English speech patterns
    to classify regional accents and provide confidence scores.
    """

    def __init__(self):
        # Note: the 'phonetic_markers' lists are descriptive only; the
        # text-based scoring below uses 'keywords' and 'vocabulary'.
        self.accent_patterns = {
            'American': {
                'keywords': ['gonna', 'wanna', 'gotta', 'kinda', 'sorta'],
                'phonetic_markers': ['r-colored vowels', 'rhotic'],
                'vocabulary': ['elevator', 'apartment', 'garbage', 'vacation', 'cookie']
            },
            'British': {
                'keywords': ['brilliant', 'lovely', 'quite', 'rather', 'chap'],
                'phonetic_markers': ['non-rhotic', 'received pronunciation'],
                'vocabulary': ['lift', 'flat', 'rubbish', 'holiday', 'biscuit']
            },
            'Australian': {
                'keywords': ['mate', 'bloody', 'fair dinkum', 'crikey', 'reckon'],
                'phonetic_markers': ['broad vowels', 'rising intonation'],
                'vocabulary': ['arvo', 'brekkie', 'servo', 'bottle-o', 'mozzie']
            },
            'Canadian': {
                # Weak proxies: 'about', 'house', 'out' mark Canadian raising
                # in speech, but they appear in all written English.
                'keywords': ['eh', 'about', 'house', 'out', 'sorry'],
                'phonetic_markers': ['canadian raising', 'eh particle'],
                'vocabulary': ['toque', 'hydro', 'washroom', 'parkade', 'chesterfield']
            },
            'South African': {
                'keywords': ['ag', 'man', 'hey', 'lekker', 'braai'],
                'phonetic_markers': ['kit-split', 'dental fricatives'],
                'vocabulary': ['robot', 'bakkie', 'boerewors', 'biltong', 'sosatie']
            }
        }
    def download_video(self, url: str) -> str:
        """Download video from a URL to a temporary file.

        The URL must point directly at a media file; a share-page link
        (e.g. a Loom page URL) returns HTML, not video.
        """
        try:
            response = requests.get(url, stream=True, timeout=30)
            response.raise_for_status()
            # Stream to a temporary file in 8 KB chunks
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
                for chunk in response.iter_content(chunk_size=8192):
                    temp_file.write(chunk)
            return temp_file.name
        except Exception as e:
            raise Exception(f"Failed to download video: {str(e)}")
    def extract_audio(self, video_path: str) -> str:
        """Extract audio from a video file using ffmpeg"""
        try:
            audio_path = os.path.splitext(video_path)[0] + '.wav'
            # -vn: drop video; pcm_s16le: 16-bit PCM; -ar 16000: 16 kHz;
            # -ac 1: mono; -y: overwrite the output file without asking
            cmd = [
                'ffmpeg', '-i', video_path, '-vn', '-acodec', 'pcm_s16le',
                '-ar', '16000', '-ac', '1', '-y', audio_path
            ]
            try:
                result = subprocess.run(cmd, capture_output=True, text=True)
                ffmpeg_failed = result.returncode != 0
            except FileNotFoundError:
                # ffmpeg binary not on PATH
                ffmpeg_failed = True
            if ffmpeg_failed:
                # Fall back to pydub; note that pydub itself shells out to
                # ffmpeg/avconv for compressed containers, so this mainly
                # covers odd exit codes rather than a missing binary.
                audio = AudioSegment.from_file(video_path)
                audio = audio.set_frame_rate(16000).set_channels(1)
                audio.export(audio_path, format="wav")
            return audio_path
        except Exception as e:
            raise Exception(f"Failed to extract audio: {str(e)}")
    def transcribe_audio(self, audio_path: str) -> str:
        """Transcribe audio to text using speech recognition"""
        try:
            r = sr.Recognizer()
            with sr.AudioFile(audio_path) as source:
                # Calibration consumes the first 0.5 s of the file, so that
                # half-second is not included in the recording below
                r.adjust_for_ambient_noise(source, duration=0.5)
                audio_data = r.record(source)
            # Google Web Speech API (free endpoint); very long recordings
            # may be rejected and would need to be chunked
            text = r.recognize_google(audio_data, language='en-US')
            return text.lower()
        except sr.UnknownValueError:
            raise Exception("Could not understand the audio")
        except sr.RequestError as e:
            raise Exception(f"Speech recognition error: {str(e)}")
    def analyze_accent_patterns(self, text: str) -> Dict[str, float]:
        """Analyze text for accent-specific patterns"""
        scores = {}
        words = text.split()
        word_count = len(words)

        if word_count == 0:
            return {accent: 0.0 for accent in self.accent_patterns.keys()}

        def contains_term(term: str) -> bool:
            # Whole-word/phrase match; a plain substring check would let
            # 'out' match 'about' or 'shout'
            return re.search(r'\b' + re.escape(term) + r'\b', text) is not None

        for accent, patterns in self.accent_patterns.items():
            score = 0.0
            matches = 0
            # Check for accent-specific keywords
            for keyword in patterns['keywords']:
                if contains_term(keyword):
                    score += 15.0
                    matches += 1
            # Check for accent-specific vocabulary
            for vocab_word in patterns['vocabulary']:
                if contains_term(vocab_word):
                    score += 10.0
                    matches += 1
            # Normalize score based on text length and matches
            if matches > 0:
                score = min(score * (matches / word_count) * 100, 95.0)
            else:
                # Base score for general English patterns
                score = self._calculate_base_score(text, accent)
            scores[accent] = round(score, 1)
        return scores
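    # Worked example of the normalization above (illustrative numbers): in a
    # 20-word transcript containing 'mate' (keyword, +15) and 'arvo'
    # (vocabulary, +10), Australian gets matches=2 and raw score 25, and the
    # final score is min(25 * (2/20) * 100, 95.0) = 95.0.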
    def _calculate_base_score(self, text: str, accent: str) -> float:
        """Calculate base confidence score for accent detection"""
        # Simple heuristics based on common patterns
        base_scores = {
            'American': 25.0,  # Default higher for American English
            'British': 15.0,
            'Australian': 10.0,
            'Canadian': 12.0,
            'South African': 8.0
        }
        score = base_scores.get(accent, 10.0)
        # Look for spelling patterns; substring checks also catch inflected
        # forms such as 'colours' or 'centered'
        if accent == 'British' and ('colour' in text or 'favour' in text or 'centre' in text):
            score += 20.0
        elif accent == 'American' and ('color' in text or 'favor' in text or 'center' in text):
            score += 20.0
        return min(score, 40.0)  # Cap base scores
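    # Example: a transcript containing 'colour' but no keyword or vocabulary
    # hits yields a British base score of 15.0 + 20.0 = 35.0, under the 40 cap.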
    def classify_accent(self, scores: Dict[str, float]) -> Tuple[str, float, str]:
        """Classify the most likely accent and provide an explanation"""
        if not scores or all(score == 0 for score in scores.values()):
            return "Unknown", 0.0, "Insufficient accent markers detected"
        # Find the highest scoring accent
        top_accent = max(scores.items(), key=lambda x: x[1])
        accent_name, confidence = top_accent
        # Generate explanation
        explanation = self._generate_explanation(accent_name, confidence, scores)
        return accent_name, confidence, explanation

    def _generate_explanation(self, accent: str, confidence: float, all_scores: Dict[str, float]) -> str:
        """Generate an explanation for the accent classification"""
        if confidence < 20:
            return "Low confidence detection. The speech patterns are not strongly indicative of any specific English accent."
        elif confidence < 50:
            return f"Moderate confidence in {accent} accent based on limited linguistic markers."
        elif confidence < 75:
            return f"Good confidence in {accent} accent. Several characteristic patterns detected."
        else:
            return f"High confidence in {accent} accent with strong linguistic indicators."
    def process_video(self, url: str) -> Dict:
        """Main processing pipeline"""
        temp_files = []
        try:
            # Step 1: Download video
            st.write("📥 Downloading video...")
            video_path = self.download_video(url)
            temp_files.append(video_path)
            # Step 2: Extract audio
            st.write("🎵 Extracting audio...")
            audio_path = self.extract_audio(video_path)
            temp_files.append(audio_path)
            # Step 3: Transcribe audio
            st.write("🎤 Transcribing speech...")
            transcript = self.transcribe_audio(audio_path)
            # Step 4: Analyze accent
            st.write("🔍 Analyzing accent patterns...")
            accent_scores = self.analyze_accent_patterns(transcript)
            accent, confidence, explanation = self.classify_accent(accent_scores)
            return {
                'success': True,
                'transcript': transcript,
                'accent': accent,
                'confidence': confidence,
                'explanation': explanation,
                'all_scores': accent_scores
            }
        except Exception as e:
            return {
                'success': False,
                'error': str(e)
            }
        finally:
            # Clean up temporary files; catch OSError specifically rather
            # than a bare except, which would also swallow KeyboardInterrupt
            for temp_file in temp_files:
                try:
                    if os.path.exists(temp_file):
                        os.remove(temp_file)
                except OSError:
                    pass
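# A minimal sketch of exercising the text-level scorer directly, without the
# Streamlit UI, downloads, or transcription. The sample sentence and the
# function name _demo_pattern_scoring are illustrative, not part of the app.
def _demo_pattern_scoring():
    detector = AccentDetector()
    scores = detector.analyze_accent_patterns(
        "no worries mate, i reckon the arvo barbie was bloody lovely"
    )
    accent, confidence, explanation = detector.classify_accent(scores)
    print(accent, confidence, explanation)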
def main():
    st.set_page_config(
        page_title="English Accent Detector",
        page_icon="🎤",
        layout="wide"
    )
    st.title("🎤 English Accent Detection Tool")
    st.markdown("### Analyze English accents from video content")
    st.markdown("""
    **How it works:**
    1. Paste a public, direct video URL (e.g. a link to an MP4 file)
    2. The tool extracts the audio and transcribes the speech
    3. Heuristic rules score accent-specific vocabulary, keywords, and spelling in the transcript
    4. Get a classification, confidence score, and explanation
    """)
    # Input section
    st.subheader("📹 Video Input")
    video_url = st.text_input(
        "Enter video URL:",
        placeholder="https://example.com/video.mp4",
        help="Must be a direct link to a video file (for Loom, use the video's direct MP4 asset URL, not the share page)"
    )
    # Process button
    if st.button("🔍 Analyze Accent", type="primary"):
        if not video_url:
            st.error("Please enter a video URL")
            return
        # Validate URL scheme
        if not (video_url.startswith('http://') or video_url.startswith('https://')):
            st.error("Please enter a valid URL starting with http:// or https://")
            return
        # Initialize detector
        detector = AccentDetector()
        # Process video
        with st.spinner("Processing video... This may take a few minutes."):
            result = detector.process_video(video_url)
        # Display results
        if result['success']:
            st.success("✅ Analysis Complete!")
            # Main results
            col1, col2 = st.columns(2)
            with col1:
                st.metric(
                    label="🗣️ Detected Accent",
                    value=result['accent']
                )
            with col2:
                st.metric(
                    label="🎯 Confidence Score",
                    value=f"{result['confidence']}%"
                )
            # Explanation
            st.subheader("📝 Analysis Explanation")
            st.write(result['explanation'])
            # Transcript
            st.subheader("📄 Transcript")
            st.text_area("Transcribed Text:", result['transcript'], height=100)
            # Detailed scores; st.table renders a list of row dicts directly
            st.subheader("📊 Detailed Accent Scores")
            score_rows = []
            for accent, score in result['all_scores'].items():
                score_rows.append({"Accent": accent, "Confidence": f"{score}%"})
            st.table(score_rows)
        else:
            st.error(f"❌ Error: {result['error']}")
    # Footer
    st.markdown("---")
    st.markdown("""
    **Technical Notes:**
    - Supports common video formats (MP4, MOV, AVI); ffmpeg detects the container from the file contents, not the extension
    - Requires a direct, publicly accessible video file URL (share-page links, such as Loom pages, will not download)
    - Scoring is text-based: it matches accent-specific vocabulary, keywords, and spelling in the transcript
    - Designed for English-language speech only
    """)
if __name__ == "__main__":
    main()
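# To run locally (the filename app.py is an assumption; use this file's
# actual name):
#
#   pip install streamlit requests SpeechRecognition pydub
#   streamlit run app.py
#
# ffmpeg must also be installed and on PATH for audio extraction.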