abdu-l7hman committed
Commit 8d3d1a5 · 1 Parent(s): 0c5d983

Update model to augmented version and add ffmpeg

Files changed (3):
  1. app.py +174 -78
  2. models/stutter_detector_all_types.pth +2 -2
  3. requirements.txt +1 -1
app.py CHANGED
@@ -1,89 +1,196 @@
  from flask import Flask, render_template, request, jsonify
- from flask_cors import CORS # <--- ADDED THIS
+ from flask_cors import CORS
  import os
  from werkzeug.utils import secure_filename
- import time
- import sys
- import subprocess
  import shutil
+ import subprocess
+ import sys

- # Ensure we can find the models folder
- sys.path.append(os.path.join(os.path.dirname(__file__), 'models'))
-
- # Lazy-import model dependencies
- try:
-     from models.stutter_detector_local import ImprovedStutterDetector, calculate_stutter_severity
-     MODEL_DEPS_AVAILABLE = True
- except Exception as e:
-     print(f"Model dependencies unavailable: {e}")
-     ImprovedStutterDetector = None
-     def calculate_stutter_severity(_):
-         return None
-     MODEL_DEPS_AVAILABLE = False
+ # --- ML Imports ---
+ import torch
+ from torch import nn
+ import numpy as np
+ import librosa
+ import transformers

+ # ======================================================
+ # 1. CONFIGURATION
+ # ======================================================
  app = Flask(__name__)
- CORS(app) # <--- ENABLE CORS HERE
+ CORS(app)

- # config
- UPLOAD_FOLDER = '/tmp/uploads' # Use /tmp because other folders might be read-only on HF
+ UPLOAD_FOLDER = '/tmp/uploads' # /tmp for read-write permissions on HF Spaces
  ALLOWED_EXTENSIONS = {'wav', 'mp3', 'ogg', 'webm', 'm4a'}
- MODEL_PATH = 'models/stutter_detector_all_types.pth'
+ # UPDATE THIS TO YOUR NEW FILENAME
+ MODEL_PATH = 'models/stutter_detector_attentive_augmented.pth'

  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
- app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
+ app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 # 50MB limit

  os.makedirs(UPLOAD_FOLDER, exist_ok=True)

- # Check ffmpeg
- FFMPEG_AVAILABLE = shutil.which('ffmpeg') is not None
-
- # Load Model
- print("Loading stutter detection model...")
- detector = None
- if MODEL_DEPS_AVAILABLE and ImprovedStutterDetector is not None:
-     try:
-         detector = ImprovedStutterDetector(MODEL_PATH, device='cpu') # Force CPU
-         print("Model loaded successfully!")
-     except Exception as e:
-         print(f"Error loading model: {e}")
-         detector = None
- else:
-     print("Skipping model load due to missing dependencies.")
+ # ======================================================
+ # 2. NEW MODEL ARCHITECTURE
+ # ======================================================
+
+ class AttentiveStatsPool(nn.Module):
+     def __init__(self, in_dim, use_std=True):
+         super().__init__()
+         self.use_std = use_std
+         self.att = nn.Sequential(
+             nn.Linear(in_dim, in_dim // 2),
+             nn.Tanh(),
+             nn.Linear(in_dim // 2, 1)
+         )
+
+     def forward(self, H, mask=None):
+         alpha = torch.softmax(self.att(H), dim=1)
+         mean = (alpha * H).sum(dim=1)
+         ex2 = (alpha * (H ** 2)).sum(dim=1)
+         std = torch.sqrt(torch.clamp(ex2 - mean**2, min=1e-6))
+         return torch.cat([mean, std], dim=-1)
+
+ class Wav2VecAttentiveClassifier(nn.Module):
+     def __init__(self, hidden_dim=768, output_dim=2):
+         super().__init__()
+         # Load base wav2vec model
+         self.wav2vec = transformers.Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base")
+
+         # Freeze wav2vec weights
+         for p in self.wav2vec.parameters():
+             p.requires_grad = False
+
+         self.pool = AttentiveStatsPool(hidden_dim, use_std=True)
+
+         self.classifier = nn.Sequential(
+             nn.Linear(hidden_dim * 2, 256),
+             nn.BatchNorm1d(256),
+             nn.ReLU(),
+             nn.Dropout(0.3),
+
+             nn.Linear(256, 128),
+             nn.BatchNorm1d(128),
+             nn.ReLU(),
+             nn.Dropout(0.3),
+
+             nn.Linear(128, output_dim)
+         )
+
+     def forward(self, x):
+         H = self.wav2vec(x).last_hidden_state
+         z = self.pool(H)
+         return self.classifier(z)
+
+ # ======================================================
+ # 3. INFERENCE HANDLER CLASS
+ # ======================================================
+
+ class StutterInferenceService:
+     def __init__(self, model_path):
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         print(f"Loading model using device: {self.device}")
+
+         try:
+             self.model = Wav2VecAttentiveClassifier()
+             # Load weights
+             state_dict = torch.load(model_path, map_location=self.device)
+             self.model.load_state_dict(state_dict)
+             self.model.to(self.device)
+             self.model.eval()
+             print("Model loaded successfully.")
+             self.loaded = True
+         except Exception as e:
+             print(f"CRITICAL ERROR LOADING MODEL: {e}")
+             self.loaded = False
+
+     def analyze(self, file_path):
+         if not self.loaded:
+             raise Exception("Model not loaded")
+
+         # 1. Load Audio (Force 16kHz)
+         audio, sr = librosa.load(file_path, sr=16000)
+
+         # 2. Define segment parameters
+         SEGMENT_LENGTH = 48000 # 3 seconds exactly
+
+         # 3. Handle Short Audio (Pad if < 3 sec)
+         if len(audio) < SEGMENT_LENGTH:
+             padding = SEGMENT_LENGTH - len(audio)
+             audio = np.pad(audio, (0, padding), 'constant')
+
+         # 4. Process Logic (Sliding Window or Chunks)
+         # We will slice the audio into non-overlapping 3s chunks
+         num_chunks = len(audio) // SEGMENT_LENGTH
+         results = []
+
+         stutter_count = 0
+
+         for i in range(num_chunks):
+             start = i * SEGMENT_LENGTH
+             end = start + SEGMENT_LENGTH
+             chunk = audio[start:end]
+
+             # Preprocess for Pytorch
+             # Shape: (1, 48000)
+             tensor_input = torch.tensor(chunk).float().unsqueeze(0).to(self.device)
+
+             with torch.no_grad():
+                 logits = self.model(tensor_input)
+                 probs = torch.softmax(logits, dim=1)
+
+             # Assuming Class 1 = Stutter, Class 0 = Fluent
+             # Check your training labels! Usually 1 is the positive class.
+             stutter_prob = probs[0][1].item()
+             prediction = torch.argmax(probs, dim=1).item()
+
+             label = "Stutter" if prediction == 1 else "Fluent"
+             if prediction == 1:
+                 stutter_count += 1
+
+             results.append({
+                 "segment_id": i,
+                 "start_time": i * 3.0,
+                 "end_time": (i + 1) * 3.0,
+                 "probability": float(stutter_prob),
+                 "label": label
+             })
+
+         # 5. Calculate Overall Severity
+         total_segments = len(results) if len(results) > 0 else 1
+         severity_score = (stutter_count / total_segments) * 100
+
+         severity_label = "Normal"
+         if severity_score > 10: severity_label = "Mild"
+         if severity_score > 30: severity_label = "Moderate"
+         if severity_score > 60: severity_label = "Severe"
+
+         return {
+             "overall_severity": severity_label,
+             "stutter_percentage": round(severity_score, 2),
+             "details": results
+         }
+
+ # ======================================================
+ # 4. APP LOGIC
+ # ======================================================
+
+ # Initialize Service Global
+ detector_service = StutterInferenceService(MODEL_PATH)

  def allowed_file(filename):
      return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

- def convert_to_wav(input_path, output_path):
-     try:
-         subprocess.run([
-             'ffmpeg', '-i', input_path,
-             '-acodec', 'pcm_s16le',
-             '-ar', '16000',
-             '-ac', '1',
-             '-y',
-             output_path
-         ], check=True, capture_output=True)
-         return True
-     except subprocess.CalledProcessError as e:
-         print(f"FFmpeg conversion error: {e.stderr.decode()}")
-         return False
-     except FileNotFoundError:
-         return False
-
- # ... [Keep your analyze_audio_file function exactly as it is] ...
- # ... [Paste the analyze_audio_file function here from your original code] ...
-
- # Route: Only strictly necessary endpoints for your API
  @app.route('/health', methods=['GET'])
  def health_check():
-     return jsonify({'status': 'healthy', 'model_loaded': detector is not None}), 200
+     return jsonify({
+         'status': 'healthy',
+         'model_loaded': detector_service.loaded
+     }), 200

  @app.route('/upload', methods=['POST'])
  def upload_file():
-     # ... [Keep your existing upload logic exactly as it is] ...
-     # Just ensure you use the analyze_audio_file function defined above
-     if detector is None:
-         return jsonify({'error': 'Model not loaded.'}), 500
+     if not detector_service.loaded:
+         return jsonify({'error': 'Model failed to load on server start.'}), 500

      if 'audio' not in request.files:
          return jsonify({'error': 'No audio file provided'}), 400
@@ -95,20 +202,9 @@ def upload_file():
          filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
          file.save(filepath)

-         # Get params from request or default
-         segment_duration = float(request.form.get('segment_duration', 3.0))
-         stutter_threshold = float(request.form.get('stutter_threshold', 0.5))
-
          try:
-             # Run analysis
-             results = detector.analyze_audio_file(
-                 filepath,
-                 segment_duration=segment_duration,
-                 stutter_threshold=stutter_threshold
-             )
-
-             # Calculate severity
-             results['severity'] = calculate_stutter_severity(results)
+             # Run analysis using the new service
+             results = detector_service.analyze(filepath)

              # Cleanup
              if os.path.exists(filepath): os.remove(filepath)
@@ -116,11 +212,11 @@ def upload_file():
              return jsonify(results), 200

          except Exception as e:
+             print(f"Error processing file: {e}")
              if os.path.exists(filepath): os.remove(filepath)
              return jsonify({'error': str(e)}), 500

-     return jsonify({'error': 'Invalid file'}), 400
+     return jsonify({'error': 'Invalid file type'}), 400

  if __name__ == '__main__':
-     # This is only for local testing, Docker uses Gunicorn
      app.run(host='0.0.0.0', port=7860)
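A hypothetical illustration (not part of the commit) of the chunking arithmetic in the new StutterInferenceService.analyze: audio is resampled to 16 kHz, clips shorter than 3 s are zero-padded up to one segment, and any trailing remainder shorter than 3 s is not scored. The helper name chunk_bounds is invented for this sketch.

# Sketch only: mirrors the padding/slicing logic of analyze() above.
import numpy as np

SR = 16000
SEGMENT_LENGTH = 48000  # 3 seconds at 16 kHz

def chunk_bounds(duration_s):
    audio = np.zeros(int(duration_s * SR), dtype=np.float32)
    if len(audio) < SEGMENT_LENGTH:
        audio = np.pad(audio, (0, SEGMENT_LENGTH - len(audio)), 'constant')
    num_chunks = len(audio) // SEGMENT_LENGTH
    return [(i * 3.0, (i + 1) * 3.0) for i in range(num_chunks)]

print(chunk_bounds(1.2))  # [(0.0, 3.0)] -> short clip padded to one segment
print(chunk_bounds(7.5))  # [(0.0, 3.0), (3.0, 6.0)] -> trailing 1.5 s ignored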
models/stutter_detector_all_types.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:819effa4e2727f82295d9f7c7cd2647159ef55001c8f7eda7cb009130d45ea56
- size 378509393
+ oid sha256:da5f2977f33bfac1112d588e3e5e187f0e600af13190eb9fb9bc0b3411eb6ada
+ size 380482371
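A hedged local check (not part of the commit) that an updated LFS checkpoint lines up with the Wav2VecAttentiveClassifier defined in app.py; the checkpoint path is an assumption (use whichever file MODEL_PATH points to), and importing from app runs its module-level setup as a side effect.

# Sketch only: inspect how the checkpoint's state dict matches the new architecture.
# app.py loads with the default strict=True; strict=False is used here just to report mismatches.
import torch
from app import Wav2VecAttentiveClassifier  # assumption: import-time service init is acceptable

model = Wav2VecAttentiveClassifier()
state_dict = torch.load("models/stutter_detector_all_types.pth", map_location="cpu")  # path is an assumption
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print("missing keys:", missing)
print("unexpected keys:", unexpected)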
requirements.txt CHANGED
@@ -8,4 +8,4 @@ librosa
  soundfile
  numpy
  scipy
- gunicorn
+ gunicorn
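A hypothetical client-side sketch (not part of the commit) for exercising the two routes defined in app.py once the Space, or a local Flask/gunicorn process on port 7860, is running; BASE_URL and sample.wav are placeholders, and the requests package must be installed.

# Sketch only: calls the /health and /upload endpoints.
import requests

BASE_URL = "http://localhost:7860"  # placeholder: local run or the deployed Space URL

# /health reports whether the checkpoint loaded at startup.
print(requests.get(f"{BASE_URL}/health", timeout=10).json())

# /upload expects a multipart field named "audio" with an allowed extension.
with open("sample.wav", "rb") as f:
    resp = requests.post(f"{BASE_URL}/upload", files={"audio": f}, timeout=300)

print(resp.status_code)
print(resp.json())  # on success: overall_severity, stutter_percentage, details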