Update app.py
app.py CHANGED
@@ -1,8 +1,132 @@
```python
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import HTMLResponse
from transformers import pipeline
import torch
import tempfile
import os
import time

print(f"Is CUDA available: {torch.cuda.is_available()}")  # Expected: True on a GPU Space
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Define the FastAPI app
app = FastAPI()

# Load the Whisper model once during startup
device = torch.device("cuda")
asr_pipeline = pipeline(model="openai/whisper-large", device=device)  # Initialize the Whisper model
# Smaller, Portuguese-forced alternative:
# asr_pipeline = pipeline(model="openai/whisper-small", device=device, generate_kwargs={"language": "pt"})

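# Optional (untested sketch, not in the original code): very long recordings can
# also be transcribed in fixed-size chunks, e.g.
#   transcription = asr_pipeline(path, chunk_length_s=30, return_timestamps=True)
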
# Basic GET endpoint
@app.get("/")
def read_root():
    return {"message": "Welcome to the FastAPI app on Hugging Face Spaces!"}

# POST endpoint to transcribe audio
@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...)):
    start_time = time.time()

    # Save the uploaded file using a temporary file manager
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
        temp_audio_file.write(await file.read())
        temp_file_path = temp_audio_file.name

    # Transcribe the audio with long-form generation enabled
    transcription_start = time.time()
    transcription = asr_pipeline(temp_file_path, return_timestamps=True)  # Enable timestamp return for long audio files
    transcription_end = time.time()

    # Clean up the temporary file after use
    os.remove(temp_file_path)

    # Log time durations
    end_time = time.time()
    print(f"Time to transcribe audio: {transcription_end - transcription_start:.4f} seconds")
    print(f"Total execution time: {end_time - start_time:.4f} seconds")

    return {"transcription": transcription['text']}
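# Illustrative request (assumes a local run on port 7860 and a sample.wav file):
#   curl -X POST -F "file=@sample.wav" http://localhost:7860/transcribe/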

@app.get("/playground/", response_class=HTMLResponse)
def playground():
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Voice Recorder</title>
    </head>
    <body>
        <h1>Record your voice</h1>
        <button id="startBtn">Start Recording</button>
        <button id="stopBtn" disabled>Stop Recording</button>
        <p id="status">Press start to record your voice...</p>

        <audio id="audioPlayback" controls style="display:none;"></audio>
        <script>
            let mediaRecorder;
            let audioChunks = [];

            const startBtn = document.getElementById('startBtn');
            const stopBtn = document.getElementById('stopBtn');
            const status = document.getElementById('status');
            const audioPlayback = document.getElementById('audioPlayback');

            // Start Recording
            startBtn.addEventListener('click', async () => {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                mediaRecorder = new MediaRecorder(stream);
                mediaRecorder.start();

                status.textContent = 'Recording...';
                startBtn.disabled = true;
                stopBtn.disabled = false;

                mediaRecorder.ondataavailable = event => {
                    audioChunks.push(event.data);
                };
            });

            // Stop Recording
            stopBtn.addEventListener('click', () => {
                // Register the handler before stopping so the final chunk is captured
                mediaRecorder.onstop = async () => {
                    status.textContent = 'Recording stopped. Preparing to send...';
                    // Note: MediaRecorder typically emits webm/ogg data; the
                    // server-side decoder handles it despite the .wav label
                    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                    const audioUrl = URL.createObjectURL(audioBlob);
                    audioPlayback.src = audioUrl;
                    audioPlayback.style.display = 'block';
                    audioChunks = [];

                    // Send the audio blob to the FastAPI endpoint
                    const formData = new FormData();
                    formData.append('file', audioBlob, 'recording.wav');

                    const response = await fetch('/transcribe/', {
                        method: 'POST',
                        body: formData,
                    });

                    const result = await response.json();
                    status.textContent = 'Transcription: ' + result.transcription;
                };
                mediaRecorder.stop();

                startBtn.disabled = false;
                stopBtn.disabled = true;
            });
        </script>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)

# If running as the main module, start Uvicorn
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
```
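
For completeness, a minimal client sketch for the `/transcribe/` endpoint. The host, port, and the `sample.wav` file name are illustrative assumptions, not part of the commit:

```python
# Minimal client sketch for the /transcribe/ endpoint.
# Assumes the app is running locally on port 7860 and sample.wav exists.
import requests

with open("sample.wav", "rb") as f:
    # The multipart field name must be "file" to match the UploadFile parameter.
    response = requests.post(
        "http://localhost:7860/transcribe/",
        files={"file": ("sample.wav", f, "audio/wav")},
    )

response.raise_for_status()
print(response.json()["transcription"])
```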