from fastapi import FastAPI, UploadFile, File
from fastapi.responses import HTMLResponse
from transformers import pipeline
import torch
import tempfile
import os
import time

# Define FastAPI app
app = FastAPI()

# Load the Whisper model once during startup
device = 0 if torch.cuda.is_available() else -1  # Use GPU if available, otherwise CPU
asr_pipeline = pipeline(model="openai/whisper-small", device=device)  # Initialize Whisper model
# To pin the transcription language (e.g. Portuguese), pass generate_kwargs rather than a bare language argument:
# asr_pipeline = pipeline(model="openai/whisper-small", device=device, generate_kwargs={"language": "portuguese", "task": "transcribe"})


# Basic GET endpoint
@app.get("/")
def read_root():
    return {"message": "Welcome to the FastAPI app on Hugging Face Spaces!"}

# POST endpoint to transcribe audio
@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...)):
    start_time = time.time()

    # Save the uploaded file using a temporary file manager
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
        temp_audio_file.write(await file.read())
        temp_file_path = temp_audio_file.name

    # Transcribe the audio with long-form generation enabled
    transcription_start = time.time()
    transcription = asr_pipeline(temp_file_path, return_timestamps=True)  # Enable timestamp return for long audio files
    transcription_end = time.time()

    # Clean up temporary file after use
    os.remove(temp_file_path)

    # Log time durations
    end_time = time.time()
    print(f"Time to transcribe audio: {transcription_end - transcription_start:.4f} seconds")
    print(f"Total execution time: {end_time - start_time:.4f} seconds")

    return {"transcription": transcription['text']}

@app.get("/playground/", response_class=HTMLResponse)
def playground():
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Voice Recorder</title>
    </head>
    <body>
        <h1>Record your voice</h1>
        <button id="startBtn">Start Recording</button>
        <button id="stopBtn" disabled>Stop Recording</button>
        <p id="status">Press start to record your voice...</p>

        <audio id="audioPlayback" controls style="display:none;"></audio>
        <script>
            let mediaRecorder;
            let audioChunks = [];

            const startBtn = document.getElementById('startBtn');
            const stopBtn = document.getElementById('stopBtn');
            const status = document.getElementById('status');
            const audioPlayback = document.getElementById('audioPlayback');

            // Start Recording
            startBtn.addEventListener('click', async () => {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                mediaRecorder = new MediaRecorder(stream);
                mediaRecorder.start();

                status.textContent = 'Recording...';
                startBtn.disabled = true;
                stopBtn.disabled = false;

                mediaRecorder.ondataavailable = event => {
                    audioChunks.push(event.data);
                };
            });

            // Stop Recording
            stopBtn.addEventListener('click', () => {
                // Register the handler before stopping so it runs once the recorder has flushed its data
                mediaRecorder.onstop = async () => {
                    status.textContent = 'Recording stopped. Preparing to send...';
                    // MediaRecorder typically records webm/ogg; the "wav" name and type here are only labels,
                    // the server decodes the uploaded bytes by content rather than by extension
                    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                    const audioUrl = URL.createObjectURL(audioBlob);
                    audioPlayback.src = audioUrl;
                    audioPlayback.style.display = 'block';
                    audioChunks = [];

                    // Send audio blob to FastAPI endpoint
                    const formData = new FormData();
                    formData.append('file', audioBlob, 'recording.wav');

                    const response = await fetch('/transcribe/', {
                        method: 'POST',
                        body: formData,
                    });

                    const result = await response.json();
                    status.textContent = 'Transcription: ' + result.transcription;
                };
                mediaRecorder.stop();

                startBtn.disabled = false;
                stopBtn.disabled = true;
            });
        </script>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)

# If running as the main module, start Uvicorn
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
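
# Example client call (a minimal sketch, assuming the server is running locally on port 7860
# and that a file named "sample.wav" exists; the host, port, and filename are illustrative):
#
#   import requests
#
#   with open("sample.wav", "rb") as f:
#       resp = requests.post(
#           "http://localhost:7860/transcribe/",
#           files={"file": ("sample.wav", f, "audio/wav")},
#       )
#   print(resp.json()["transcription"])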