legusxyz committed
Commit 554e451 · verified · 1 Parent(s): 93c6daa

Update app.py

Files changed (1)
  1. app.py +6 -127
app.py CHANGED
@@ -1,129 +1,8 @@
- from fastapi import FastAPI, UploadFile, File
- from transformers import pipeline, WhisperForConditionalGeneration, WhisperProcessor
  import torch
- import tempfile
- import os
- import time
- from fastapi.responses import HTMLResponse
- from fastapi.staticfiles import StaticFiles

- print(f"Is CUDA available: {torch.cuda.is_available()}")
- # True
- print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
-
- # Define FastAPI app
- app = FastAPI()
-
- # Load the Whisper model once during startup
- device = 0 if torch.cuda.is_available() else -1  # Use GPU if available, otherwise CPU
- asr_pipeline = pipeline(model="openai/whisper-large", device=device)  # Initialize Whisper model
- # asr_pipeline = pipeline(model="openai/whisper-small", device=device, language="pt")
-
-
- # Basic GET endpoint
- @app.get("/")
- def read_root():
-     return {"message": "Welcome to the FastAPI app on Hugging Face Spaces!"}
-
- # POST endpoint to transcribe audio
- @app.post("/transcribe/")
- async def transcribe_audio(file: UploadFile = File(...)):
-     start_time = time.time()
-
-     # Save the uploaded file using a temporary file manager
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
-         temp_audio_file.write(await file.read())
-         temp_file_path = temp_audio_file.name
-
-     # Transcribe the audio with long-form generation enabled
-     transcription_start = time.time()
-     transcription = asr_pipeline(temp_file_path, return_timestamps=True)  # Enable timestamp return for long audio files
-     transcription_end = time.time()
-
-     # Clean up temporary file after use
-     os.remove(temp_file_path)
-
-     # Log time durations
-     end_time = time.time()
-     print(f"Time to transcribe audio: {transcription_end - transcription_start:.4f} seconds")
-     print(f"Total execution time: {end_time - start_time:.4f} seconds")
-
-     return {"transcription": transcription['text']}
-
- @app.get("/playground/", response_class=HTMLResponse)
- def playground():
-     html_content = """
-     <!DOCTYPE html>
-     <html lang="en">
-     <head>
-         <meta charset="UTF-8">
-         <meta name="viewport" content="width=device-width, initial-scale=1.0">
-         <title>Voice Recorder</title>
-     </head>
-     <body>
-         <h1>Record your voice</h1>
-         <button id="startBtn">Start Recording</button>
-         <button id="stopBtn" disabled>Stop Recording</button>
-         <p id="status">Press start to record your voice...</p>
-
-         <audio id="audioPlayback" controls style="display:none;"></audio>
-         <script>
-             let mediaRecorder;
-             let audioChunks = [];
-
-             const startBtn = document.getElementById('startBtn');
-             const stopBtn = document.getElementById('stopBtn');
-             const status = document.getElementById('status');
-             const audioPlayback = document.getElementById('audioPlayback');
-
-             // Start Recording
-             startBtn.addEventListener('click', async () => {
-                 const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-                 mediaRecorder = new MediaRecorder(stream);
-                 mediaRecorder.start();
-
-                 status.textContent = 'Recording...';
-                 startBtn.disabled = true;
-                 stopBtn.disabled = false;
-
-                 mediaRecorder.ondataavailable = event => {
-                     audioChunks.push(event.data);
-                 };
-             });
-
-             // Stop Recording
-             stopBtn.addEventListener('click', () => {
-                 mediaRecorder.stop();
-                 mediaRecorder.onstop = async () => {
-                     status.textContent = 'Recording stopped. Preparing to send...';
-                     const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
-                     const audioUrl = URL.createObjectURL(audioBlob);
-                     audioPlayback.src = audioUrl;
-                     audioPlayback.style.display = 'block';
-                     audioChunks = [];
-
-                     // Send audio blob to FastAPI endpoint
-                     const formData = new FormData();
-                     formData.append('file', audioBlob, 'recording.wav');
-
-                     const response = await fetch('/transcribe/', {
-                         method: 'POST',
-                         body: formData,
-                     });
-
-                     const result = await response.json();
-                     status.textContent = 'Transcription: ' + result.transcription;
-                 };
-
-                 startBtn.disabled = false;
-                 stopBtn.disabled = true;
-             });
-         </script>
-     </body>
-     </html>
-     """
-     return HTMLResponse(content=html_content)
- # If running as the main module, start Uvicorn
- if __name__ == "__main__":
-     import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=7860)
+ if torch.cuda.is_available():
+     device = torch.device("cuda")
+     print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
+ else:
+     device = torch.device("cpu")
+     print("CUDA is not available, using CPU.")