Spaces:
Configuration error
Configuration error
Fedir Zadniprovskyi
committed on
Commit
·
937af67
1
Parent(s):
ad53e40
docs: add live-transcription demo
Browse files- .gitattributes +2 -0
- examples/live-audio/audio.pcm +3 -0
- examples/live-audio/demo.mp4 +3 -0
- examples/live-audio/script.sh +19 -0
.gitattributes
CHANGED
@@ -1 +1,3 @@
|
|
1 |
/examples/youtube/the-evolution-of-the-operating-system.mp3 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
1 |
/examples/youtube/the-evolution-of-the-operating-system.mp3 filter=lfs diff=lfs merge=lfs -text
|
2 |
+
/examples/live-audio/audio.pcm filter=lfs diff=lfs merge=lfs -text
|
3 |
+
/examples/live-audio/demo.mp4 filter=lfs diff=lfs merge=lfs -text
|
examples/live-audio/audio.pcm
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14e9c4e184043197bddfe2d71cd2519172571e3296a25e42de7b474391054142
|
3 |
+
size 52679948
|
examples/live-audio/demo.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71dd24efc0cd37e88434ab8fb3e34905073bcbd1bd6838a51f35c3fbab7ac352
|
3 |
+
size 13127468
|
examples/live-audio/script.sh
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
|
2 |
+
|
3 |
+
# Strict mode: abort on any failing command (-e), on use of an unset variable (-u),
# and when any stage of a pipeline fails, not just the last one (pipefail).
set -euo pipefail
|
4 |
+
|
5 |
+
# The audio file was copied from the `youtube` example and converted to a raw, single channel, 16000 sample rate, 16-bit little-endian PCM audio file.
|
6 |
+
# cp ../youtube/the-evolution-of-the-operating-system.mp3 ./audio.mp3
|
7 |
+
# ffmpeg -y -hide_banner -loglevel quiet -i audio.mp3 -ac 1 -ar 16000 -f s16le -acodec pcm_s16le audio.pcm
|
8 |
+
# rm -f audio.mp3
|
9 |
+
|
10 |
+
# Model passed to the server container. A WHISPER_MODEL already present in the
# environment takes precedence; otherwise distil-large-v3 is used.
export WHISPER_MODEL="${WHISPER_MODEL:-distil-large-v3}" # or tiny.en if you are running on a CPU, for faster inference.
|
11 |
+
|
12 |
+
# Ensure you have `faster-whisper-server` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready or watch the logs with `docker logs -f <container_id>`.
|
13 |
+
# Start the CUDA server image in the background, publishing port 8000 and mounting the host Hugging Face cache into the container.
# $WHISPER_MODEL is quoted so the value reaches docker as a single, unglobbed argument.
docker run --detach --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER_MODEL="$WHISPER_MODEL" fedirz/faster-whisper-server:0.1-cuda
|
14 |
+
# or you can run it on a CPU
|
15 |
+
# docker run --detach --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER_MODEL=$WHISPER_MODEL fedirz/faster-whisper-server:0.1-cpu
|
16 |
+
|
17 |
+
# `pv` is used to limit the rate at which the audio is streamed to the server. Audio is being streamed at a rate of 32 kB/s (16000 samples per second * 16 bits per sample / 8 bits per byte = 32000 bytes per second). This emulates live audio input from a microphone: `ffmpeg -loglevel quiet -f alsa -i default -ac 1 -ar 16000 -f s16le`
|
18 |
+
# shellcheck disable=SC2002
|
19 |
+
cat audio.pcm | pv -qL 32000 | websocat --no-close --binary "ws://localhost:8000/v1/audio/transcriptions?language=en" # URL is quoted: the `?` would otherwise be treated as a glob character
|