Commit 39ee116 · Fedir Zadniprovskyi committed
Parent(s): d0feed8

chore: rename to 'faster-whisper-server'
Files changed:
- .github/workflows/docker-build-and-push.yaml +3 -3
- Dockerfile.cpu +3 -3
- Dockerfile.cuda +3 -3
- README.md +15 -8
- Taskfile.yaml +4 -4
- compose.yaml +6 -6
- {speaches → faster_whisper_server}/__init__.py +0 -0
- {speaches → faster_whisper_server}/asr.py +3 -3
- {speaches → faster_whisper_server}/audio.py +2 -2
- {speaches → faster_whisper_server}/config.py +0 -0
- {speaches → faster_whisper_server}/core.py +1 -1
- {speaches → faster_whisper_server}/logger.py +2 -2
- {speaches → faster_whisper_server}/main.py +14 -8
- {speaches → faster_whisper_server}/server_models.py +2 -2
- {speaches → faster_whisper_server}/transcriber.py +10 -5
- {speaches → faster_whisper_server}/utils.py +0 -0
- tests/__init__.py +0 -0
- tests/app_test.py +3 -3
.github/workflows/docker-build-and-push.yaml
CHANGED
@@ -28,7 +28,7 @@ jobs:
         uses: docker/metadata-action@v5
         with:
           images: |
-            fedirz/
+            fedirz/faster-whisper-server
           # https://github.com/docker/metadata-action?tab=readme-ov-file#flavor-input
           flavor: |
             latest=false
@@ -47,5 +47,5 @@ jobs:
           # platforms: linux/amd64,linux/arm64
           tags: ${{ steps.meta.outputs.tags }}
           # TODO: cache
-          # cache-from: type=registry,ref=fedirz/
-          # cache-to: type=registry,ref=fedirz/
+          # cache-from: type=registry,ref=fedirz/faster-whisper-server:buildcache
+          # cache-to: type=registry,ref=fedirz/faster-whisper-server:buildcache,mode=max
Dockerfile.cpu
CHANGED
@@ -9,12 +9,12 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/* && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
-WORKDIR /root/
+WORKDIR /root/faster-whisper-server
 COPY pyproject.toml poetry.lock ./
 RUN poetry install --only main
-COPY ./
+COPY ./faster_whisper_server ./faster_whisper_server
 ENTRYPOINT ["poetry", "run"]
-CMD ["uvicorn", "
+CMD ["uvicorn", "faster_whisper_server.main:app"]
 ENV WHISPER_MODEL=distil-medium.en
 ENV WHISPER_INFERENCE_DEVICE=cpu
 ENV WHISPER_COMPUTE_TYPE=int8
Dockerfile.cuda
CHANGED
@@ -9,12 +9,12 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/* && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
-WORKDIR /root/
+WORKDIR /root/faster-whisper-server
 COPY pyproject.toml poetry.lock ./
 RUN poetry install --only main
-COPY ./
+COPY ./faster_whisper_server ./faster_whisper_server
 ENTRYPOINT ["poetry", "run"]
-CMD ["uvicorn", "
+CMD ["uvicorn", "faster_whisper_server.main:app"]
 ENV WHISPER_MODEL=distil-large-v3
 ENV WHISPER_INFERENCE_DEVICE=cuda
 ENV UVICORN_HOST=0.0.0.0
README.md
CHANGED
@@ -1,20 +1,27 @@
-
-
-:peach:`speaches` is a web server that supports real-time transcription using WebSockets.
+## Faster Whisper Server
+`faster-whisper-server` is a web server that supports real-time transcription using WebSockets.
 - [faster-whisper](https://github.com/SYSTRAN/faster-whisper) is used as the backend. Both GPU and CPU inference are supported.
 - LocalAgreement2 ([paper](https://aclanthology.org/2023.ijcnlp-demo.3.pdf) | [original implementation](https://github.com/ufal/whisper_streaming)) algorithm is used for real-time transcription.
 - Can be deployed using Docker (Compose configuration can be found in [compose.yaml](./compose.yaml)).
-- All configuration is done through environment variables. See [config.py](./
+- All configuration is done through environment variables. See [config.py](./faster_whisper_server/config.py).
 - NOTE: only transcription of single channel, 16000 sample rate, raw, 16-bit little-endian audio is supported.
 - NOTE: this isn't really meant to be used as a standalone tool but rather to add transcription features to other applications.
 Please create an issue if you find a bug, have a question, or a feature suggestion.
 # Quick Start
-
+Using Docker
 ```bash
-docker run --gpus=all --publish 8000:8000 --
+docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:cuda
 # or
-docker run --publish 8000:8000 --
+docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:cpu
 ```
+Using Docker Compose
+```bash
+curl -sO https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
+docker compose up --detach up faster-whisper-server-cuda
+# or
+docker compose up --detach up faster-whisper-server-cpu
+```
+## Usage
 Streaming audio data from a microphone. [websocat](https://github.com/vi/websocat?tab=readme-ov-file#installation) installation is required.
 ```bash
 ffmpeg -loglevel quiet -f alsa -i default -ac 1 -ar 16000 -f s16le - | websocat --binary ws://0.0.0.0:8000/v1/audio/transcriptions
@@ -38,7 +45,7 @@ ffmpeg -i output.wav -ac 1 -ar 16000 -f s16le output.raw
 curl -X POST -F "file=@output.raw" http://0.0.0.0:8000/v1/audio/transcriptions
 # Output: "{\"text\":\"One, two, three, four, five.\"}"%
 ```
-
+## Roadmap
 - [ ] Support file transcription (non-streaming) of multiple formats.
 - [ ] CLI client.
 - [ ] Separate the web server related code from the "core", and publish "core" as a package.
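The updated Quick Start only shows curl and websocat. As a rough sketch (not part of this commit), the same file-transcription endpoint can be called from Python. It assumes the `requests` package, a server started with one of the `docker run` commands above, and an `output.raw` file produced by the ffmpeg conversion the README describes (mono, 16 kHz, raw 16-bit little-endian PCM):

```python
import requests

# Upload the raw PCM file to the transcription endpoint, mirroring the curl example.
with open("output.raw", "rb") as f:
    response = requests.post(
        "http://0.0.0.0:8000/v1/audio/transcriptions",
        files={"file": f},  # same multipart field name as `-F "file=@output.raw"`
    )
response.raise_for_status()
print(response.json()["text"])  # e.g. "One, two, three, four, five."
```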
Taskfile.yaml
CHANGED
@@ -1,6 +1,6 @@
 version: "3"
 tasks:
-
+  server: poetry run uvicorn --host 0.0.0.0 faster_whisper_server.main:app {{.CLI_ARGS}}
   test:
     cmds:
       - poetry run pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
@@ -11,15 +11,15 @@ tasks:
       - docker compose build
     sources:
       - Dockerfile.*
-      -
+      - faster_whisper_server/*.py
   create-multi-arch-builder: docker buildx create --name main --driver=docker-container
   build-and-push:
     cmds:
       - docker compose build --builder main --push
     sources:
       - Dockerfile.*
-      -
+      - faster_whisper_server/*.py
-  sync: lsyncd -nodaemon -delay 0 -rsyncssh . gpu-box
+  sync: lsyncd -nodaemon -delay 0 -rsyncssh . gpu-box faster-whisper-server
   # Python's urllib3 takes forever when ipv6 is enabled
   # https://support.nordvpn.com/hc/en-us/articles/20164669224337-How-to-disable-IPv6-on-Linux
   disable-ipv6: sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 && sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1
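For reference, the new `server` task is equivalent to starting uvicorn programmatically against the renamed module. A minimal sketch, assuming uvicorn's default port of 8000 (the port the Docker images publish):

```python
import uvicorn

if __name__ == "__main__":
    # Same application module the Taskfile and Dockerfiles now point at.
    uvicorn.run("faster_whisper_server.main:app", host="0.0.0.0", port=8000)
```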
compose.yaml
CHANGED
@@ -1,7 +1,7 @@
 # NOTE: arm images haven't been tested
 services:
-
-    image: fedirz/
+  faster-whisper-server-cuda:
+    image: fedirz/faster-whisper-server:cuda
     build:
       dockerfile: Dockerfile.cuda
       context: .
@@ -9,7 +9,7 @@ services:
         - linux/amd64
         - linux/arm64
       tags:
-        - fedirz/
+        - fedirz/faster-whisper-server:cuda
     volumes:
       - ~/.cache/huggingface:/root/.cache/huggingface
     restart: unless-stopped
@@ -20,8 +20,8 @@ services:
         reservations:
           devices:
             - capabilities: ["gpu"]
-
-    image: fedirz/
+  faster-whisper-server-cpu:
+    image: fedirz/faster-whisper-server:cpu
     build:
       dockerfile: Dockerfile.cpu
       context: .
@@ -29,7 +29,7 @@ services:
         - linux/amd64
         - linux/arm64
       tags:
-        - fedirz/
+        - fedirz/faster-whisper-server:cpu
     volumes:
       - ~/.cache/huggingface:/root/.cache/huggingface
     restart: unless-stopped
{speaches → faster_whisper_server}/__init__.py
RENAMED
File without changes
{speaches → faster_whisper_server}/asr.py
RENAMED
@@ -4,9 +4,9 @@ from typing import Iterable

 from faster_whisper import transcribe

-from
-from
-from
+from faster_whisper_server.audio import Audio
+from faster_whisper_server.core import Transcription, Word
+from faster_whisper_server.logger import logger


 class FasterWhisperASR:
{speaches → faster_whisper_server}/audio.py
RENAMED
@@ -7,8 +7,8 @@ import numpy as np
 import soundfile as sf
 from numpy.typing import NDArray

-from
-from
+from faster_whisper_server.config import SAMPLES_PER_SECOND
+from faster_whisper_server.logger import logger


 def audio_samples_from_file(file: BinaryIO) -> NDArray[np.float32]:
{speaches → faster_whisper_server}/config.py
RENAMED
File without changes
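config.py is renamed without changes, so its contents are not shown in this diff. Purely as an illustration (not the actual file), environment-variable-driven settings of the kind the README and Dockerfiles imply (WHISPER_MODEL, WHISPER_INFERENCE_DEVICE, WHISPER_COMPUTE_TYPE) could be modeled with pydantic-settings along these lines:

```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class WhisperSettings(BaseSettings):
    """Illustrative sketch only; field names mirror the WHISPER_* variables set in the Dockerfiles."""

    model_config = SettingsConfigDict(env_prefix="WHISPER_")

    model: str = "distil-medium.en"
    inference_device: str = "cpu"
    compute_type: str = "int8"


config = WhisperSettings()  # e.g. WHISPER_MODEL=distil-large-v3 overrides the default
```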
{speaches → faster_whisper_server}/core.py
RENAMED
@@ -4,7 +4,7 @@ from __future__ import annotations
 import re
 from dataclasses import dataclass

-from
+from faster_whisper_server.config import config


 # TODO: use the `Segment` from `faster-whisper.transcribe` instead
{speaches → faster_whisper_server}/logger.py
RENAMED
@@ -1,8 +1,8 @@
 import logging

-from
+from faster_whisper_server.config import config

-# Disables all but `
+# Disables all but `faster_whisper_server` logger

 root_logger = logging.getLogger()
 root_logger.setLevel(logging.CRITICAL)
{speaches → faster_whisper_server}/main.py
RENAMED
@@ -20,16 +20,22 @@ from fastapi.websockets import WebSocketState
 from faster_whisper import WhisperModel
 from faster_whisper.vad import VadOptions, get_speech_timestamps

-from
-from
-from
-from
-
-
+from faster_whisper_server import utils
+from faster_whisper_server.asr import FasterWhisperASR
+from faster_whisper_server.audio import AudioStream, audio_samples_from_file
+from faster_whisper_server.config import (
+    SAMPLES_PER_SECOND,
+    Language,
+    Model,
+    ResponseFormat,
+    config,
+)
+from faster_whisper_server.logger import logger
+from faster_whisper_server.server_models import (
     TranscriptionJsonResponse,
     TranscriptionVerboseJsonResponse,
 )
-from
+from faster_whisper_server.transcriber import audio_transcriber

 models: OrderedDict[Model, WhisperModel] = OrderedDict()

@@ -72,7 +78,7 @@ app = FastAPI(lifespan=lifespan)

 @app.get("/health")
 def health() -> Response:
-    return Response(status_code=200, content="
+    return Response(status_code=200, content="OK")


 @app.post("/v1/audio/translations")
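Besides the import renames, the second hunk tweaks the /health handler, which now returns the body "OK". A self-contained sketch of that pattern, with a placeholder lifespan (the real one in faster_whisper_server.main manages the Whisper model cache):

```python
from contextlib import asynccontextmanager

from fastapi import FastAPI, Response


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Placeholder: the real server loads and unloads Whisper models here.
    yield


app = FastAPI(lifespan=lifespan)


@app.get("/health")
def health() -> Response:
    return Response(status_code=200, content="OK")
```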
{speaches → faster_whisper_server}/server_models.py
RENAMED
@@ -3,8 +3,8 @@ from __future__ import annotations
 from faster_whisper.transcribe import Segment, TranscriptionInfo, Word
 from pydantic import BaseModel

-from
-from
+from faster_whisper_server import utils
+from faster_whisper_server.core import Transcription


 # https://platform.openai.com/docs/api-reference/audio/json-object
{speaches → faster_whisper_server}/transcriber.py
RENAMED
@@ -2,11 +2,16 @@ from __future__ import annotations

 from typing import AsyncGenerator

-from
-from
-from
-from
-
+from faster_whisper_server.asr import FasterWhisperASR
+from faster_whisper_server.audio import Audio, AudioStream
+from faster_whisper_server.config import config
+from faster_whisper_server.core import (
+    Transcription,
+    Word,
+    common_prefix,
+    to_full_sentences,
+)
+from faster_whisper_server.logger import logger


 class LocalAgreement:
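transcriber.py now imports `common_prefix` and `to_full_sentences` from faster_whisper_server.core and defines the `LocalAgreement` class behind the README's LocalAgreement2 reference. As a rough, simplified sketch of the idea (operating on plain strings rather than the project's Word objects, and not taken from this repository): only the prefix on which two consecutive hypotheses agree is committed as confirmed output.

```python
def common_prefix(a: list[str], b: list[str]) -> list[str]:
    """Return the longest shared prefix of two token sequences."""
    prefix: list[str] = []
    for x, y in zip(a, b):
        if x != y:
            break
        prefix.append(x)
    return prefix


class LocalAgreement:
    def __init__(self) -> None:
        self.confirmed: list[str] = []
        self.previous: list[str] = []

    def merge(self, hypothesis: list[str]) -> list[str]:
        """Commit tokens that both the previous and current hypotheses agree on."""
        agreed = common_prefix(self.previous, hypothesis)
        newly_confirmed = agreed[len(self.confirmed):]
        self.confirmed.extend(newly_confirmed)
        self.previous = hypothesis
        return newly_confirmed


# Usage: feed successive hypotheses as more audio accumulates.
agreement = LocalAgreement()
print(agreement.merge("one two".split()))              # [] (nothing to agree with yet)
print(agreement.merge("one two three".split()))        # ['one', 'two']
print(agreement.merge("one two three four".split()))   # ['three']
```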
{speaches → faster_whisper_server}/utils.py
RENAMED
File without changes
tests/__init__.py
DELETED
File without changes
tests/app_test.py
CHANGED
@@ -10,9 +10,9 @@ from fastapi import WebSocketDisconnect
 from fastapi.testclient import TestClient
 from starlette.testclient import WebSocketTestSession

-from
-from
-from
+from faster_whisper_server.config import BYTES_PER_SECOND
+from faster_whisper_server.main import app
+from faster_whisper_server.server_models import TranscriptionVerboseJsonResponse

 SIMILARITY_THRESHOLD = 0.97
 AUDIO_FILES_LIMIT = 5
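tests/app_test.py drives the WebSocket endpoint through Starlette's WebSocketTestSession and chunks audio by BYTES_PER_SECOND. Against a running server, the same flow can be sketched with the `websockets` client library (an assumption; the test suite itself uses the in-process test client). BYTES_PER_SECOND is recomputed here as 16,000 samples/s × 2 bytes per 16-bit mono sample, matching the audio format the README requires:

```python
import asyncio

import websockets

BYTES_PER_SECOND = 16000 * 2  # 16 kHz, 16-bit (2-byte) mono samples


async def stream_file(path: str) -> None:
    async with websockets.connect("ws://0.0.0.0:8000/v1/audio/transcriptions") as ws:

        async def reader() -> None:
            # Print transcription messages as the server emits them.
            async for message in ws:
                print(message)

        read_task = asyncio.create_task(reader())
        with open(path, "rb") as f:
            while chunk := f.read(BYTES_PER_SECOND):
                await ws.send(chunk)    # binary frame, as with `websocat --binary`
                await asyncio.sleep(1)  # roughly real-time pacing
        await ws.close()
        await read_task


asyncio.run(stream_file("output.raw"))
```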