Fedir Zadniprovskyi committed on
Commit 39ee116 · 1 Parent(s): d0feed8

chore: rename to 'faster-whisper-server'

.github/workflows/docker-build-and-push.yaml CHANGED
@@ -28,7 +28,7 @@ jobs:
       uses: docker/metadata-action@v5
       with:
         images: |
-          fedirz/speaches
+          fedirz/faster-whisper-server
         # https://github.com/docker/metadata-action?tab=readme-ov-file#flavor-input
         flavor: |
           latest=false
@@ -47,5 +47,5 @@ jobs:
         # platforms: linux/amd64,linux/arm64
         tags: ${{ steps.meta.outputs.tags }}
         # TODO: cache
-        # cache-from: type=registry,ref=fedirz/speaches:buildcache
-        # cache-to: type=registry,ref=fedirz/speaches:buildcache,mode=max
+        # cache-from: type=registry,ref=fedirz/faster-whisper-server:buildcache
+        # cache-to: type=registry,ref=fedirz/faster-whisper-server:buildcache,mode=max
Dockerfile.cpu CHANGED
@@ -9,12 +9,12 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/* && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
-WORKDIR /root/speaches
+WORKDIR /root/faster-whisper-server
 COPY pyproject.toml poetry.lock ./
 RUN poetry install --only main
-COPY ./speaches ./speaches
+COPY ./faster_whisper_server ./faster_whisper_server
 ENTRYPOINT ["poetry", "run"]
-CMD ["uvicorn", "speaches.main:app"]
+CMD ["uvicorn", "faster_whisper_server.main:app"]
 ENV WHISPER_MODEL=distil-medium.en
 ENV WHISPER_INFERENCE_DEVICE=cpu
 ENV WHISPER_COMPUTE_TYPE=int8
Dockerfile.cuda CHANGED
@@ -9,12 +9,12 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/* && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
-WORKDIR /root/speaches
+WORKDIR /root/faster-whisper-server
 COPY pyproject.toml poetry.lock ./
 RUN poetry install --only main
-COPY ./speaches ./speaches
+COPY ./faster_whisper_server ./faster_whisper_server
 ENTRYPOINT ["poetry", "run"]
-CMD ["uvicorn", "speaches.main:app"]
+CMD ["uvicorn", "faster_whisper_server.main:app"]
 ENV WHISPER_MODEL=distil-large-v3
 ENV WHISPER_INFERENCE_DEVICE=cuda
 ENV UVICORN_HOST=0.0.0.0
README.md CHANGED
@@ -1,20 +1,27 @@
-# WARN: WIP (code is ugly, bad documentation, may have bugs, test files aren't included, CPU inference was barely tested, etc.)
-# Intro
-:peach:`speaches` is a web server that supports real-time transcription using WebSockets.
+## Faster Whisper Server
+`faster-whisper-server` is a web server that supports real-time transcription using WebSockets.
 - [faster-whisper](https://github.com/SYSTRAN/faster-whisper) is used as the backend. Both GPU and CPU inference are supported.
 - LocalAgreement2 ([paper](https://aclanthology.org/2023.ijcnlp-demo.3.pdf) | [original implementation](https://github.com/ufal/whisper_streaming)) algorithm is used for real-time transcription.
 - Can be deployed using Docker (Compose configuration can be found in [compose.yaml](./compose.yaml)).
-- All configuration is done through environment variables. See [config.py](./speaches/config.py).
+- All configuration is done through environment variables. See [config.py](./faster_whisper_server/config.py).
 - NOTE: only transcription of single channel, 16000 sample rate, raw, 16-bit little-endian audio is supported.
 - NOTE: this isn't really meant to be used as a standalone tool but rather to add transcription features to other applications.
 Please create an issue if you find a bug, have a question, or a feature suggestion.
 # Quick Start
-Spinning up a `speaches` web server
+Using Docker
 ```bash
-docker run --gpus=all --publish 8000:8000 --mount type=bind,source=$HOME/.cache/huggingface,target=/root/.cache/huggingface fedirz/speaches:cuda
+docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:cuda
 # or
-docker run --publish 8000:8000 --mount type=bind,source=$HOME/.cache/huggingface,target=/root/.cache/huggingface fedirz/speaches:cpu
+docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:cpu
 ```
+Using Docker Compose
+```bash
+curl -sO https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
+docker compose up --detach faster-whisper-server-cuda
+# or
+docker compose up --detach faster-whisper-server-cpu
+```
+## Usage
 Streaming audio data from a microphone. [websocat](https://github.com/vi/websocat?tab=readme-ov-file#installation) installation is required.
 ```bash
 ffmpeg -loglevel quiet -f alsa -i default -ac 1 -ar 16000 -f s16le - | websocat --binary ws://0.0.0.0:8000/v1/audio/transcriptions
@@ -38,7 +45,7 @@ ffmpeg -i output.wav -ac 1 -ar 16000 -f s16le output.raw
 curl -X POST -F "file=@output.raw" http://0.0.0.0:8000/v1/audio/transcriptions
 # Output: "{\"text\":\"One, two, three, four, five.\"}"%
 ```
-# Roadmap
+## Roadmap
 - [ ] Support file transcription (non-streaming) of multiple formats.
 - [ ] CLI client.
 - [ ] Separate the web server related code from the "core", and publish "core" as a package.
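The README's NOTE above pins down the only accepted input format: single-channel, 16000 Hz, raw, 16-bit little-endian audio. A small sketch of the byte rate that format implies; the constant names mirror `SAMPLES_PER_SECOND` and `BYTES_PER_SECOND` referenced elsewhere in this diff, but their exact definitions in `config.py` are assumed, not quoted.

```python
# Byte rate of the audio format the server accepts
# (mono, 16000 Hz, 16-bit little-endian PCM).
SAMPLES_PER_SECOND = 16000
BYTES_PER_SAMPLE = 2  # 16-bit samples are 2 bytes each
CHANNELS = 1

BYTES_PER_SECOND = SAMPLES_PER_SECOND * BYTES_PER_SAMPLE * CHANNELS
print(BYTES_PER_SECOND)  # 32000
```

This is why the `ffmpeg` commands in the Usage section pass `-ac 1 -ar 16000 -f s16le`: each second of speech becomes exactly 32000 bytes on the wire.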
Taskfile.yaml CHANGED
@@ -1,6 +1,6 @@
 version: "3"
 tasks:
-  speaches: poetry run uvicorn --host 0.0.0.0 speaches.main:app {{.CLI_ARGS}}
+  server: poetry run uvicorn --host 0.0.0.0 faster_whisper_server.main:app {{.CLI_ARGS}}
   test:
     cmds:
       - poetry run pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
@@ -11,15 +11,15 @@ tasks:
       - docker compose build
     sources:
       - Dockerfile.*
-      - speaches/*.py
+      - faster_whisper_server/*.py
   create-multi-arch-builder: docker buildx create --name main --driver=docker-container
   build-and-push:
     cmds:
       - docker compose build --builder main --push
     sources:
       - Dockerfile.*
-      - speaches/*.py
+      - faster_whisper_server/*.py
-  sync: lsyncd -nodaemon -delay 0 -rsyncssh . gpu-box speaches
+  sync: lsyncd -nodaemon -delay 0 -rsyncssh . gpu-box faster-whisper-server
   # Python's urllib3 takes forever when ipv6 is enabled
   # https://support.nordvpn.com/hc/en-us/articles/20164669224337-How-to-disable-IPv6-on-Linux
   disable-ipv6: sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 && sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1
compose.yaml CHANGED
@@ -1,7 +1,7 @@
 # NOTE: arm images haven't been tested
 services:
-  speaches-cuda:
-    image: fedirz/speaches:cuda
+  faster-whisper-server-cuda:
+    image: fedirz/faster-whisper-server:cuda
     build:
       dockerfile: Dockerfile.cuda
       context: .
@@ -9,7 +9,7 @@ services:
         - linux/amd64
         - linux/arm64
       tags:
-        - fedirz/speaches:cuda
+        - fedirz/faster-whisper-server:cuda
     volumes:
       - ~/.cache/huggingface:/root/.cache/huggingface
     restart: unless-stopped
@@ -20,8 +20,8 @@ services:
         reservations:
           devices:
            - capabilities: ["gpu"]
-  speaches-cpu:
-    image: fedirz/speaches:cpu
+  faster-whisper-server-cpu:
+    image: fedirz/faster-whisper-server:cpu
     build:
       dockerfile: Dockerfile.cpu
       context: .
@@ -29,7 +29,7 @@ services:
        - linux/amd64
        - linux/arm64
      tags:
-       - fedirz/speaches:cpu
+       - fedirz/faster-whisper-server:cpu
     volumes:
       - ~/.cache/huggingface:/root/.cache/huggingface
     restart: unless-stopped
{speaches → faster_whisper_server}/__init__.py RENAMED
File without changes
{speaches → faster_whisper_server}/asr.py RENAMED
@@ -4,9 +4,9 @@ from typing import Iterable
 
 from faster_whisper import transcribe
 
-from speaches.audio import Audio
-from speaches.core import Transcription, Word
-from speaches.logger import logger
+from faster_whisper_server.audio import Audio
+from faster_whisper_server.core import Transcription, Word
+from faster_whisper_server.logger import logger
 
 
 class FasterWhisperASR:
{speaches → faster_whisper_server}/audio.py RENAMED
@@ -7,8 +7,8 @@ import numpy as np
 import soundfile as sf
 from numpy.typing import NDArray
 
-from speaches.config import SAMPLES_PER_SECOND
-from speaches.logger import logger
+from faster_whisper_server.config import SAMPLES_PER_SECOND
+from faster_whisper_server.logger import logger
 
 
 def audio_samples_from_file(file: BinaryIO) -> NDArray[np.float32]:
{speaches → faster_whisper_server}/config.py RENAMED
File without changes
{speaches → faster_whisper_server}/core.py RENAMED
@@ -4,7 +4,7 @@ from __future__ import annotations
 import re
 from dataclasses import dataclass
 
-from speaches.config import config
+from faster_whisper_server.config import config
 
 
 # TODO: use the `Segment` from `faster-whisper.transcribe` instead
{speaches → faster_whisper_server}/logger.py RENAMED
@@ -1,8 +1,8 @@
 import logging
 
-from speaches.config import config
+from faster_whisper_server.config import config
 
-# Disables all but `speaches` logger
+# Disables all but `faster_whisper_server` logger
 
 root_logger = logging.getLogger()
 root_logger.setLevel(logging.CRITICAL)
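The `logger.py` diff shows the module's pattern: silence the root logger, then re-enable only the project's own namespace. A minimal self-contained sketch of that pattern follows; the real module derives the level from `config` (not shown in this hunk), so `logging.DEBUG` here is an assumption for illustration.

```python
import logging

# Silence everything by default: handlers attached below the root
# still see records, but the root level gates third-party noise.
root_logger = logging.getLogger()
root_logger.setLevel(logging.CRITICAL)

# Re-enable only the project's own logger. The DEBUG level is an
# assumed stand-in; the actual level comes from config.log_level.
logger = logging.getLogger("faster_whisper_server")
logger.setLevel(logging.DEBUG)

# Child loggers like "faster_whisper_server.transcriber" inherit
# the project level via the dotted-name hierarchy.
child = logging.getLogger("faster_whisper_server.transcriber")
print(child.getEffectiveLevel() == logging.DEBUG)
```

This is why the rename has to touch the string `"speaches"` in logger names too, not just the import paths: the hierarchy is keyed on the dotted module name.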
{speaches → faster_whisper_server}/main.py RENAMED
@@ -20,16 +20,22 @@ from fastapi.websockets import WebSocketState
 from faster_whisper import WhisperModel
 from faster_whisper.vad import VadOptions, get_speech_timestamps
 
-from speaches import utils
-from speaches.asr import FasterWhisperASR
-from speaches.audio import AudioStream, audio_samples_from_file
-from speaches.config import SAMPLES_PER_SECOND, Language, Model, ResponseFormat, config
-from speaches.logger import logger
-from speaches.server_models import (
+from faster_whisper_server import utils
+from faster_whisper_server.asr import FasterWhisperASR
+from faster_whisper_server.audio import AudioStream, audio_samples_from_file
+from faster_whisper_server.config import (
+    SAMPLES_PER_SECOND,
+    Language,
+    Model,
+    ResponseFormat,
+    config,
+)
+from faster_whisper_server.logger import logger
+from faster_whisper_server.server_models import (
     TranscriptionJsonResponse,
     TranscriptionVerboseJsonResponse,
 )
-from speaches.transcriber import audio_transcriber
+from faster_whisper_server.transcriber import audio_transcriber
 
 models: OrderedDict[Model, WhisperModel] = OrderedDict()
 
@@ -72,7 +78,7 @@ app = FastAPI(lifespan=lifespan)
 
 @app.get("/health")
 def health() -> Response:
-    return Response(status_code=200, content="Everything is peachy!")
+    return Response(status_code=200, content="OK")
 
 
 @app.post("/v1/audio/translations")
{speaches → faster_whisper_server}/server_models.py RENAMED
@@ -3,8 +3,8 @@ from __future__ import annotations
 from faster_whisper.transcribe import Segment, TranscriptionInfo, Word
 from pydantic import BaseModel
 
-from speaches import utils
-from speaches.core import Transcription
+from faster_whisper_server import utils
+from faster_whisper_server.core import Transcription
 
 
 # https://platform.openai.com/docs/api-reference/audio/json-object
{speaches → faster_whisper_server}/transcriber.py RENAMED
@@ -2,11 +2,16 @@ from __future__ import annotations
 
 from typing import AsyncGenerator
 
-from speaches.asr import FasterWhisperASR
-from speaches.audio import Audio, AudioStream
-from speaches.config import config
-from speaches.core import Transcription, Word, common_prefix, to_full_sentences
-from speaches.logger import logger
+from faster_whisper_server.asr import FasterWhisperASR
+from faster_whisper_server.audio import Audio, AudioStream
+from faster_whisper_server.config import config
+from faster_whisper_server.core import (
+    Transcription,
+    Word,
+    common_prefix,
+    to_full_sentences,
+)
+from faster_whisper_server.logger import logger
 
 
 class LocalAgreement:
{speaches → faster_whisper_server}/utils.py RENAMED
File without changes
tests/__init__.py DELETED
File without changes
tests/app_test.py CHANGED
@@ -10,9 +10,9 @@ from fastapi import WebSocketDisconnect
 from fastapi.testclient import TestClient
 from starlette.testclient import WebSocketTestSession
 
-from speaches.config import BYTES_PER_SECOND
-from speaches.main import app
-from speaches.server_models import TranscriptionVerboseJsonResponse
+from faster_whisper_server.config import BYTES_PER_SECOND
+from faster_whisper_server.main import app
+from faster_whisper_server.server_models import TranscriptionVerboseJsonResponse
 
 SIMILARITY_THRESHOLD = 0.97
 AUDIO_FILES_LIMIT = 5
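Nearly every hunk in this commit is the same mechanical substitution: the module name `speaches` becomes `faster_whisper_server` in imports, logger names, and path strings. A hedged sketch of how such a rewrite could be scripted (this is not necessarily how the author performed it; the commit may well have been done by hand or with an editor's project-wide rename):

```python
import re

OLD, NEW = "speaches", "faster_whisper_server"

# \b keeps the substitution on whole-word boundaries, so e.g. a
# hypothetical identifier "speaches2" would be left untouched.
_pattern = re.compile(rf"\b{OLD}\b")

def rename_module_refs(text: str) -> str:
    """Rewrite every whole-word `speaches` reference, as the diffs above do."""
    return _pattern.sub(NEW, text)

# The old import line from asr.py becomes the new one:
print(rename_module_refs("from speaches.audio import Audio"))
# from faster_whisper_server.audio import Audio
```

Note what a pure text substitution cannot cover: the `speaches` → `faster-whisper-server` hyphenated form used for Docker image names, Compose service names, and `WORKDIR` paths differs from the underscored Python package name, which is why those hunks change to two distinct spellings.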