WhatShutup

Runtime error

danelkay committed about 16 hours ago

Commit

bac20d5

1 Parent(s): cd0f407

🔥 chore: remove unused CLI and alternative requirements files

This commit removes the legacy CLI (cli.py), the app-*.py launcher scripts, and the alternative requirements files, and adds a pyproject.toml that declares the project's dependencies.

This change:
- Removes app-local.py, app-network.py and app-shared.py
- Removes cli.py as functionality is now handled differently
- Removes requirements-fasterWhisper.txt and requirements-whisper.txt
- Adds pyproject.toml with proper dependencies configuration

Files changed (9) hide show

app-local.py +0 -5
app-network.py +0 -5
app-shared.py +0 -5
cli.py +0 -206
pyproject.toml +106 -0
requirements-fasterWhisper.txt +0 -16
requirements-whisper.txt +0 -16
requirements.txt +1 -1
uv.lock +0 -0

app-local.py DELETED Viewed

@@ -1,5 +0,0 @@
-# Run the app with no audio file restrictions
-from app import create_ui
-from src.config import ApplicationConfig
-create_ui(ApplicationConfig.create_default(input_audio_max_duration=-1))

app-network.py DELETED Viewed

@@ -1,5 +0,0 @@
-# Run the app with no audio file restrictions, and make it available on the network
-from app import create_ui
-from src.config import ApplicationConfig
-create_ui(ApplicationConfig.create_default(input_audio_max_duration=-1, server_name="0.0.0.0"))

app-shared.py DELETED Viewed

@@ -1,5 +0,0 @@
-# Run the app with no audio file restrictions
-from app import create_ui
-from src.config import ApplicationConfig
-create_ui(ApplicationConfig.create_default(input_audio_max_duration=-1, share=True))

cli.py DELETED Viewed

@@ -1,206 +0,0 @@
-import argparse
-import os
-import pathlib
-from urllib.parse import urlparse
-import warnings
-import numpy as np
-import torch
-from app import VadOptions, WhisperTranscriber
-from src.config import VAD_INITIAL_PROMPT_MODE_VALUES, ApplicationConfig, VadInitialPromptMode
-from src.diarization.diarization import Diarization
-from src.download import download_url
-from src.languages import get_language_names
-from src.utils import optional_float, optional_int, str2bool
-from src.whisper.whisperFactory import create_whisper_container
-def cli():
-    app_config = ApplicationConfig.create_default()
-    whisper_models = app_config.get_model_names()
-    # For the CLI, we fallback to saving the output to the current directory
-    output_dir = app_config.output_dir if app_config.output_dir is not None else "."
-    # Environment variable overrides
-    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", app_config.whisper_implementation)
-    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("audio", nargs="+", type=str, \
-                        help="audio file(s) to transcribe")
-    parser.add_argument("--model", default=app_config.default_model_name, choices=whisper_models, \
-                        help="name of the Whisper model to use") # medium
-    parser.add_argument("--model_dir", type=str, default=app_config.model_dir, \
-                        help="the path to save model files; uses ~/.cache/whisper by default")
-    parser.add_argument("--device", default=app_config.device, \
-                        help="device to use for PyTorch inference")
-    parser.add_argument("--output_dir", "-o", type=str, default=output_dir, \
-                        help="directory to save the outputs")
-    parser.add_argument("--verbose", type=str2bool, default=app_config.verbose, \
-                        help="whether to print out the progress and debug messages")
-    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"],\
-                        help="the Whisper implementation to use")
-    parser.add_argument("--task", type=str, default=app_config.task, choices=["transcribe", "translate"], \
-                        help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
-    parser.add_argument("--language", type=str, default=app_config.language, choices=sorted(get_language_names()), \
-                        help="language spoken in the audio, specify None to perform language detection")
-    parser.add_argument("--vad", type=str, default=app_config.default_vad, choices=["none", "silero-vad", "silero-vad-skip-gaps", "silero-vad-expand-into-gaps", "periodic-vad"], \
-                        help="The voice activity detection algorithm to use") # silero-vad
-    parser.add_argument("--vad_initial_prompt_mode", type=str, default=app_config.vad_initial_prompt_mode, choices=VAD_INITIAL_PROMPT_MODE_VALUES, \
-                        help="Whether or not to prepend the initial prompt to each VAD segment (prepend_all_segments), or just the first segment (prepend_first_segment)") # prepend_first_segment
-    parser.add_argument("--vad_merge_window", type=optional_float, default=app_config.vad_merge_window, \
-                        help="The window size (in seconds) to merge voice segments")
-    parser.add_argument("--vad_max_merge_size", type=optional_float, default=app_config.vad_max_merge_size,\
-                         help="The maximum size (in seconds) of a voice segment")
-    parser.add_argument("--vad_padding", type=optional_float, default=app_config.vad_padding, \
-                        help="The padding (in seconds) to add to each voice segment")
-    parser.add_argument("--vad_prompt_window", type=optional_float, default=app_config.vad_prompt_window, \
-                        help="The window size of the prompt to pass to Whisper")
-    parser.add_argument("--vad_cpu_cores", type=int, default=app_config.vad_cpu_cores, \
-                        help="The number of CPU cores to use for VAD pre-processing.") # 1
-    parser.add_argument("--vad_parallel_devices", type=str, default=app_config.vad_parallel_devices, \
-                        help="A commma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.") # ""
-    parser.add_argument("--auto_parallel", type=bool, default=app_config.auto_parallel, \
-                        help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.") # False
-    parser.add_argument("--temperature", type=float, default=app_config.temperature, \
-                        help="temperature to use for sampling")
-    parser.add_argument("--best_of", type=optional_int, default=app_config.best_of, \
-                        help="number of candidates when sampling with non-zero temperature")
-    parser.add_argument("--beam_size", type=optional_int, default=app_config.beam_size, \
-                        help="number of beams in beam search, only applicable when temperature is zero")
-    parser.add_argument("--patience", type=float, default=app_config.patience, \
-                        help="optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search")
-    parser.add_argument("--length_penalty", type=float, default=app_config.length_penalty, \
-                        help="optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses simple lengt normalization by default")
-    parser.add_argument("--suppress_tokens", type=str, default=app_config.suppress_tokens, \
-                        help="comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations")
-    parser.add_argument("--initial_prompt", type=str, default=app_config.initial_prompt, \
-                        help="optional text to provide as a prompt for the first window.")
-    parser.add_argument("--condition_on_previous_text", type=str2bool, default=app_config.condition_on_previous_text, \
-                        help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop")
-    parser.add_argument("--fp16", type=str2bool, default=app_config.fp16, \
-                        help="whether to perform inference in fp16; True by default")
-    parser.add_argument("--compute_type", type=str, default=app_config.compute_type, choices=["default", "auto", "int8", "int8_float16", "int16", "float16", "float32"], \
-                        help="the compute type to use for inference")
-    parser.add_argument("--temperature_increment_on_fallback", type=optional_float, default=app_config.temperature_increment_on_fallback, \
-                        help="temperature to increase when falling back when the decoding fails to meet either of the thresholds below")
-    parser.add_argument("--compression_ratio_threshold", type=optional_float, default=app_config.compression_ratio_threshold, \
-                        help="if the gzip compression ratio is higher than this value, treat the decoding as failed")
-    parser.add_argument("--logprob_threshold", type=optional_float, default=app_config.logprob_threshold, \
-                        help="if the average log probability is lower than this value, treat the decoding as failed")
-    parser.add_argument("--no_speech_threshold", type=optional_float, default=app_config.no_speech_threshold, \
-                        help="if the probability of the <|nospeech|> token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silence")
-    parser.add_argument("--word_timestamps", type=str2bool, default=app_config.word_timestamps,
-                        help="(experimental) extract word-level timestamps and refine the results based on them")
-    parser.add_argument("--prepend_punctuations", type=str, default=app_config.prepend_punctuations,
-                        help="if word_timestamps is True, merge these punctuation symbols with the next word")
-    parser.add_argument("--append_punctuations", type=str, default=app_config.append_punctuations,
-                        help="if word_timestamps is True, merge these punctuation symbols with the previous word")
-    parser.add_argument("--highlight_words", type=str2bool, default=app_config.highlight_words,
-                        help="(requires --word_timestamps True) underline each word as it is spoken in srt and vtt")
-    parser.add_argument("--threads", type=optional_int, default=0,
-                        help="number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADS")
-    # Diarization
-    parser.add_argument('--auth_token', type=str, default=app_config.auth_token, help='HuggingFace API Token (optional)')
-    parser.add_argument("--diarization", type=str2bool, default=app_config.diarization, \
-                        help="whether to perform speaker diarization")
-    parser.add_argument("--diarization_num_speakers", type=int, default=app_config.diarization_speakers, help="Number of speakers")
-    parser.add_argument("--diarization_min_speakers", type=int, default=app_config.diarization_min_speakers, help="Minimum number of speakers")
-    parser.add_argument("--diarization_max_speakers", type=int, default=app_config.diarization_max_speakers, help="Maximum number of speakers")
-    args = parser.parse_args().__dict__
-    model_name: str = args.pop("model")
-    model_dir: str = args.pop("model_dir")
-    output_dir: str = args.pop("output_dir")
-    device: str = args.pop("device")
-    os.makedirs(output_dir, exist_ok=True)
-    if (threads := args.pop("threads")) > 0:
-        torch.set_num_threads(threads)
-    whisper_implementation = args.pop("whisper_implementation")
-    print(f"Using {whisper_implementation} for Whisper")
-    if model_name.endswith(".en") and args["language"] not in {"en", "English"}:
-        warnings.warn(f"{model_name} is an English-only model but receipted '{args['language']}'; using English instead.")
-        args["language"] = "en"
-    temperature = args.pop("temperature")
-    temperature_increment_on_fallback = args.pop("temperature_increment_on_fallback")
-    if temperature_increment_on_fallback is not None:
-        temperature = tuple(np.arange(temperature, 1.0 + 1e-6, temperature_increment_on_fallback))
-    else:
-        temperature = [temperature]
-    vad = args.pop("vad")
-    vad_initial_prompt_mode = args.pop("vad_initial_prompt_mode")
-    vad_merge_window = args.pop("vad_merge_window")
-    vad_max_merge_size = args.pop("vad_max_merge_size")
-    vad_padding = args.pop("vad_padding")
-    vad_prompt_window = args.pop("vad_prompt_window")
-    vad_cpu_cores = args.pop("vad_cpu_cores")
-    auto_parallel = args.pop("auto_parallel")
-    compute_type = args.pop("compute_type")
-    highlight_words = args.pop("highlight_words")
-    auth_token = args.pop("auth_token")
-    diarization = args.pop("diarization")
-    num_speakers = args.pop("diarization_num_speakers")
-    min_speakers = args.pop("diarization_min_speakers")
-    max_speakers = args.pop("diarization_max_speakers")
-    transcriber = WhisperTranscriber(delete_uploaded_files=False, vad_cpu_cores=vad_cpu_cores, app_config=app_config)
-    transcriber.set_parallel_devices(args.pop("vad_parallel_devices"))
-    transcriber.set_auto_parallel(auto_parallel)
-    if diarization:
-        transcriber.set_diarization(auth_token=auth_token, enable_daemon_process=False, num_speakers=num_speakers, min_speakers=min_speakers, max_speakers=max_speakers)
-    model = create_whisper_container(whisper_implementation=whisper_implementation, model_name=model_name,
-                                     device=device, compute_type=compute_type, download_root=model_dir, models=app_config.models)
-    if (transcriber._has_parallel_devices()):
-        print("Using parallel devices:", transcriber.parallel_device_list)
-    for audio_path in args.pop("audio"):
-        sources = []
-        # Detect URL and download the audio
-        if (uri_validator(audio_path)):
-            # Download from YouTube/URL directly
-            for source_path in  download_url(audio_path, maxDuration=-1, destinationDirectory=output_dir, playlistItems=None):
-                source_name = os.path.basename(source_path)
-                sources.append({ "path": source_path, "name": source_name })
-        else:
-            sources.append({ "path": audio_path, "name": os.path.basename(audio_path) })
-        for source in sources:
-            source_path = source["path"]
-            source_name = source["name"]
-            vadOptions = VadOptions(vad, vad_merge_window, vad_max_merge_size, vad_padding, vad_prompt_window,
-                                    VadInitialPromptMode.from_string(vad_initial_prompt_mode))
-            result = transcriber.transcribe_file(model, source_path, temperature=temperature, vadOptions=vadOptions, **args)
-            transcriber.write_result(result, source_name, output_dir, highlight_words)
-    transcriber.close()
-def uri_validator(x):
-    try:
-        result = urlparse(x)
-        return all([result.scheme, result.netloc])
-    except:
-        return False
-if __name__ == '__main__':
-    cli()

pyproject.toml ADDED Viewed

@@ -0,0 +1,106 @@

+[project]
+name = "whatshutup"
+version = "0.1.0"
+description = "Add your description here"
+requires-python = ">=3.12"
+dependencies = [
+    "faster-whisper>=1.1.1",
+    "ffmpeg>=1.4",
+    "gradio>=5.14.0",
+    "huggingface-hub>=0.28.1",
+    "pydub>=0.25.1",
+    "sentencepiece>=0.2.0",
+    "torch>=2.6.0",
+    "transformers>=4.48.2",
+]
+[dependency-groups]
+app = [
+    "aiofiles==23.2.1",
+    "annotated-types==0.7.0",
+    "anyio==4.8.0",
+    "audioop-lts==0.2.1 ; python_full_version >= '3.13'",
+    "av==14.1.0",
+    "certifi==2025.1.31",
+    "charset-normalizer==3.4.1",
+    "click==8.1.8 ; sys_platform != 'emscripten'",
+    "colorama==0.4.6 ; sys_platform == 'win32'",
+    "coloredlogs==15.0.1",
+    "ctranslate2==4.5.0",
+    "fastapi==0.115.8",
+    "faster-whisper==1.1.1",
+    "ffmpeg==1.4",
+    "ffmpy==0.5.0",
+    "filelock==3.17.0",
+    "flatbuffers==25.1.24",
+    "fsspec==2024.12.0",
+    "gradio==5.14.0",
+    "gradio-client==1.7.0",
+    "h11==0.14.0",
+    "httpcore==1.0.7",
+    "httpx==0.28.1",
+    "huggingface-hub==0.28.1",
+    "humanfriendly==10.0",
+    "idna==3.10",
+    "jinja2==3.1.5",
+    "markdown-it-py==3.0.0 ; sys_platform != 'emscripten'",
+    "markupsafe==2.1.5",
+    "mdurl==0.1.2 ; sys_platform != 'emscripten'",
+    "mpmath==1.3.0",
+    "networkx==3.4.2",
+    "numpy==2.2.2",
+    "nvidia-cublas-cu12==12.4.5.8 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-cuda-cupti-cu12==12.4.127 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-cuda-nvrtc-cu12==12.4.127 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-cuda-runtime-cu12==12.4.127 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-cudnn-cu12==9.1.0.70 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-cufft-cu12==11.2.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-curand-cu12==10.3.5.147 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-cusolver-cu12==11.6.1.9 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-cusparse-cu12==12.3.1.170 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-cusparselt-cu12==0.6.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-nccl-cu12==2.21.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-nvjitlink-cu12==12.4.127 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "nvidia-nvtx-cu12==12.4.127 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "onnxruntime==1.20.1",
+    "orjson==3.10.15",
+    "packaging==24.2",
+    "pandas==2.2.3",
+    "pillow==11.1.0",
+    "protobuf==5.29.3",
+    "pydantic==2.10.6",
+    "pydantic-core==2.27.2",
+    "pydub==0.25.1",
+    "pygments==2.19.1 ; sys_platform != 'emscripten'",
+    "pyreadline3==3.5.4 ; sys_platform == 'win32'",
+    "python-dateutil==2.9.0.post0",
+    "python-multipart==0.0.20",
+    "pytz==2025.1",
+    "pyyaml==6.0.2",
+    "regex==2024.11.6",
+    "requests==2.32.3",
+    "rich==13.9.4 ; sys_platform != 'emscripten'",
+    "ruff==0.9.4 ; sys_platform != 'emscripten'",
+    "safehttpx==0.1.6",
+    "safetensors==0.5.2",
+    "semantic-version==2.10.0",
+    "sentencepiece==0.2.0",
+    "setuptools==75.8.0",
+    "shellingham==1.5.4 ; sys_platform != 'emscripten'",
+    "six==1.17.0",
+    "sniffio==1.3.1",
+    "starlette==0.45.3",
+    "sympy==1.13.1",
+    "tokenizers==0.21.0",
+    "tomlkit==0.13.2",
+    "torch==2.6.0",
+    "tqdm==4.67.1",
+    "transformers==4.48.2",
+    "triton==3.2.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'",
+    "typer==0.15.1 ; sys_platform != 'emscripten'",
+    "typing-extensions==4.12.2",
+    "tzdata==2025.1",
+    "urllib3==2.3.0",
+    "uvicorn==0.34.0 ; sys_platform != 'emscripten'",
+    "websockets==14.2",
+]

requirements-fasterWhisper.txt DELETED Viewed

@@ -1,16 +0,0 @@
-ctranslate2
-faster-whisper
-ffmpeg-python==0.2.0
-gradio==3.38.0
-gradio-client==0.8.1
-yt-dlp
-json5
-torch
-torchaudio
-more_itertools
-# Needed by diarization
-intervaltree
-srt
-torch
-https://github.com/pyannote/pyannote-audio/archive/refs/heads/develop.zip

requirements-whisper.txt DELETED Viewed

@@ -1,16 +0,0 @@
-git+https://github.com/huggingface/transformers
-git+https://github.com/openai/whisper.git
-transformers
-ffmpeg-python==0.2.0
-gradio==3.38.0
-gradio-client==0.8.1
-yt-dlp
-torchaudio
-altair
-json5
-# Needed by diarization
-intervaltree
-srt
-torch
-https://github.com/pyannote/pyannote-audio/archive/refs/heads/develop.zip

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv export
 aiofiles==23.2.1 \
     --hash=sha256:19297512c647d4b27a2cf7c34caa7e405c0d60b5560618a29a9fe027b18b0107 \
     --hash=sha256:84ec2218d8419404abcb9f0c02df3f34c6e0a68ed41072acfb1cef5cbc29051a

 # This file was autogenerated by uv via the following command:
+#    uv export --output-file .\requirements.txt
 aiofiles==23.2.1 \
     --hash=sha256:19297512c647d4b27a2cf7c34caa7e405c0d60b5560618a29a9fe027b18b0107 \
     --hash=sha256:84ec2218d8419404abcb9f0c02df3f34c6e0a68ed41072acfb1cef5cbc29051a

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff