# Configuration settings for the Whisper Transcription Space.
#
# Plain module-level constants, grouped by concern. Each constant sits on its
# own line so that a "#" comment cannot swallow the assignments that follow it.

# Model configurations
WHISPER_MODEL = "distil-whisper/distil-large-v3"
DIARIZATION_MODEL = "pyannote/speaker-diarization-3.1"

# Audio processing settings
AUDIO_SAMPLE_RATE = 16000
AUDIO_CHANNELS = 1
MAX_AUDIO_DURATION = 600  # 10 minutes in seconds

# Transcription settings
DEFAULT_BEAM_SIZE = 5
DEFAULT_LANGUAGE = None  # None means auto-detect
DEFAULT_TRANSLATE = False

# Diarization settings
MAX_SPEAKERS = 20
DEFAULT_NUM_SPEAKERS = None  # None means auto-detect

# Segment grouping settings
MAX_SEGMENT_GAP = 1.0  # seconds
MAX_SEGMENT_DURATION = 30.0  # seconds

# Flash attention settings
FLASH_ATTENTION_ENABLED = True
# Stored as a string, not a torch dtype object.
# NOTE(review): presumably resolved to a torch dtype by the consumer - confirm.
TORCH_DTYPE = "float16"

# ZeroGPU settings
GPU_MEMORY_FRACTION = 0.8
CUDA_DEVICE = "cuda:0"

# Gradio interface settings
GRADIO_THEME = "soft"
GRADIO_DEBUG = False
GRADIO_SHARE = False

# Environment variables
# This is the NAME of the variable to look up, not the token value itself.
HF_TOKEN_ENV_VAR = "HF_TOKEN"

# Supported audio formats (lower-case file extensions, leading dot included)
SUPPORTED_AUDIO_FORMATS = [
    ".mp3",
    ".wav",
    ".m4a",
    ".flac",
    ".ogg",
    ".aac",
    ".wma",
    ".opus",
    ".webm",
]

# Language codes mapped to their display names.
# The "auto" entry is the auto-detect choice rather than a real language code.
SUPPORTED_LANGUAGES = {
    "auto": "Auto-detect",
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "it": "Italian",
    "pt": "Portuguese",
    "ru": "Russian",
    "ja": "Japanese",
    "ko": "Korean",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
    "tr": "Turkish",
    "pl": "Polish",
    "nl": "Dutch",
    "sv": "Swedish",
    "da": "Danish",
    "no": "Norwegian",
    "fi": "Finnish",
}