File size: 4,579 Bytes
78cb487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from pathlib import Path
import os
from dotenv import load_dotenv

load_dotenv()


def init_espeak():
    """Initialize eSpeak environment variables. Must be called before any other imports."""
    os.environ["PHONEMIZER_ESPEAK_LIBRARY"] = (
        r"C:\Program Files\eSpeak NG\libespeak-ng.dll"
    )
    os.environ["PHONEMIZER_ESPEAK_PATH"] = r"C:\Program Files\eSpeak NG\espeak-ng.exe"


init_espeak()

from pydantic_settings import BaseSettings
from pydantic import Field
from typing import Optional


class Settings(BaseSettings):
    """Settings class to manage application configurations."""

    BASE_DIR: Path = Path(__file__).parent.parent.parent
    MODELS_DIR: Path = BASE_DIR / "data" / "models"
    VOICES_DIR: Path = BASE_DIR / "data" / "voices"
    OUTPUT_DIR: Path = BASE_DIR / "output"
    RECORDINGS_DIR: Path = BASE_DIR / "recordings"

    ESPEAK_LIBRARY_PATH: str = r"C:\Program Files\eSpeak NG\libespeak-ng.dll"
    ESPEAK_PATH: str = r"C:\Program Files\eSpeak NG\espeak-ng.exe"

    TTS_MODEL: str = Field(..., env="TTS_MODEL")
    VOICE_NAME: str = Field(..., env="VOICE_NAME")
    SPEED: float = Field(default=1.0, env="SPEED")
    HUGGINGFACE_TOKEN: str = Field(..., env="HUGGINGFACE_TOKEN")

    LM_STUDIO_URL: str = Field(..., env="LM_STUDIO_URL")
    OLLAMA_URL: str = Field(..., env="OLLAMA_URL")
    DEFAULT_SYSTEM_PROMPT: str = Field(..., env="DEFAULT_SYSTEM_PROMPT")
    LLM_MODEL: str = Field(..., env="LLM_MODEL")
    NUM_THREADS: int = Field(default=2, env="NUM_THREADS")
    MAX_TOKENS: int = Field(default=512, env="MAX_TOKENS")
    LLM_TEMPERATURE: float = Field(default=0.7, env="LMM_TEMPERATURE")
    LLM_STREAM: bool = Field(default=False, env="LLM_STREAM")
    LLM_RETRY_DELAY: float = Field(default=0.5, env="LLM_RETRY_DELAY")
    MAX_RETRIES: int = Field(default=3, env="MAX_RETRIES")

    WHISPER_MODEL: str = Field(default="openai/whisper-tiny.en", env="WHISPER_MODEL")

    VAD_MODEL: str = Field(default="pyannote/segmentation-3.0", env="VAD_MODEL")
    VAD_MIN_DURATION_ON: float = Field(default=0.1, env="VAD_MIN_DURATION_ON")
    VAD_MIN_DURATION_OFF: float = Field(default=0.1, env="VAD_MIN_DURATION_OFF")

    CHUNK: int = Field(default=1024, env="CHUNK")
    FORMAT: str = Field(default="pyaudio.paFloat32", env="FORMAT")
    CHANNELS: int = Field(default=1, env="CHANNELS")
    RATE: int = Field(default=16000, env="RATE")
    OUTPUT_SAMPLE_RATE: int = Field(default=24000, env="OUTPUT_SAMPLE_RATE")
    RECORD_DURATION: int = Field(default=5, env="RECORD_DURATION")
    SILENCE_THRESHOLD: float = Field(default=0.01, env="SILENCE_THRESHOLD")
    INTERRUPTION_THRESHOLD: float = Field(default=0.02, env="INTERRUPTION_THRESHOLD")
    MAX_SILENCE_DURATION: int = Field(default=1, env="MAX_SILENCE_DURATION")
    SPEECH_CHECK_TIMEOUT: float = Field(default=0.1, env="SPEECH_CHECK_TIMEOUT")
    SPEECH_CHECK_THRESHOLD: float = Field(default=0.02, env="SPEECH_CHECK_THRESHOLD")
    ROLLING_BUFFER_TIME: float = Field(default=0.5, env="ROLLING_BUFFER_TIME")
    TARGET_SIZE: int = Field(default=15, env="TARGET_SIZE")
    FIRST_SENTENCE_SIZE: int = Field(default=3, env="FIRST_SENTENCE_SIZE")
    PLAYBACK_DELAY: float = Field(default=0.005, env="PLAYBACK_DELAY")

    def setup_directories(self):
        """Create necessary directories if they don't exist"""
        self.MODELS_DIR.mkdir(parents=True, exist_ok=True)
        self.VOICES_DIR.mkdir(parents=True, exist_ok=True)
        self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        self.RECORDINGS_DIR.mkdir(parents=True, exist_ok=True)

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"


settings = Settings()


def configure_logging():
    """Configure logging to suppress all logs"""
    import logging
    import warnings

    warnings.filterwarnings("ignore")

    logging.getLogger().setLevel(logging.ERROR)

    logging.getLogger("urllib3").setLevel(logging.ERROR)
    logging.getLogger("PIL").setLevel(logging.ERROR)
    logging.getLogger("matplotlib").setLevel(logging.ERROR)
    logging.getLogger("torch").setLevel(logging.ERROR)
    logging.getLogger("tensorflow").setLevel(logging.ERROR)
    logging.getLogger("whisper").setLevel(logging.ERROR)
    logging.getLogger("transformers").setLevel(logging.ERROR)
    logging.getLogger("pyannote").setLevel(logging.ERROR)
    logging.getLogger("sounddevice").setLevel(logging.ERROR)
    logging.getLogger("soundfile").setLevel(logging.ERROR)
    logging.getLogger("uvicorn").setLevel(logging.ERROR)
    logging.getLogger("fastapi").setLevel(logging.ERROR)


configure_logging()