Update app.py
Browse files
app.py
CHANGED
@@ -25,7 +25,7 @@ try:
|
|
25 |
if not CSM_1B_HF_WATERMARK:
|
26 |
raise ValueError("WATERMARK_KEY not found or invalid in environment variables.")
|
27 |
|
28 |
-
gpu_timeout = int(os.getenv("GPU_TIMEOUT",
|
29 |
except (ValueError, TypeError) as e:
|
30 |
logging.error(f"Configuration error: {e}")
|
31 |
raise
|
@@ -42,7 +42,7 @@ This demo allows you to have a conversation with Sesame CSM 1B, leveraging Whisp
|
|
42 |
|
43 |
# --- Constants --- (Constants can stay outside)
|
44 |
SPEAKER_ID = 0
|
45 |
-
MAX_CONTEXT_SEGMENTS =
|
46 |
MAX_GEMMA_LENGTH = 150
|
47 |
|
48 |
# --- Global Conversation History ---
|
@@ -62,7 +62,7 @@ def transcribe_audio(audio_path: str, whisper_model) -> str: # Pass whisper_mod
|
|
62 |
|
63 |
def generate_response(text: str, model_gemma, tokenizer_gemma, device) -> str: # Pass model and tokenizer
|
64 |
try:
|
65 |
-
input_text = "Reapond to the
|
66 |
input = tokenizer_gemma(input_text, return_tensors="pt").to(device)
|
67 |
generated_output = model_gemma.generate(**input, max_length=MAX_GEMMA_LENGTH, early_stopping=True)
|
68 |
return tokenizer_gemma.decode(generated_output[0], skip_special_tokens=True)
|
@@ -136,7 +136,7 @@ def _infer(user_audio, generator, whisper_model, tokenizer_gemma, model_gemma, d
|
|
136 |
text=ai_text,
|
137 |
speaker=SPEAKER_ID,
|
138 |
context=conversation_history,
|
139 |
-
max_audio_length_ms=
|
140 |
)
|
141 |
logging.info("Audio generated successfully.")
|
142 |
except Exception as e:
|
|
|
25 |
if not CSM_1B_HF_WATERMARK:
|
26 |
raise ValueError("WATERMARK_KEY not found or invalid in environment variables.")
|
27 |
|
28 |
+
gpu_timeout = int(os.getenv("GPU_TIMEOUT", 120))
|
29 |
except (ValueError, TypeError) as e:
|
30 |
logging.error(f"Configuration error: {e}")
|
31 |
raise
|
|
|
42 |
|
43 |
# --- Constants --- (Constants can stay outside)
|
44 |
SPEAKER_ID = 0
|
45 |
+
MAX_CONTEXT_SEGMENTS = 1
|
46 |
MAX_GEMMA_LENGTH = 150
|
47 |
|
48 |
# --- Global Conversation History ---
|
|
|
62 |
|
63 |
def generate_response(text: str, model_gemma, tokenizer_gemma, device) -> str: # Pass model and tokenizer
|
64 |
try:
|
65 |
+
input_text = "Respond to the user's prompt: " + text
|
66 |
input = tokenizer_gemma(input_text, return_tensors="pt").to(device)
|
67 |
generated_output = model_gemma.generate(**input, max_length=MAX_GEMMA_LENGTH, early_stopping=True)
|
68 |
return tokenizer_gemma.decode(generated_output[0], skip_special_tokens=True)
|
|
|
136 |
text=ai_text,
|
137 |
speaker=SPEAKER_ID,
|
138 |
context=conversation_history,
|
139 |
+
max_audio_length_ms=10_000,
|
140 |
)
|
141 |
logging.info("Audio generated successfully.")
|
142 |
except Exception as e:
|