Spaces: Bradarr / … · Running on Zero

Bradarr committed · verified
Commit 73cb637 · 1 parent: a7d7d4e

Update app.py

Files changed (1): app.py (+4 -4)
app.py CHANGED
@@ -25,7 +25,7 @@ try:
     if not CSM_1B_HF_WATERMARK:
         raise ValueError("WATERMARK_KEY not found or invalid in environment variables.")
 
-    gpu_timeout = int(os.getenv("GPU_TIMEOUT", 180))
+    gpu_timeout = int(os.getenv("GPU_TIMEOUT", 120))
 except (ValueError, TypeError) as e:
     logging.error(f"Configuration error: {e}")
     raise
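The default GPU window drops from 180 to 120 seconds. Since the Space runs on ZeroGPU ("Running on Zero" above), gpu_timeout presumably bounds how long a GPU is held per request. A minimal, self-contained sketch of how this parsing pattern behaves:

import logging
import os

# os.getenv returns the default (here the int 120) when GPU_TIMEOUT is
# unset, but a *string* when it is set, so int() normalizes both cases.
# A non-numeric setting such as GPU_TIMEOUT=abc makes int() raise
# ValueError, which the handler below logs and re-raises.
try:
    gpu_timeout = int(os.getenv("GPU_TIMEOUT", 120))
except (ValueError, TypeError) as e:
    logging.error(f"Configuration error: {e}")
    raise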
@@ -42,7 +42,7 @@ This demo allows you to have a conversation with Sesame CSM 1B, leveraging Whisp
 
 # --- Constants --- (Constants can stay outside)
 SPEAKER_ID = 0
-MAX_CONTEXT_SEGMENTS = 5
+MAX_CONTEXT_SEGMENTS = 1
 MAX_GEMMA_LENGTH = 150
 
 # --- Global Conversation History ---
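Shrinking MAX_CONTEXT_SEGMENTS from 5 to 1 means only the most recent exchange is fed back into speech generation, reducing per-turn work. The diff does not show where the constant is applied; the sketch below is a guess at the usual trimming pattern, with Segment and remember as hypothetical stand-ins for the app's own structures:

from dataclasses import dataclass

MAX_CONTEXT_SEGMENTS = 1  # value after this commit; was 5

@dataclass
class Segment:  # hypothetical stand-in for the app's segment type
    speaker: int
    text: str

conversation_history: list[Segment] = []

def remember(segment: Segment) -> None:
    # Keep only the most recent MAX_CONTEXT_SEGMENTS turns; with the new
    # value of 1, only the latest segment is passed as generation context.
    conversation_history.append(segment)
    del conversation_history[:-MAX_CONTEXT_SEGMENTS]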
@@ -62,7 +62,7 @@ def transcribe_audio(audio_path: str, whisper_model) -> str: # Pass whisper_mod
 
 def generate_response(text: str, model_gemma, tokenizer_gemma, device) -> str: # Pass model and tokenizer
     try:
-        input_text = "Reapond to the user: " + text
+        input_text = "Reapond to the users prompt: " + text
         input = tokenizer_gemma(input_text, return_tensors="pt").to(device)
         generated_output = model_gemma.generate(**input, max_length=MAX_GEMMA_LENGTH, early_stopping=True)
         return tokenizer_gemma.decode(generated_output[0], skip_special_tokens=True)
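(The commit rewords the prompt but keeps the "Reapond" misspelling, and input shadows a Python builtin.) The reply path is the standard transformers tokenize → generate → decode round trip. A self-contained sketch, with two assumptions flagged: the checkpoint name google/gemma-2b-it is illustrative, and inputs replaces the shadowing input:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MAX_GEMMA_LENGTH = 150
device = "cuda" if torch.cuda.is_available() else "cpu"

# Checkpoint name is an assumption; the diff never names the Gemma variant.
tokenizer_gemma = AutoTokenizer.from_pretrained("google/gemma-2b-it")
model_gemma = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it").to(device)

inputs = tokenizer_gemma("Respond to the users prompt: Hello!", return_tensors="pt").to(device)
# max_length=150 caps prompt + completion combined, so long user turns leave
# little room for the reply; early_stopping only affects beam search and is
# inert under the default greedy decoding.
output = model_gemma.generate(**inputs, max_length=MAX_GEMMA_LENGTH, early_stopping=True)
print(tokenizer_gemma.decode(output[0], skip_special_tokens=True))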
@@ -136,7 +136,7 @@ def _infer(user_audio, generator, whisper_model, tokenizer_gemma, model_gemma, d
         text=ai_text,
         speaker=SPEAKER_ID,
         context=conversation_history,
-        max_audio_length_ms=30_000,
+        max_audio_length_ms=10_000,
     )
     logging.info("Audio generated successfully.")
 except Exception as e:
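Cutting max_audio_length_ms from 30_000 to 10_000 caps each spoken reply at roughly ten seconds, trading answer length for per-turn latency. A minimal sketch of the call site, assuming the load_csm_1b loader from the sesame/csm reference code; the keyword arguments match the ones visible in this hunk:

import torchaudio
from generator import load_csm_1b  # loader from the sesame/csm repo

generator = load_csm_1b(device="cuda")
audio = generator.generate(
    text="Hello from Sesame.",   # ai_text in the app
    speaker=0,                   # SPEAKER_ID
    context=[],                  # conversation_history, now at most 1 segment
    max_audio_length_ms=10_000,  # hard cap: ~10 s of audio per reply
)
# generate returns a mono waveform tensor; add a channel dim to save it.
torchaudio.save("reply.wav", audio.unsqueeze(0).cpu(), generator.sample_rate)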
 