ALLARD Marc-Antoine committed on
Commit
965877f
·
1 Parent(s): 06e4dbb

handle large file for HF

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +43 -22
src/streamlit_app.py CHANGED
@@ -383,35 +383,56 @@ def show_home_page():
383
  )
384
  st.session_state.annotation_type = annotation_type
385
 
386
- # File upload
387
  st.subheader("2. Upload Audio File")
 
 
 
 
388
  uploaded_file = st.file_uploader(
389
  "Upload an audio file",
390
  type=['wav', 'mp3', 'flac', 'm4a'],
391
- help="Supported formats: WAV, MP3, FLAC, M4A"
392
  )
393
 
394
  if uploaded_file is not None:
395
- st.session_state.audio_file = uploaded_file.read()
396
-
397
- # Save temporary file to get duration
398
- with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
399
- tmp_file.write(st.session_state.audio_file)
400
- st.session_state.audio_duration = get_audio_duration(tmp_file.name)
401
- os.unlink(tmp_file.name)
402
-
403
- st.success(f"✅ Audio file uploaded successfully!")
404
- st.info(f"Duration: {format_time(st.session_state.audio_duration)}")
405
-
406
- # Show audio player
407
- st.subheader("Audio Preview")
408
- audio_html = create_audio_player_html(st.session_state.audio_file)
409
- st.components.v1.html(audio_html, height=120)
410
-
411
- # Continue button
412
- if st.button("Continue to Transcription →", type="primary"):
413
- st.session_state.current_page = "transcription"
414
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
  def show_transcription_page():
417
  """Transcription page - text annotation"""
 
383
  )
384
  st.session_state.annotation_type = annotation_type
385
 
386
+ # File upload with better error handling
387
  st.subheader("2. Upload Audio File")
388
+
389
+ # Add file size warning for Hugging Face Spaces
390
+ st.info("💡 **Tip for Hugging Face Spaces:** Large files (>10MB) may fail to upload. Try smaller audio files or compress your audio if you encounter issues.")
391
+
392
  uploaded_file = st.file_uploader(
393
  "Upload an audio file",
394
  type=['wav', 'mp3', 'flac', 'm4a'],
395
+ help="Supported formats: WAV, MP3, FLAC, M4A. Keep files under 10MB for best compatibility on Hugging Face Spaces."
396
  )
397
 
398
  if uploaded_file is not None:
399
+ try:
400
+ # Read file data
401
+ audio_data = uploaded_file.read()
402
+
403
+ # Check file size (warn if >10MB for HF Spaces)
404
+ file_size_mb = len(audio_data) / (1024 * 1024)
405
+ if file_size_mb > 10:
406
+ st.warning(f"⚠️ File size: {file_size_mb:.1f}MB. Large files may cause issues on Hugging Face Spaces.")
407
+
408
+ # Store in session state
409
+ st.session_state.audio_file = audio_data
410
+
411
+ # Get duration
412
+ st.session_state.audio_duration = get_audio_duration(audio_data)
413
+
414
+ st.success(f"✅ Audio file uploaded successfully! ({file_size_mb:.1f}MB)")
415
+
416
+ if st.session_state.audio_duration > 0:
417
+ st.info(f"Duration: {format_time(st.session_state.audio_duration)}")
418
+
419
+ # Show audio player
420
+ st.subheader("Audio Preview")
421
+ audio_html = create_audio_player_html(st.session_state.audio_file)
422
+ st.components.v1.html(audio_html, height=120)
423
+
424
+ # Continue button
425
+ if st.button("Continue to Transcription →", type="primary"):
426
+ st.session_state.current_page = "transcription"
427
+ st.rerun()
428
+
429
+ except Exception as e:
430
+ st.error(f"❌ Error processing audio file: {str(e)}")
431
+ st.error("This might be due to:")
432
+ st.error("- File format not supported")
433
+ st.error("- File too large for Hugging Face Spaces")
434
+ st.error("- Corrupted audio file")
435
+ st.info("Try converting your audio to WAV format and reducing the file size.")
436
 
437
  def show_transcription_page():
438
  """Transcription page - text annotation"""