ALLARD Marc-Antoine committed on
Commit
8cade8e
·
1 Parent(s): c30da2c

refactor and text simplification

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +51 -87
src/streamlit_app.py CHANGED
@@ -5,7 +5,7 @@ import wave
5
  import numpy as np
6
  from datetime import timedelta
7
  import base64
8
- from io import BytesIO
9
  import tempfile
10
 
11
  # Page configuration
@@ -29,8 +29,6 @@ if 'current_page' not in st.session_state:
29
  st.session_state.current_page = "home"
30
  if 'audio_duration' not in st.session_state:
31
  st.session_state.audio_duration = 0
32
- if 'save_path' not in st.session_state:
33
- st.session_state.save_path = ""
34
 
35
  def get_audio_duration(audio_file):
36
  """Get audio duration in seconds"""
@@ -334,44 +332,19 @@ def format_srt_time(seconds):
334
  millisecs = int((seconds % 1) * 1000)
335
  return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"
336
 
337
- def save_files(transcript, segments=None, save_path=""):
338
- """Save transcript and SRT files"""
339
- if not save_path:
340
- save_path = "."
341
-
342
- # Save transcript
343
- transcript_path = os.path.join(save_path, "transcript.txt")
344
- with open(transcript_path, "w", encoding="utf-8") as f:
345
- f.write(transcript)
346
-
347
- if segments:
348
- # Save SRT
349
- srt_content = generate_srt(segments, transcript)
350
- srt_path = os.path.join(save_path, "transcript.srt")
351
- with open(srt_path, "w", encoding="utf-8") as f:
352
- f.write(srt_content)
353
-
354
- return transcript_path, srt_path
355
-
356
- return transcript_path, None
357
 
358
  # Main App Layout
359
  def main():
360
  st.title("🎀 ASR Annotation Tool")
361
- st.markdown("Professional tool for creating ASR evaluation datasets")
362
 
363
  # Sidebar for navigation and settings
364
  with st.sidebar:
365
- st.header("Settings")
366
-
367
- # Save path configuration
368
- st.session_state.save_path = st.text_input(
369
- "Save Path",
370
- value=st.session_state.save_path,
371
- help="Directory where files will be saved"
372
- )
373
-
374
- # Navigation
375
  st.header("Navigation")
376
  if st.button("🏠 Home", use_container_width=True):
377
  st.session_state.current_page = "home"
@@ -388,7 +361,6 @@ def main():
388
  if st.button("πŸ“Š Assignment", use_container_width=True):
389
  st.session_state.current_page = "assignment"
390
 
391
- # Main content area
392
  if st.session_state.current_page == "home":
393
  show_home_page()
394
  elif st.session_state.current_page == "transcription":
@@ -400,12 +372,11 @@ def main():
400
 
401
  def show_home_page():
402
  """Home page - annotation type selection and file upload"""
403
- st.header("Welcome to ASR Annotation Tool")
404
 
405
  # Annotation type selection
406
  st.subheader("1. Select Annotation Type")
407
  annotation_type = st.radio(
408
- "Choose the type of annotation:",
409
  ["single_speaker", "multi_speaker"],
410
  format_func=lambda x: "Single Speaker (Simple ASR)" if x == "single_speaker" else "Multi Speaker (Diarization)",
411
  key="annotation_type_radio"
@@ -415,7 +386,7 @@ def show_home_page():
415
  # File upload
416
  st.subheader("2. Upload Audio File")
417
  uploaded_file = st.file_uploader(
418
- "Choose an audio file",
419
  type=['wav', 'mp3', 'flac', 'm4a'],
420
  help="Supported formats: WAV, MP3, FLAC, M4A"
421
  )
@@ -461,7 +432,7 @@ def show_transcription_page():
461
  "Write your transcription here:",
462
  value=st.session_state.transcript,
463
  height=300,
464
- help="Follow the annotation guidelines for accurate transcription"
465
  )
466
  st.session_state.transcript = transcript
467
 
@@ -469,27 +440,22 @@ def show_transcription_page():
469
  with st.expander("πŸ“‹ Transcription Guidelines"):
470
  st.markdown("""
471
  **Key Guidelines:**
472
- - Transcribe exactly what is said (verbatim)
473
- - Include false starts, filled pauses (um, uh)
474
- - Use standard punctuation
475
  - Write numbers 1-10 as words, 11+ as digits
476
- - Mark unclear speech as [unclear] or [inaudible]
477
- - For multi-speaker: transcribe all audible speech
478
  """)
479
 
480
  # Action buttons
481
  col1, col2, col3 = st.columns(3)
482
 
483
  with col1:
484
- if st.button("πŸ’Ύ Save Transcript", type="primary"):
485
- if transcript.strip():
486
- try:
487
- transcript_path, _ = save_files(transcript, save_path=st.session_state.save_path)
488
- st.success(f"βœ… Transcript saved to: {transcript_path}")
489
- except Exception as e:
490
- st.error(f"Error saving file: {e}")
491
- else:
492
- st.warning("Please write a transcript first!")
493
 
494
  with col2:
495
  if st.session_state.annotation_type == "multi_speaker" and transcript.strip():
@@ -500,12 +466,10 @@ def show_transcription_page():
500
  with col3:
501
  if st.session_state.annotation_type == "single_speaker" and transcript.strip():
502
  if st.button("βœ… Finish Annotation"):
503
- try:
504
- transcript_path, _ = save_files(transcript, save_path=st.session_state.save_path)
505
- st.balloons()
506
- st.success(f"πŸŽ‰ Single speaker annotation completed!\nSaved to: {transcript_path}")
507
- except Exception as e:
508
- st.error(f"Error saving file: {e}")
509
 
510
  def show_segmentation_page():
511
  """Segmentation page - audio region selection"""
@@ -523,6 +487,7 @@ def show_segmentation_page():
523
 
524
  # Manual segment addition
525
  st.subheader("Manual Segment Addition")
 
526
  col1, col2, col3, col4 = st.columns(4)
527
 
528
  with col1:
@@ -571,7 +536,7 @@ def show_assignment_page():
571
  st.error("Please create segments first!")
572
  return
573
 
574
- st.info("Assign portions of your transcript to each audio segment to create the final annotation.")
575
 
576
  # Display transcript
577
  st.subheader("Original Transcript")
@@ -588,7 +553,7 @@ def show_assignment_page():
588
  f"Text for segment {i+1}:",
589
  key=f"segment_text_{i}",
590
  height=100,
591
- help="Copy and paste the relevant portion of the transcript for this segment"
592
  )
593
 
594
  assigned_segments.append({
@@ -605,36 +570,35 @@ def show_assignment_page():
605
  st.code(srt_preview, language="text")
606
 
607
  # Final save
608
- st.subheader("Save Final Annotation")
609
  col1, col2 = st.columns(2)
610
 
611
  with col1:
612
- if st.button("πŸ’Ύ Save Transcript + SRT", type="primary"):
613
- try:
614
- # Create enhanced transcript with speaker labels
615
- enhanced_transcript = create_speaker_transcript(assigned_segments)
616
-
617
- # Save files
618
- transcript_path = os.path.join(st.session_state.save_path or ".", "final_transcript.txt")
619
- srt_path = os.path.join(st.session_state.save_path or ".", "final_transcript.srt")
620
-
621
- with open(transcript_path, "w", encoding="utf-8") as f:
622
- f.write(enhanced_transcript)
623
-
624
- srt_content = generate_srt_with_text(assigned_segments)
625
- with open(srt_path, "w", encoding="utf-8") as f:
626
- f.write(srt_content)
627
-
628
- st.balloons()
629
- st.success(f"πŸŽ‰ Multi-speaker annotation completed!\n\nFiles saved:\n- {transcript_path}\n- {srt_path}")
630
-
631
- except Exception as e:
632
- st.error(f"Error saving files: {e}")
633
 
634
  with col2:
635
- if st.button("πŸ”„ Back to Segmentation"):
636
- st.session_state.current_page = "segmentation"
637
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
638
 
639
  def generate_srt_with_text(segments):
640
  """Generate SRT with actual text content"""
@@ -663,4 +627,4 @@ def create_speaker_transcript(segments):
663
  return "\n\n".join(transcript_lines)
664
 
665
  if __name__ == "__main__":
666
- main()
 
5
  import numpy as np
6
  from datetime import timedelta
7
  import base64
8
+ from io import BytesIO, StringIO
9
  import tempfile
10
 
11
  # Page configuration
 
29
  st.session_state.current_page = "home"
30
  if 'audio_duration' not in st.session_state:
31
  st.session_state.audio_duration = 0
 
 
32
 
33
  def get_audio_duration(audio_file):
34
  """Get audio duration in seconds"""
 
332
  millisecs = int((seconds % 1) * 1000)
333
  return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"
334
 
335
def get_download_link(content, filename, label="Download file"):
    """Build an HTML anchor that downloads *content* as a text file.

    The text is UTF-8 encoded and embedded in a base64 ``data:`` URI, so the
    link works without writing anything to disk.

    Args:
        content: Text to offer for download.
        filename: File name placed in the anchor's ``download`` attribute.
        label: Visible link text.

    Returns:
        The ``<a>`` element as a string.
    """
    # Local import: the file's top-level import block does not include html.
    from html import escape

    # Encode explicitly as UTF-8 and declare the same charset in the URI so
    # non-ASCII transcripts are decoded correctly by the browser.
    b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
    href = f"data:text/plain;charset=utf-8;base64,{b64}"

    # Escape interpolated values: a quote or angle bracket in the file name or
    # label would otherwise break out of the attribute / element.
    safe_name = escape(filename, quote=True)
    safe_label = escape(label)
    return f'<a href="{href}" download="{safe_name}">{safe_label}</a>'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
  # Main App Layout
342
  def main():
343
  st.title("🎀 ASR Annotation Tool")
344
+ st.markdown("Simple tool for transcribing, segmenting, and annotating audio for ASR dataset creation.")
345
 
346
  # Sidebar for navigation and settings
347
  with st.sidebar:
 
 
 
 
 
 
 
 
 
 
348
  st.header("Navigation")
349
  if st.button("🏠 Home", use_container_width=True):
350
  st.session_state.current_page = "home"
 
361
  if st.button("πŸ“Š Assignment", use_container_width=True):
362
  st.session_state.current_page = "assignment"
363
 
 
364
  if st.session_state.current_page == "home":
365
  show_home_page()
366
  elif st.session_state.current_page == "transcription":
 
372
 
373
  def show_home_page():
374
  """Home page - annotation type selection and file upload"""
 
375
 
376
  # Annotation type selection
377
  st.subheader("1. Select Annotation Type")
378
  annotation_type = st.radio(
379
+ "How many speakers are in your audio?",
380
  ["single_speaker", "multi_speaker"],
381
  format_func=lambda x: "Single Speaker (Simple ASR)" if x == "single_speaker" else "Multi Speaker (Diarization)",
382
  key="annotation_type_radio"
 
386
  # File upload
387
  st.subheader("2. Upload Audio File")
388
  uploaded_file = st.file_uploader(
389
+ "Upload an audio file",
390
  type=['wav', 'mp3', 'flac', 'm4a'],
391
  help="Supported formats: WAV, MP3, FLAC, M4A"
392
  )
 
432
  "Write your transcription here:",
433
  value=st.session_state.transcript,
434
  height=300,
435
+ help="Check the guidelines below to help you transcribe accurately."
436
  )
437
  st.session_state.transcript = transcript
438
 
 
440
  with st.expander("πŸ“‹ Transcription Guidelines"):
441
  st.markdown("""
442
  **Key Guidelines:**
443
+ - Transcribe exactly what is said
444
+ - Use standard punctuation and capitalization (tip: Get punctuation from natural pauses in dialogue)
 
445
  - Write numbers 1-10 as words, 11+ as digits
446
+ - Ignore unclear speech or marked as [unclear] or [inaudible]
447
+ - For multi-speaker: transcribe all audible speech without identifying speakers
448
  """)
449
 
450
  # Action buttons
451
  col1, col2, col3 = st.columns(3)
452
 
453
  with col1:
454
+ if transcript.strip():
455
+ download_link = get_download_link(transcript, "transcript.txt", "πŸ’Ύ Download Transcript")
456
+ st.markdown(download_link, unsafe_allow_html=True)
457
+ else:
458
+ st.button("πŸ’Ύ Download Transcript", disabled=True)
 
 
 
 
459
 
460
  with col2:
461
  if st.session_state.annotation_type == "multi_speaker" and transcript.strip():
 
466
  with col3:
467
  if st.session_state.annotation_type == "single_speaker" and transcript.strip():
468
  if st.button("βœ… Finish Annotation"):
469
+ st.balloons()
470
+ st.success("πŸŽ‰ Single speaker annotation completed!")
471
+ download_link = get_download_link(transcript, "transcript.txt", "πŸ“₯ Download Final Transcript")
472
+ st.markdown(download_link, unsafe_allow_html=True)
 
 
473
 
474
  def show_segmentation_page():
475
  """Segmentation page - audio region selection"""
 
487
 
488
  # Manual segment addition
489
  st.subheader("Manual Segment Addition")
490
+ st.info("After having segmented the wav using our wav surfer, you can manually add segments here. Don't hesitate to replay and pause for the best results.")
491
  col1, col2, col3, col4 = st.columns(4)
492
 
493
  with col1:
 
536
  st.error("Please create segments first!")
537
  return
538
 
539
+ st.info("Assign portions of your text transcript to each audio segment to create the final annotation.")
540
 
541
  # Display transcript
542
  st.subheader("Original Transcript")
 
553
  f"Text for segment {i+1}:",
554
  key=f"segment_text_{i}",
555
  height=100,
556
+ help="Copy and paste the relevant portion of the text transcript for this segment"
557
  )
558
 
559
  assigned_segments.append({
 
570
  st.code(srt_preview, language="text")
571
 
572
  # Final save
573
+ st.subheader("Download Final Annotation")
574
  col1, col2 = st.columns(2)
575
 
576
  with col1:
577
+ # Create enhanced transcript with speaker labels
578
+ enhanced_transcript = create_speaker_transcript(assigned_segments)
579
+ download_transcript = get_download_link(enhanced_transcript, "final_transcript.txt", "πŸ’Ύ Download Transcript")
580
+ st.markdown(download_transcript, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
 
582
  with col2:
583
+ srt_content = generate_srt_with_text(assigned_segments)
584
+ download_srt = get_download_link(srt_content, "final_transcript.srt", "πŸ’Ύ Download SRT")
585
+ st.markdown(download_srt, unsafe_allow_html=True)
586
+
587
+ if st.button("πŸŽ‰ Finish Annotation", type="primary"):
588
+ st.balloons()
589
+ st.success("πŸŽ‰ Yihawww or Youhouuuu Multi-speaker annotation completed!")
590
+
591
+ # Final downloads
592
+ st.subheader("Download your files:")
593
+ download_transcript = get_download_link(enhanced_transcript, "final_transcript.txt", "πŸ“₯ Download Transcript")
594
+ download_srt = get_download_link(srt_content, "final_transcript.srt", "πŸ“₯ Download SRT")
595
+
596
+ st.markdown(download_transcript, unsafe_allow_html=True)
597
+ st.markdown(download_srt, unsafe_allow_html=True)
598
+
599
+ if st.button("πŸ”„ Back to Segmentation"):
600
+ st.session_state.current_page = "segmentation"
601
+ st.rerun()
602
 
603
  def generate_srt_with_text(segments):
604
  """Generate SRT with actual text content"""
 
627
  return "\n\n".join(transcript_lines)
628
 
629
  if __name__ == "__main__":
630
+ main()