refactor and text simplification
ALLARD Marc-Antoine committed · Commit 8cade8e · 1 Parent(s): c30da2c

src/streamlit_app.py CHANGED (+51 -87)
@@ -5,7 +5,7 @@ import wave
 import numpy as np
 from datetime import timedelta
 import base64
-from io import BytesIO
+from io import BytesIO, StringIO
 import tempfile

 # Page configuration
@@ -29,8 +29,6 @@ if 'current_page' not in st.session_state:
     st.session_state.current_page = "home"
 if 'audio_duration' not in st.session_state:
     st.session_state.audio_duration = 0
-if 'save_path' not in st.session_state:
-    st.session_state.save_path = ""

 def get_audio_duration(audio_file):
     """Get audio duration in seconds"""
@@ -334,44 +332,19 @@ def format_srt_time(seconds):
     millisecs = int((seconds % 1) * 1000)
     return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"

-def
-    """
-
-
-
-    # Save transcript
-    transcript_path = os.path.join(save_path, "transcript.txt")
-    with open(transcript_path, "w", encoding="utf-8") as f:
-        f.write(transcript)
-
-    if segments:
-        # Save SRT
-        srt_content = generate_srt(segments, transcript)
-        srt_path = os.path.join(save_path, "transcript.srt")
-        with open(srt_path, "w", encoding="utf-8") as f:
-            f.write(srt_content)
-
-        return transcript_path, srt_path
-
-    return transcript_path, None
+def get_download_link(content, filename, label="Download file"):
+    """Generate download link for text content"""
+    b64 = base64.b64encode(content.encode()).decode()
+    href = f'<a href="data:file/txt;base64,{b64}" download="{filename}">{label}</a>'
+    return href

 # Main App Layout
 def main():
     st.title("π€ ASR Annotation Tool")
-    st.markdown("
+    st.markdown("Simple tool for transcribing, segmenting, and annotating audio for ASR dataset creation.")

     # Sidebar for navigation and settings
     with st.sidebar:
-        st.header("Settings")
-
-        # Save path configuration
-        st.session_state.save_path = st.text_input(
-            "Save Path",
-            value=st.session_state.save_path,
-            help="Directory where files will be saved"
-        )
-
-        # Navigation
         st.header("Navigation")
         if st.button("π Home", use_container_width=True):
             st.session_state.current_page = "home"
@@ -388,7 +361,6 @@ def main():
         if st.button("π Assignment", use_container_width=True):
             st.session_state.current_page = "assignment"

-    # Main content area
     if st.session_state.current_page == "home":
         show_home_page()
     elif st.session_state.current_page == "transcription":
@@ -400,12 +372,11 @@ def main():

 def show_home_page():
     """Home page - annotation type selection and file upload"""
-    st.header("Welcome to ASR Annotation Tool")

     # Annotation type selection
     st.subheader("1. Select Annotation Type")
     annotation_type = st.radio(
-        "
+        "How many speakers are in your audio?",
         ["single_speaker", "multi_speaker"],
         format_func=lambda x: "Single Speaker (Simple ASR)" if x == "single_speaker" else "Multi Speaker (Diarization)",
         key="annotation_type_radio"
@@ -415,7 +386,7 @@ def show_home_page():
     # File upload
     st.subheader("2. Upload Audio File")
     uploaded_file = st.file_uploader(
-        "
+        "Upload an audio file",
         type=['wav', 'mp3', 'flac', 'm4a'],
         help="Supported formats: WAV, MP3, FLAC, M4A"
     )
@@ -461,7 +432,7 @@ def show_transcription_page():
         "Write your transcription here:",
         value=st.session_state.transcript,
         height=300,
-        help="
+        help="Check the guidelines below to help you transcribe accurately."
     )
     st.session_state.transcript = transcript

@@ -469,27 +440,22 @@ def show_transcription_page():
     with st.expander("π Transcription Guidelines"):
         st.markdown("""
         **Key Guidelines:**
-        - Transcribe exactly what is said
-        -
-        - Use standard punctuation
+        - Transcribe exactly what is said
+        - Use standard punctuation and capitalization (tip: Get punctuation from natural pauses in dialogue)
         - Write numbers 1-10 as words, 11+ as digits
-        -
-        - For multi-speaker: transcribe all audible speech
+        - Ignore unclear speech or marked as [unclear] or [inaudible]
+        - For multi-speaker: transcribe all audible speech without identifying speakers
         """)

     # Action buttons
     col1, col2, col3 = st.columns(3)

     with col1:
-        if
-
-
-
-
-        except Exception as e:
-            st.error(f"Error saving file: {e}")
-        else:
-            st.warning("Please write a transcript first!")
+        if transcript.strip():
+            download_link = get_download_link(transcript, "transcript.txt", "πΎ Download Transcript")
+            st.markdown(download_link, unsafe_allow_html=True)
+        else:
+            st.button("πΎ Download Transcript", disabled=True)

     with col2:
         if st.session_state.annotation_type == "multi_speaker" and transcript.strip():
@@ -500,12 +466,10 @@ def show_transcription_page():
     with col3:
         if st.session_state.annotation_type == "single_speaker" and transcript.strip():
             if st.button("β Finish Annotation"):
-
-
-
-
-            except Exception as e:
-                st.error(f"Error saving file: {e}")
+                st.balloons()
+                st.success("π Single speaker annotation completed!")
+                download_link = get_download_link(transcript, "transcript.txt", "π₯ Download Final Transcript")
+                st.markdown(download_link, unsafe_allow_html=True)

 def show_segmentation_page():
     """Segmentation page - audio region selection"""
@@ -523,6 +487,7 @@ def show_segmentation_page():

     # Manual segment addition
     st.subheader("Manual Segment Addition")
+    st.info("After having segmented the wav using our wav surfer, you can manually add segments here. Don't hesitate to replay and pause for the best results.")
     col1, col2, col3, col4 = st.columns(4)

     with col1:
@@ -571,7 +536,7 @@ def show_assignment_page():
         st.error("Please create segments first!")
         return

-    st.info("Assign portions of your transcript to each audio segment to create the final annotation.")
+    st.info("Assign portions of your text transcript to each audio segment to create the final annotation.")

     # Display transcript
     st.subheader("Original Transcript")
@@ -588,7 +553,7 @@ def show_assignment_page():
             f"Text for segment {i+1}:",
             key=f"segment_text_{i}",
             height=100,
-            help="Copy and paste the relevant portion of the transcript for this segment"
+            help="Copy and paste the relevant portion of the text transcript for this segment"
         )

         assigned_segments.append({
@@ -605,36 +570,35 @@ def show_assignment_page():
     st.code(srt_preview, language="text")

     # Final save
-    st.subheader("
+    st.subheader("Download Final Annotation")
     col1, col2 = st.columns(2)

     with col1:
-
-
-
-
-
-            # Save files
-            transcript_path = os.path.join(st.session_state.save_path or ".", "final_transcript.txt")
-            srt_path = os.path.join(st.session_state.save_path or ".", "final_transcript.srt")
-
-            with open(transcript_path, "w", encoding="utf-8") as f:
-                f.write(enhanced_transcript)
-
-            srt_content = generate_srt_with_text(assigned_segments)
-            with open(srt_path, "w", encoding="utf-8") as f:
-                f.write(srt_content)
-
-            st.balloons()
-            st.success(f"π Multi-speaker annotation completed!\n\nFiles saved:\n- {transcript_path}\n- {srt_path}")
-
-        except Exception as e:
-            st.error(f"Error saving files: {e}")
+        # Create enhanced transcript with speaker labels
+        enhanced_transcript = create_speaker_transcript(assigned_segments)
+        download_transcript = get_download_link(enhanced_transcript, "final_transcript.txt", "πΎ Download Transcript")
+        st.markdown(download_transcript, unsafe_allow_html=True)

     with col2:
-
-
-
+        srt_content = generate_srt_with_text(assigned_segments)
+        download_srt = get_download_link(srt_content, "final_transcript.srt", "πΎ Download SRT")
+        st.markdown(download_srt, unsafe_allow_html=True)
+
+        if st.button("π Finish Annotation", type="primary"):
+            st.balloons()
+            st.success("π Yihawww or Youhouuuu Multi-speaker annotation completed!")
+
+            # Final downloads
+            st.subheader("Download your files:")
+            download_transcript = get_download_link(enhanced_transcript, "final_transcript.txt", "π₯ Download Transcript")
+            download_srt = get_download_link(srt_content, "final_transcript.srt", "π₯ Download SRT")
+
+            st.markdown(download_transcript, unsafe_allow_html=True)
+            st.markdown(download_srt, unsafe_allow_html=True)
+
+        if st.button("π Back to Segmentation"):
+            st.session_state.current_page = "segmentation"
+            st.rerun()

 def generate_srt_with_text(segments):
     """Generate SRT with actual text content"""
@@ -663,4 +627,4 @@ def create_speaker_transcript(segments):
     return "\n\n".join(transcript_lines)

 if __name__ == "__main__":
-    main()
+    main()
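The change above drops the save-to-disk flow (save_path, open()/f.write()) in favor of in-browser downloads built from base64 data URIs via get_download_link. For readers who want to try the pattern outside the app, here is a minimal, self-contained sketch; text_download_link and the demo text area are illustrative names and are not part of this commit.

# Standalone sketch of the data-URI download pattern (illustrative, not from the commit).
import base64

import streamlit as st


def text_download_link(content: str, filename: str, label: str = "Download file") -> str:
    """Return an HTML anchor whose href embeds the text as a base64 data URI."""
    b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
    return f'<a href="data:text/plain;base64,{b64}" download="{filename}">{label}</a>'


transcript = st.text_area("Transcript", value="hello world")
if transcript.strip():
    # unsafe_allow_html=True is needed so the raw <a> tag renders as a clickable link.
    st.markdown(text_download_link(transcript, "transcript.txt"), unsafe_allow_html=True)
else:
    st.button("Download transcript", disabled=True)

Streamlit's built-in st.download_button(label, data=..., file_name=...) covers the same use case without raw HTML; the data-URI anchor used in this commit is a lightweight alternative that renders inside st.markdown.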