refactor and text simplification
ALLARD Marc-Antoine committed · Commit 8cade8e · 1 Parent(s): c30da2c

src/streamlit_app.py CHANGED (+51 -87)
@@ -5,7 +5,7 @@ import wave
 import numpy as np
 from datetime import timedelta
 import base64
-from io import BytesIO
+from io import BytesIO, StringIO
 import tempfile

 # Page configuration
@@ -29,8 +29,6 @@ if 'current_page' not in st.session_state:
     st.session_state.current_page = "home"
 if 'audio_duration' not in st.session_state:
     st.session_state.audio_duration = 0
-if 'save_path' not in st.session_state:
-    st.session_state.save_path = ""

 def get_audio_duration(audio_file):
     """Get audio duration in seconds"""
@@ -334,44 +332,19 @@ def format_srt_time(seconds):
     millisecs = int((seconds % 1) * 1000)
     return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"

-def
-    """
-
-
-
-    # Save transcript
-    transcript_path = os.path.join(save_path, "transcript.txt")
-    with open(transcript_path, "w", encoding="utf-8") as f:
-        f.write(transcript)
-
-    if segments:
-        # Save SRT
-        srt_content = generate_srt(segments, transcript)
-        srt_path = os.path.join(save_path, "transcript.srt")
-        with open(srt_path, "w", encoding="utf-8") as f:
-            f.write(srt_content)
-
-        return transcript_path, srt_path
-
-    return transcript_path, None
+def get_download_link(content, filename, label="Download file"):
+    """Generate download link for text content"""
+    b64 = base64.b64encode(content.encode()).decode()
+    href = f'<a href="data:file/txt;base64,{b64}" download="{filename}">{label}</a>'
+    return href

 # Main App Layout
 def main():
     st.title("π€ ASR Annotation Tool")
-    st.markdown("
+    st.markdown("Simple tool for transcribing, segmenting, and annotating audio for ASR dataset creation.")

     # Sidebar for navigation and settings
     with st.sidebar:
-        st.header("Settings")
-
-        # Save path configuration
-        st.session_state.save_path = st.text_input(
-            "Save Path",
-            value=st.session_state.save_path,
-            help="Directory where files will be saved"
-        )
-
-        # Navigation
         st.header("Navigation")
         if st.button("π Home", use_container_width=True):
             st.session_state.current_page = "home"
@@ -388,7 +361,6 @@ def main():
         if st.button("π Assignment", use_container_width=True):
             st.session_state.current_page = "assignment"

-    # Main content area
     if st.session_state.current_page == "home":
         show_home_page()
     elif st.session_state.current_page == "transcription":
@@ -400,12 +372,11 @@ def main():

 def show_home_page():
     """Home page - annotation type selection and file upload"""
-    st.header("Welcome to ASR Annotation Tool")

     # Annotation type selection
     st.subheader("1. Select Annotation Type")
     annotation_type = st.radio(
-        "
+        "How many speakers are in your audio?",
         ["single_speaker", "multi_speaker"],
         format_func=lambda x: "Single Speaker (Simple ASR)" if x == "single_speaker" else "Multi Speaker (Diarization)",
         key="annotation_type_radio"
@@ -415,7 +386,7 @@ def show_home_page():
     # File upload
     st.subheader("2. Upload Audio File")
     uploaded_file = st.file_uploader(
-        "
+        "Upload an audio file",
         type=['wav', 'mp3', 'flac', 'm4a'],
         help="Supported formats: WAV, MP3, FLAC, M4A"
     )
@@ -461,7 +432,7 @@ def show_transcription_page():
         "Write your transcription here:",
         value=st.session_state.transcript,
         height=300,
-        help="
+        help="Check the guidelines below to help you transcribe accurately."
     )
     st.session_state.transcript = transcript

@@ -469,27 +440,22 @@ def show_transcription_page():
     with st.expander("π Transcription Guidelines"):
         st.markdown("""
         **Key Guidelines:**
-        - Transcribe exactly what is said
-        -
-        - Use standard punctuation
+        - Transcribe exactly what is said
+        - Use standard punctuation and capitalization (tip: Get punctuation from natural pauses in dialogue)
         - Write numbers 1-10 as words, 11+ as digits
-        -
-        - For multi-speaker: transcribe all audible speech
+        - Ignore unclear speech or marked as [unclear] or [inaudible]
+        - For multi-speaker: transcribe all audible speech without identifying speakers
         """)

     # Action buttons
     col1, col2, col3 = st.columns(3)

     with col1:
-        if
-
-
-
-
-        except Exception as e:
-            st.error(f"Error saving file: {e}")
-        else:
-            st.warning("Please write a transcript first!")
+        if transcript.strip():
+            download_link = get_download_link(transcript, "transcript.txt", "πΎ Download Transcript")
+            st.markdown(download_link, unsafe_allow_html=True)
+        else:
+            st.button("πΎ Download Transcript", disabled=True)

     with col2:
         if st.session_state.annotation_type == "multi_speaker" and transcript.strip():
@@ -500,12 +466,10 @@ def show_transcription_page():
     with col3:
         if st.session_state.annotation_type == "single_speaker" and transcript.strip():
             if st.button("β Finish Annotation"):
-
-
-
-
-            except Exception as e:
-                st.error(f"Error saving file: {e}")
+                st.balloons()
+                st.success("π Single speaker annotation completed!")
+                download_link = get_download_link(transcript, "transcript.txt", "π₯ Download Final Transcript")
+                st.markdown(download_link, unsafe_allow_html=True)

 def show_segmentation_page():
     """Segmentation page - audio region selection"""
@@ -523,6 +487,7 @@ def show_segmentation_page():

     # Manual segment addition
     st.subheader("Manual Segment Addition")
+    st.info("After having segmented the wav using our wav surfer, you can manually add segments here. Don't hesitate to replay and pause for the best results.")
     col1, col2, col3, col4 = st.columns(4)

     with col1:
@@ -571,7 +536,7 @@ def show_assignment_page():
         st.error("Please create segments first!")
         return

-    st.info("Assign portions of your transcript to each audio segment to create the final annotation.")
+    st.info("Assign portions of your text transcript to each audio segment to create the final annotation.")

     # Display transcript
     st.subheader("Original Transcript")
@@ -588,7 +553,7 @@ def show_assignment_page():
             f"Text for segment {i+1}:",
             key=f"segment_text_{i}",
             height=100,
-            help="Copy and paste the relevant portion of the transcript for this segment"
+            help="Copy and paste the relevant portion of the text transcript for this segment"
         )

         assigned_segments.append({
@@ -605,36 +570,35 @@ def show_assignment_page():
     st.code(srt_preview, language="text")

     # Final save
-    st.subheader("
+    st.subheader("Download Final Annotation")
     col1, col2 = st.columns(2)

     with col1:
-
-
-
-
-
-            # Save files
-            transcript_path = os.path.join(st.session_state.save_path or ".", "final_transcript.txt")
-            srt_path = os.path.join(st.session_state.save_path or ".", "final_transcript.srt")
-
-            with open(transcript_path, "w", encoding="utf-8") as f:
-                f.write(enhanced_transcript)
-
-            srt_content = generate_srt_with_text(assigned_segments)
-            with open(srt_path, "w", encoding="utf-8") as f:
-                f.write(srt_content)
-
-            st.balloons()
-            st.success(f"π Multi-speaker annotation completed!\n\nFiles saved:\n- {transcript_path}\n- {srt_path}")
-
-        except Exception as e:
-            st.error(f"Error saving files: {e}")
+        # Create enhanced transcript with speaker labels
+        enhanced_transcript = create_speaker_transcript(assigned_segments)
+        download_transcript = get_download_link(enhanced_transcript, "final_transcript.txt", "πΎ Download Transcript")
+        st.markdown(download_transcript, unsafe_allow_html=True)

     with col2:
-
-
-
+        srt_content = generate_srt_with_text(assigned_segments)
+        download_srt = get_download_link(srt_content, "final_transcript.srt", "πΎ Download SRT")
+        st.markdown(download_srt, unsafe_allow_html=True)
+
+        if st.button("π Finish Annotation", type="primary"):
+            st.balloons()
+            st.success("π Yihawww or Youhouuuu Multi-speaker annotation completed!")
+
+            # Final downloads
+            st.subheader("Download your files:")
+            download_transcript = get_download_link(enhanced_transcript, "final_transcript.txt", "π₯ Download Transcript")
+            download_srt = get_download_link(srt_content, "final_transcript.srt", "π₯ Download SRT")
+
+            st.markdown(download_transcript, unsafe_allow_html=True)
+            st.markdown(download_srt, unsafe_allow_html=True)
+
+        if st.button("π Back to Segmentation"):
+            st.session_state.current_page = "segmentation"
+            st.rerun()

 def generate_srt_with_text(segments):
     """Generate SRT with actual text content"""
@@ -663,4 +627,4 @@ def create_speaker_transcript(segments):
     return "\n\n".join(transcript_lines)

 if __name__ == "__main__":
-    main()
+    main()
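The change above drops the save-to-disk flow (save_path, open()/f.write()) in favor of in-browser downloads built from base64 data URIs via get_download_link. For readers who want to try the pattern outside the app, here is a minimal, self-contained sketch; text_download_link and the demo text area are illustrative names and are not part of this commit.

# Standalone sketch of the data-URI download pattern (illustrative, not from the commit).
import base64

import streamlit as st


def text_download_link(content: str, filename: str, label: str = "Download file") -> str:
    """Return an HTML anchor whose href embeds the text as a base64 data URI."""
    b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
    return f'<a href="data:text/plain;base64,{b64}" download="{filename}">{label}</a>'


transcript = st.text_area("Transcript", value="hello world")
if transcript.strip():
    # unsafe_allow_html=True is needed so the raw <a> tag renders as a clickable link.
    st.markdown(text_download_link(transcript, "transcript.txt"), unsafe_allow_html=True)
else:
    st.button("Download transcript", disabled=True)

Streamlit's built-in st.download_button(label, data=..., file_name=...) covers the same use case without raw HTML; the data-URI anchor used in this commit is a lightweight alternative that renders inside st.markdown.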