# Source: Hugging Face Spaces - Chamin09 / BrailleMenuGen
# Space status at capture time: Runtime error
# File size: 6,932 Bytes
# Commit: 87d0988

import streamlit as st
from PIL import Image
import io
import numpy as np

# Import our custom modules
from utils.image_preprocessing import preprocess_image
from models.document_ai import extract_text_and_layout
from models.text_processor import process_menu_text
from models.braille_translator import text_to_braille, get_braille_metadata
from utils.braille_display import create_braille_html, create_braille_comparison

# Page header: what the app is and what the user should do.
st.title("Menu to Braille Converter")
st.write("Upload a menu image to convert it to Braille text")

# Everything inside this context is drawn in the sidebar, top to bottom.
with st.sidebar:
    # Processing toggles; each checkbox starts enabled.
    st.header("Settings")
    use_llm = st.checkbox("Use LLM for text processing", value=True)
    use_context = st.checkbox("Use AI for context enhancement", value=True)
    show_comparison = st.checkbox("Show text/Braille comparison", value=True)

    # Short description of the application, separated by a horizontal rule.
    st.markdown("---")
    st.subheader("About")
    st.info(
        "This application converts menu images to Braille text using AI. "
        "It extracts text from images using document AI, processes the text with LLMs, "
        "and converts to Braille."
    )

# Upload widget in the main area; yields None until the user picks a file.
uploaded_file = st.file_uploader(
    "Choose a menu image...",
    type=["jpg", "jpeg", "png"],
)

# Display uploaded image and process it.
#
# Streamlit re-executes this script from the top on every widget interaction,
# and st.button() is True only for the single run triggered by the click.
# The pipeline therefore runs once per click and its outcome is kept in
# st.session_state; the results are rendered from that stored outcome on every
# run, so changing the "Display format" radio no longer makes them disappear.


def _run_menu_pipeline(image, use_llm, use_context):
    """Run OCR, optional LLM clean-up and Braille translation for one image.

    Args:
        image: The PIL image opened from the uploaded file.
        use_llm: When true, pass the raw OCR text through process_menu_text.
        use_context: Forwarded to text_to_braille as its use_context argument.

    Returns:
        A dict describing the outcome. "has_text" is always present; when it
        is true the dict also holds "raw_text", "used_llm", "llm_error"
        (None unless the LLM step reported failure), "processed_text",
        "braille_result" and, if translation succeeded, "metadata".

    Raises:
        Whatever the underlying helpers raise; the caller reports it.
    """
    preprocessed_img = preprocess_image(image)
    result = extract_text_and_layout(preprocessed_img)

    outcome = {"has_text": bool(result['words'])}
    if not outcome["has_text"]:
        return outcome

    raw_text = ' '.join(result['words'])
    outcome["raw_text"] = raw_text
    outcome["used_llm"] = use_llm
    outcome["llm_error"] = None

    # Fall back to the raw OCR text unless the LLM step succeeds.
    processed_text = raw_text
    if use_llm:
        with st.spinner("Enhancing text with AI..."):
            processed_result = process_menu_text(raw_text)

        if processed_result['success']:
            processed_text = processed_result['structured_text']
            st.session_state.menu_data = processed_result.get('menu_data', {})
        else:
            outcome["llm_error"] = processed_result.get('error', 'Unknown error')

    # Store the processed text for later use
    st.session_state.processed_text = processed_text
    outcome["processed_text"] = processed_text

    with st.spinner("Translating to Braille..."):
        braille_result = text_to_braille(processed_text, use_context=use_context)
    outcome["braille_result"] = braille_result

    if braille_result['success']:
        # Store for download
        st.session_state.braille_text = braille_result['formatted_braille']
        outcome["metadata"] = get_braille_metadata(processed_text)

    return outcome


def _render_menu_results(outcome):
    """Draw the stored pipeline outcome produced by _run_menu_pipeline."""
    if not outcome["has_text"]:
        st.warning("No text was extracted from the image.")
        return

    # Show raw text in an expandable section
    with st.expander("Raw Extracted Text"):
        st.text_area("Raw OCR Output", outcome["raw_text"], height=150)

    processed_text = outcome["processed_text"]
    if outcome["used_llm"]:
        st.subheader("Processed Menu Text")
        if outcome["llm_error"] is None:
            st.text_area("Structured Menu Text", processed_text, height=200)
        else:
            st.warning(f"AI processing failed: {outcome['llm_error']}")
            st.text_area("Text Output", processed_text, height=200)
    else:
        # Just use the raw text
        st.subheader("Extracted Text")
        st.text_area("Text Output", processed_text, height=200)

    st.subheader("Braille Translation")
    braille_result = outcome["braille_result"]
    if braille_result['success']:
        braille_text = braille_result['formatted_braille']

        # Changing this radio reruns the script; the view survives because
        # it is rebuilt from the stored outcome rather than the button state.
        display_option = st.radio(
            "Display format:",
            ["Text Only", "Visual Braille", "Side-by-Side Comparison"]
        )

        if display_option == "Text Only":
            # Display Braille text as plain text
            st.text_area("Braille Output", braille_text, height=300)
        elif display_option == "Visual Braille":
            # Display Braille with visual representation
            st.markdown(create_braille_html(braille_text), unsafe_allow_html=True)
        else:  # Side-by-Side Comparison
            st.markdown(
                create_braille_comparison(processed_text, braille_text),
                unsafe_allow_html=True,
            )

        # Display metadata
        metadata = outcome["metadata"]
        st.info(f"Translation contains {metadata['word_count']} words, "
                f"{metadata['character_count']} characters, "
                f"{metadata['line_count']} lines.")

        # Show context summary if available
        if braille_result.get('context_summary'):
            with st.expander("AI Context Understanding"):
                st.write(braille_result['context_summary'])
    else:
        st.error(f"Braille translation failed: {braille_result.get('error', 'Unknown error')}")

    # Download options placeholder
    st.subheader("Download Options")
    st.info("PDF download will be implemented in Phase 5")


if uploaded_file is not None:
    # Load and display image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Menu", use_column_width=True)

    # Discard results that belong to a previously uploaded image.
    # file_id is not present on every Streamlit version, hence getattr.
    upload_id = (
        uploaded_file.name,
        uploaded_file.size,
        getattr(uploaded_file, "file_id", None),
    )
    if st.session_state.get("menu_upload_id") != upload_id:
        st.session_state["menu_upload_id"] = upload_id
        if "menu_results" in st.session_state:
            del st.session_state["menu_results"]

    # Add a button to process the image
    if st.button("Process Menu"):
        # Drop the old outcome first so a failed run does not show stale data.
        if "menu_results" in st.session_state:
            del st.session_state["menu_results"]
        with st.spinner("Processing image..."):
            try:
                st.session_state["menu_results"] = _run_menu_pipeline(
                    image, use_llm, use_context
                )
            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing.
                st.error(f"Error processing image: {str(e)}")

    # Render on every run, not only the one in which the button was clicked.
    stored_outcome = st.session_state.get("menu_results")
    if stored_outcome is not None:
        try:
            _render_menu_results(stored_outcome)
        except Exception as e:
            st.error(f"Error processing image: {str(e)}")
else:
    # No file selected: forget results from any earlier upload.
    if "menu_results" in st.session_state:
        del st.session_state["menu_results"]