Chamin09 committed on
Commit
87d0988
·
verified ·
1 Parent(s): 535ca2c

Upload 17 files

Browse files
README.md CHANGED
@@ -1,20 +1,61 @@
1
- ---
2
- title: BrailleMenuGen
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Convert Menu images to Braille
12
- license: mit
13
- ---
14
-
15
- # Welcome to Streamlit!
16
-
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
-
19
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Menu to Braille Converter
2
+
3
+ An AI-powered application that converts food menu images to Braille text for visually impaired users.
4
+
5
+ ## Features
6
+
7
+ - Upload menu images
8
+ - Extract text using AI-powered document understanding (LayoutLMv2)
9
+ - Process and structure menu text using LLMs
10
+ - Convert text to Braille
11
+ - Display Braille in multiple formats (text, visual, side-by-side)
12
+ - Download as PDF in different formats
13
+
14
+ ## Deployment on Hugging Face Spaces
15
+
16
+ ### Option 1: Direct GitHub Repository Deployment
17
+
18
+ 1. Fork this repository to your GitHub account
19
+ 2. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
20
+ 3. Click "Create new Space"
21
+ 4. Choose "Streamlit" as the SDK
22
+ 5. Connect your GitHub account and select this repository
23
+ 6. Choose the hardware for the Space (a GPU is recommended for better performance)
24
+ 7. Click "Create Space"
25
+
26
+ ### Option 2: Manual Deployment
27
+
28
+ 1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
29
+ 2. Click "Create new Space"
30
+ 3. Choose "Streamlit" as the SDK
31
+ 4. Give your Space a name
32
+ 5. Choose the hardware for the Space (a GPU is recommended for better performance)
33
+ 6. Click "Create Space"
34
+ 7. Clone the Space repository locally
35
+ 8. Copy all files from this project to the cloned repository
36
+ 9. Push the changes to the Space repository
37
+
38
+ ## Hardware Requirements
39
+
40
+ - **Minimum**: CPU (2 vCPUs, 16 GB RAM)
41
+ - **Recommended**: GPU (T4 or better)
42
+
43
+ ## Models Used
44
+
45
+ - **Document AI**: microsoft/layoutlmv2-base-uncased
46
+ - **Text Processing**: meta-llama/Meta-Llama-3-8B-Instruct (with fallback to mistralai/Mistral-7B-Instruct-v0.2)
47
+ - **Context Enhancement**: facebook/bart-large-cnn
48
+
49
+ ## Local Development
50
+
51
+ 1. Clone this repository
52
+ 2. Install dependencies: `pip install -r requirements.txt`
53
+ 3. Run the application: `streamlit run app.py`
54
+
55
+
56
+ ## Future Enhancements
57
+
58
+ - Improved menu section recognition
59
+ - Support for multiple languages
60
+ - Physical Braille printer integration
61
+ - Mobile app version
app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import io
4
+ import numpy as np
5
+ import base64
6
+
7
+ # Import our custom modules
8
+ from utils.image_preprocessing import preprocess_image
9
+ from models.document_ai import extract_text_and_layout
10
+ from models.text_processor import process_menu_text
11
+ from models.braille_translator import text_to_braille, get_braille_metadata
12
+ from utils.braille_display import create_braille_html, create_braille_comparison
13
+ from utils.pdf_generator import create_braille_pdf, create_braille_pdf_with_comparison
14
+
15
+ # Function to create a download link for a PDF
16
def get_pdf_download_link(pdf_bytes, filename="braille_menu.pdf", text="Download PDF"):
    """Build an HTML anchor that downloads a PDF embedded as a base64 data URI.

    Args:
        pdf_bytes: The PDF content. Either a readable binary stream (anything
            with ``read()``, e.g. ``io.BytesIO``) or raw ``bytes``/``bytearray``.
        filename: Name suggested to the browser for the downloaded file. It is
            HTML-escaped because it is placed inside an attribute.
        text: Label of the link. Inserted as-is, so callers may pass markup.

    Returns:
        A string containing a single ``<a>`` element.
    """
    import html  # local import: only this helper needs it

    if isinstance(pdf_bytes, (bytes, bytearray, memoryview)):
        payload = bytes(pdf_bytes)
    else:
        # Rewind first: a buffer that was just written to, or already read
        # once, would otherwise yield an empty (corrupt) download.
        if hasattr(pdf_bytes, "seek"):
            pdf_bytes.seek(0)
        payload = pdf_bytes.read()

    b64 = base64.b64encode(payload).decode("ascii")
    safe_filename = html.escape(filename, quote=True)
    return f'<a href="data:application/pdf;base64,{b64}" download="{safe_filename}">{text}</a>'
21
+
22
# App title and description
st.title("Menu to Braille Converter")
st.write("Upload a menu image to convert it to Braille text")

# Sidebar for model settings
st.sidebar.header("Settings")
use_llm = st.sidebar.checkbox("Use LLM for text processing", value=True)
use_context = st.sidebar.checkbox("Use AI for context enhancement", value=True)
# NOTE: this checkbox is rendered but its value is not consulted anywhere in
# this script; the display format is chosen with the radio further below.
show_comparison = st.sidebar.checkbox("Show text/Braille comparison", value=True)

# Add information about the application
st.sidebar.markdown("---")
st.sidebar.subheader("About")
st.sidebar.info(
    "This application converts menu images to Braille text using AI. "
    "It extracts text from images using document AI, processes the text with LLMs, "
    "and converts to Braille."
)

# File uploader
uploaded_file = st.file_uploader("Choose a menu image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Load and display image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Menu", use_column_width=True)

    # Identifies the current upload so results computed for a previous image
    # are not shown next to a new one.
    upload_key = (uploaded_file.name, uploaded_file.size)

    # --- Compute phase -----------------------------------------------------
    # st.button() is True only on the rerun triggered by the click. Every
    # later widget interaction (radio, selectbox, "Generate PDF") reruns the
    # script with this button False, so the results are stored in
    # st.session_state and rendered outside this branch.
    if st.button("Process Menu"):
        if "menu_result" in st.session_state:
            del st.session_state["menu_result"]

        with st.spinner("Processing image..."):
            try:
                preprocessed_img = preprocess_image(image)

                # Extract text using LayoutLMv2
                result = extract_text_and_layout(preprocessed_img)

                if result['words']:
                    raw_text = ' '.join(result['words'])
                    processed_text = raw_text
                    llm_ok = False
                    llm_error = None

                    # Process text with LLM if enabled; fall back to raw text
                    if use_llm:
                        with st.spinner("Enhancing text with AI..."):
                            processed_result = process_menu_text(raw_text)

                        if processed_result['success']:
                            llm_ok = True
                            processed_text = processed_result['structured_text']
                            st.session_state.menu_data = processed_result.get('menu_data', {})
                        else:
                            llm_error = processed_result.get('error', 'Unknown error')

                    # Translate to Braille
                    with st.spinner("Translating to Braille..."):
                        braille_result = text_to_braille(processed_text, use_context=use_context)

                    # Same session keys the earlier versions of the app wrote
                    st.session_state.processed_text = processed_text
                    if braille_result['success']:
                        st.session_state.braille_text = braille_result['formatted_braille']

                    st.session_state.menu_result = {
                        'upload_key': upload_key,
                        'raw_text': raw_text,
                        'processed_text': processed_text,
                        'llm_used': use_llm,
                        'llm_ok': llm_ok,
                        'llm_error': llm_error,
                        'braille_result': braille_result,
                    }
                else:
                    st.warning("No text was extracted from the image.")

            except Exception as e:
                st.error(f"Error processing image: {str(e)}")

    # --- Render phase ------------------------------------------------------
    menu_result = st.session_state.get("menu_result")
    if menu_result is not None and menu_result['upload_key'] == upload_key:
        processed_text = menu_result['processed_text']
        braille_result = menu_result['braille_result']

        # Show raw text in an expandable section
        with st.expander("Raw Extracted Text"):
            st.text_area("Raw OCR Output", menu_result['raw_text'], height=150)

        if menu_result['llm_used']:
            st.subheader("Processed Menu Text")
            if menu_result['llm_ok']:
                st.text_area("Structured Menu Text", processed_text, height=200)
            else:
                st.warning(f"AI processing failed: {menu_result['llm_error']}")
                st.text_area("Text Output", processed_text, height=200)
        else:
            st.subheader("Extracted Text")
            st.text_area("Text Output", processed_text, height=200)

        st.subheader("Braille Translation")
        if braille_result['success']:
            formatted_braille = braille_result['formatted_braille']

            # Display options
            display_option = st.radio(
                "Display format:",
                ["Text Only", "Visual Braille", "Side-by-Side Comparison"]
            )

            if display_option == "Text Only":
                # Display Braille text as plain text
                st.text_area("Braille Output", formatted_braille, height=300)
            elif display_option == "Visual Braille":
                # Display Braille with visual representation
                braille_html = create_braille_html(formatted_braille)
                st.markdown(braille_html, unsafe_allow_html=True)
            else:  # Side-by-Side Comparison
                comparison_html = create_braille_comparison(processed_text, formatted_braille)
                st.markdown(comparison_html, unsafe_allow_html=True)

            # Display metadata
            metadata = get_braille_metadata(processed_text)
            st.info(f"Translation contains {metadata['word_count']} words, "
                    f"{metadata['character_count']} characters, "
                    f"{metadata['line_count']} lines.")

            # Show context summary if available
            if braille_result.get('context_summary'):
                with st.expander("AI Context Understanding"):
                    st.write(braille_result['context_summary'])

            # PDF Download section
            st.subheader("Download Options")

            pdf_option = st.selectbox(
                "Select PDF format:",
                ["Sequential (Text then Braille)", "Side-by-Side Comparison"]
            )

            pdf_title = st.text_input("PDF Title:", "Menu in Braille")

            if st.button("Generate PDF"):
                with st.spinner("Generating PDF..."):
                    if pdf_option == "Sequential (Text then Braille)":
                        pdf_buffer = create_braille_pdf(
                            processed_text,
                            formatted_braille,
                            title=pdf_title
                        )
                    else:  # Side-by-Side Comparison
                        pdf_buffer = create_braille_pdf_with_comparison(
                            processed_text,
                            formatted_braille,
                            title=pdf_title
                        )

                    # An empty title would otherwise produce the file name ".pdf"
                    file_stem = pdf_title.strip().lower().replace(' ', '_') or "braille_menu"

                    # Create download link
                    st.markdown(
                        get_pdf_download_link(pdf_buffer, f"{file_stem}.pdf"),
                        unsafe_allow_html=True
                    )
        else:
            st.error(f"Braille translation failed: {braille_result.get('error', 'Unknown error')}")
app_bk.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import io
4
+ import numpy as np
5
+
6
+ # Import our custom modules
7
+ from utils.image_preprocessing import preprocess_image
8
+ from models.document_ai import extract_text_and_layout
9
+
10
+ # App title and description
11
+ st.title("Menu to Braille Converter")
12
+ st.write("Upload a menu image to convert it to Braille text")
13
+
14
+ # File uploader
15
+ uploaded_file = st.file_uploader("Choose a menu image...", type=["jpg", "jpeg", "png"])
16
+
17
+ # Display uploaded image and process it
18
+ if uploaded_file is not None:
19
+ # Load and display image
20
+ image = Image.open(uploaded_file)
21
+ st.image(image, caption="Uploaded Menu", use_column_width=True)
22
+
23
+ # Add a button to process the image
24
+ if st.button("Extract Text"):
25
+ with st.spinner("Processing image..."):
26
+ # Preprocess the image
27
+ st.subheader("Preprocessed Image")
28
+ preprocessed_img = preprocess_image(image)
29
+ st.image(preprocessed_img, caption="Preprocessed Image", use_column_width=True)
30
+
31
+ # Extract text using LayoutLMv2
32
+ st.subheader("Extracted Text")
33
+ try:
34
+ result = extract_text_and_layout(preprocessed_img)
35
+
36
+ # Display extracted words
37
+ if result['words']:
38
+ text = ' '.join(result['words'])
39
+ st.text_area("Extracted Text", text, height=200)
40
+ else:
41
+ st.warning("No text was extracted from the image.")
42
+
43
+ except Exception as e:
44
+ st.error(f"Error processing image: {str(e)}")
45
+
46
+ # Placeholders for future functionality
47
+ st.subheader("Braille Translation")
48
+ st.info("Braille translation will be implemented in Phase 4")
49
+
50
+ st.subheader("Download Options")
51
+ st.info("PDF download will be implemented in Phase 5")
app_bk2.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import io
4
+ import numpy as np
5
+
6
+ # Import our custom modules
7
+ from utils.image_preprocessing import preprocess_image
8
+ from models.document_ai import extract_text_and_layout
9
+ from models.text_processor import process_menu_text
10
+
11
+ # App title and description
12
+ st.title("Menu to Braille Converter")
13
+ st.write("Upload a menu image to convert it to Braille text")
14
+
15
+ # Sidebar for model settings
16
+ st.sidebar.header("Settings")
17
+ use_llm = st.sidebar.checkbox("Use LLM for text processing", value=True)
18
+
19
+ # Add information about the application
20
+ st.sidebar.markdown("---")
21
+ st.sidebar.subheader("About")
22
+ st.sidebar.info(
23
+ "This application converts menu images to Braille text using AI. "
24
+ "It extracts text from images using document AI, processes the text with LLMs, "
25
+ "and will convert to Braille in future versions."
26
+ )
27
+
28
+ # File uploader
29
+ uploaded_file = st.file_uploader("Choose a menu image...", type=["jpg", "jpeg", "png"])
30
+
31
+ # Display uploaded image and process it
32
+ if uploaded_file is not None:
33
+ # Load and display image
34
+ image = Image.open(uploaded_file)
35
+ st.image(image, caption="Uploaded Menu", use_column_width=True)
36
+
37
+ # Add a button to process the image
38
+ if st.button("Process Menu"):
39
+ with st.spinner("Processing image..."):
40
+ # Preprocess the image
41
+ preprocessed_img = preprocess_image(image)
42
+
43
+ # Extract text using LayoutLMv2
44
+ try:
45
+ result = extract_text_and_layout(preprocessed_img)
46
+
47
+ # Display extracted words
48
+ if result['words']:
49
+ raw_text = ' '.join(result['words'])
50
+
51
+ # Show raw text in an expandable section
52
+ with st.expander("Raw Extracted Text"):
53
+ st.text_area("Raw OCR Output", raw_text, height=150)
54
+
55
+ # Process text with LLM if enabled
56
+ if use_llm:
57
+ st.subheader("Processed Menu Text")
58
+ with st.spinner("Enhancing text with AI..."):
59
+ processed_result = process_menu_text(raw_text)
60
+
61
+ if processed_result['success']:
62
+ st.text_area("Structured Menu Text",
63
+ processed_result['structured_text'],
64
+ height=300)
65
+
66
+ # Store the processed result for later use
67
+ st.session_state.processed_text = processed_result['structured_text']
68
+ st.session_state.menu_data = processed_result.get('menu_data', {})
69
+ else:
70
+ st.warning(f"AI processing failed: {processed_result.get('error', 'Unknown error')}")
71
+ st.text_area("Text Output", raw_text, height=300)
72
+ st.session_state.processed_text = raw_text
73
+ else:
74
+ # Just use the raw text
75
+ st.subheader("Extracted Text")
76
+ st.text_area("Text Output", raw_text, height=300)
77
+ st.session_state.processed_text = raw_text
78
+ else:
79
+ st.warning("No text was extracted from the image.")
80
+
81
+ except Exception as e:
82
+ st.error(f"Error processing image: {str(e)}")
83
+
84
+ # Placeholders for future functionality
85
+ st.subheader("Braille Translation")
86
+ st.info("Braille translation will be implemented in Phase 4")
87
+
88
+ st.subheader("Download Options")
89
+ st.info("PDF download will be implemented in Phase 5")
app_bk3.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import io
4
+ import numpy as np
5
+
6
+ # Import our custom modules
7
+ from utils.image_preprocessing import preprocess_image
8
+ from models.document_ai import extract_text_and_layout
9
+ from models.text_processor import process_menu_text
10
+ from models.braille_translator import text_to_braille, get_braille_metadata
11
+ from utils.braille_display import create_braille_html, create_braille_comparison
12
+
13
# App title and description
st.title("Menu to Braille Converter")
st.write("Upload a menu image to convert it to Braille text")

# Sidebar for model settings
st.sidebar.header("Settings")
use_llm = st.sidebar.checkbox("Use LLM for text processing", value=True)
use_context = st.sidebar.checkbox("Use AI for context enhancement", value=True)
# NOTE: rendered but not consulted anywhere in this script.
show_comparison = st.sidebar.checkbox("Show text/Braille comparison", value=True)

# Add information about the application
st.sidebar.markdown("---")
st.sidebar.subheader("About")
st.sidebar.info(
    "This application converts menu images to Braille text using AI. "
    "It extracts text from images using document AI, processes the text with LLMs, "
    "and converts to Braille."
)

# File uploader
uploaded_file = st.file_uploader("Choose a menu image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Load and display image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Menu", use_column_width=True)

    # Identifies the current upload so stale results are not displayed
    upload_key = (uploaded_file.name, uploaded_file.size)

    # Compute phase. The button is True only on the rerun caused by the click,
    # so results are stored in st.session_state; otherwise changing the
    # display-format radio below would rerun the script and wipe them.
    if st.button("Process Menu"):
        if "menu_result" in st.session_state:
            del st.session_state["menu_result"]

        with st.spinner("Processing image..."):
            try:
                preprocessed_img = preprocess_image(image)

                # Extract text using LayoutLMv2
                result = extract_text_and_layout(preprocessed_img)

                if result['words']:
                    raw_text = ' '.join(result['words'])
                    processed_text = raw_text
                    llm_ok = False
                    llm_error = None

                    if use_llm:
                        with st.spinner("Enhancing text with AI..."):
                            processed_result = process_menu_text(raw_text)

                        if processed_result['success']:
                            llm_ok = True
                            processed_text = processed_result['structured_text']
                            st.session_state.menu_data = processed_result.get('menu_data', {})
                        else:
                            llm_error = processed_result.get('error', 'Unknown error')

                    with st.spinner("Translating to Braille..."):
                        braille_result = text_to_braille(processed_text, use_context=use_context)

                    st.session_state.processed_text = processed_text
                    if braille_result['success']:
                        st.session_state.braille_text = braille_result['formatted_braille']

                    st.session_state.menu_result = {
                        'upload_key': upload_key,
                        'raw_text': raw_text,
                        'processed_text': processed_text,
                        'llm_used': use_llm,
                        'llm_ok': llm_ok,
                        'llm_error': llm_error,
                        'braille_result': braille_result,
                    }
                else:
                    st.warning("No text was extracted from the image.")

            except Exception as e:
                st.error(f"Error processing image: {str(e)}")

    # Render phase: runs on every rerun while results for this upload exist
    menu_result = st.session_state.get("menu_result")
    if menu_result is not None and menu_result['upload_key'] == upload_key:
        processed_text = menu_result['processed_text']
        braille_result = menu_result['braille_result']

        # Show raw text in an expandable section
        with st.expander("Raw Extracted Text"):
            st.text_area("Raw OCR Output", menu_result['raw_text'], height=150)

        if menu_result['llm_used']:
            st.subheader("Processed Menu Text")
            if menu_result['llm_ok']:
                st.text_area("Structured Menu Text", processed_text, height=200)
            else:
                st.warning(f"AI processing failed: {menu_result['llm_error']}")
                st.text_area("Text Output", processed_text, height=200)
        else:
            st.subheader("Extracted Text")
            st.text_area("Text Output", processed_text, height=200)

        st.subheader("Braille Translation")
        if braille_result['success']:
            formatted_braille = braille_result['formatted_braille']

            # Display options
            display_option = st.radio(
                "Display format:",
                ["Text Only", "Visual Braille", "Side-by-Side Comparison"]
            )

            if display_option == "Text Only":
                st.text_area("Braille Output", formatted_braille, height=300)
            elif display_option == "Visual Braille":
                braille_html = create_braille_html(formatted_braille)
                st.markdown(braille_html, unsafe_allow_html=True)
            else:  # Side-by-Side Comparison
                comparison_html = create_braille_comparison(processed_text, formatted_braille)
                st.markdown(comparison_html, unsafe_allow_html=True)

            # Display metadata
            metadata = get_braille_metadata(processed_text)
            st.info(f"Translation contains {metadata['word_count']} words, "
                    f"{metadata['character_count']} characters, "
                    f"{metadata['line_count']} lines.")

            # Show context summary if available
            if braille_result.get('context_summary'):
                with st.expander("AI Context Understanding"):
                    st.write(braille_result['context_summary'])
        else:
            st.error(f"Braille translation failed: {braille_result.get('error', 'Unknown error')}")

        # Download options placeholder
        st.subheader("Download Options")
        st.info("PDF download will be implemented in Phase 5")
models/braille_translator.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import braille
2
+ from transformers import pipeline
3
+ import re
4
+
5
# Module-level cache for the summarization pipeline; filled on first use.
summarizer = None


def get_summarizer():
    """Return the cached summarization pipeline, creating it on first use.

    The pipeline is built for ``facebook/bart-large-cnn``. If construction
    fails the error is printed and ``None`` is returned; because nothing is
    cached in that case, the next call tries again.
    """
    global summarizer

    if summarizer is not None:
        return summarizer

    settings = {
        "model": "facebook/bart-large-cnn",
        "max_length": 100,
        "min_length": 30,
        "truncation": True,
    }
    try:
        summarizer = pipeline("summarization", **settings)
    except Exception as e:
        print(f"Error loading summarizer: {str(e)}")

    return summarizer
24
+
25
def text_to_braille(text, use_context=True):
    """
    Convert text to Braille, with an optional AI-generated context summary.

    Args:
        text: Text to convert to Braille
        use_context: Whether to additionally summarize the text. The summary
            is only attempted for texts longer than 200 characters and is
            returned alongside the Braille; it does not alter the translation.

    Returns:
        Dictionary with the keys ``braille_text``, ``formatted_braille``,
        ``context_summary`` and ``success``. On failure ``success`` is False,
        the text fields are empty, and ``error`` holds the message.
    """
    try:
        # Basic Braille translation
        # NOTE(review): relies on the imported `braille` package exposing grade2() - confirm.
        braille_text = braille.grade2(text)

        context_summary = None
        if use_context and len(text) > 200:  # Only for longer texts
            # Distinct local name so the module-level `summarizer` cache is not shadowed
            summary_model = get_summarizer()
            if summary_model:
                try:
                    summary_result = summary_model(text)
                    if summary_result and len(summary_result) > 0:
                        context_summary = summary_result[0]['summary_text']
                except Exception as e:
                    # Best effort: a failed summary must not fail the translation
                    print(f"Summarization error: {str(e)}")

        # Format the Braille text for better readability
        formatted_braille = format_braille_text(braille_text)

        return {
            'braille_text': braille_text,
            'formatted_braille': formatted_braille,
            'context_summary': context_summary,
            'success': True
        }
    except Exception as e:
        # Same keys as the success result so callers can index either one
        return {
            'braille_text': '',
            'formatted_braille': '',
            'context_summary': None,
            'error': str(e),
            'success': False
        }
68
+
69
def format_braille_text(braille_text, line_length=32):
    """
    Word-wrap Braille text for better readability.

    Each input line is wrapped independently at spaces. A word longer than
    ``line_length`` is never split; it is placed on a line of its own.

    Args:
        braille_text: Raw Braille text
        line_length: Maximum characters per line

    Returns:
        Formatted Braille text; the wrapped input lines are joined with a
        blank line between them.
    """
    formatted_paragraphs = []

    # Split text by existing newlines first
    for paragraph in braille_text.split('\n'):
        # Keep empty lines as empty entries
        if not paragraph.strip():
            formatted_paragraphs.append('')
            continue

        lines = []
        current_line = []
        current_length = 0

        for word in paragraph.split(' '):
            separator = 1 if current_length > 0 else 0
            if current_length + separator + len(word) > line_length:
                # Flush only a non-empty line: an over-long first word used to
                # emit a spurious empty line ahead of it.
                if current_line:
                    lines.append(' '.join(current_line))
                current_line = [word]
                current_length = len(word)
            else:
                current_line.append(word)
                current_length += separator + len(word)

        # Add the last line if not empty
        if current_line:
            lines.append(' '.join(current_line))

        formatted_paragraphs.append('\n'.join(lines))

    # Join paragraphs with double newlines
    return '\n\n'.join(formatted_paragraphs)
116
+
117
def get_braille_metadata(text):
    """
    Compute simple statistics about the text that was translated.

    Args:
        text: Original (pre-translation) text; ``None`` is treated as empty

    Returns:
        Dictionary with ``word_count``, ``character_count`` and ``line_count``
    """
    text = text or ''

    word_count = len(re.findall(r'\b\w+\b', text))
    character_count = len(text)
    # splitlines() does not report a phantom line after a trailing newline
    # and yields 0 lines for empty text.
    line_count = len(text.splitlines())

    return {
        'word_count': word_count,
        'character_count': character_count,
        'line_count': line_count
    }
models/document_ai.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import LayoutLMv2Processor, LayoutLMv2ForTokenClassification
3
+ from PIL import Image
4
+ import numpy as np
5
+
6
# Module-level caches; both are populated lazily by get_document_ai_models().
processor = None
model = None


def get_document_ai_models():
    """Return the cached LayoutLMv2 ``(processor, model)`` pair.

    Whichever of the two has not been loaded yet is fetched from the
    ``microsoft/layoutlmv2-base-uncased`` checkpoint and stored in the
    module-level globals, so later calls reuse the same objects.
    """
    global processor, model

    checkpoint = "microsoft/layoutlmv2-base-uncased"
    if processor is None:
        processor = LayoutLMv2Processor.from_pretrained(checkpoint)
    if model is None:
        model = LayoutLMv2ForTokenClassification.from_pretrained(checkpoint)

    return processor, model
18
+
19
def extract_text_and_layout(image):
    """
    Extract text and layout information using the LayoutLMv2 processor.

    Args:
        image: PIL Image object or numpy array

    Returns:
        Dictionary with:
            words: whitespace-separated words rebuilt from the tokenizer
                output, without special tokens
            boxes: bounding boxes from the encoding, one per token (special
                tokens included), so not aligned one-to-one with ``words``
            encoding: the raw processor output, kept for future processing
    """
    # Get models with lazy loading (the model itself is not used here)
    processor, _model = get_document_ai_models()

    # Convert numpy array to PIL Image if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    # Always normalise to RGB: grayscale or RGBA inputs were previously
    # converted only when they arrived as numpy arrays.
    image = image.convert("RGB")

    # Prepare inputs for the model
    encoding = processor(image, return_tensors="pt")

    # Drop [CLS]/[SEP]/padding ids so they do not leak into the text
    special_ids = set(processor.tokenizer.all_special_ids)
    token_ids = [tid for tid in encoding.input_ids[0].tolist() if tid not in special_ids]

    # Get words from the remaining ids
    tokens = processor.tokenizer.convert_ids_to_tokens(token_ids)
    words = processor.tokenizer.convert_tokens_to_string(tokens).split()

    # Get bounding boxes
    bbox = encoding.bbox[0]

    return {
        'words': words,
        'boxes': bbox.tolist(),
        'encoding': encoding,  # Keep for future processing
    }
models/text_processor.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
2
+ import torch
3
+ import json
4
+
5
# Primary and fallback instruction-tuned checkpoints, loaded with 8-bit
# quantization to reduce memory usage.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
FALLBACK_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

# Initialize with None - will be loaded on first use
tokenizer = None
text_generation_pipeline = None


def _build_text_pipeline(model_id):
    """Load tokenizer and 8-bit model for ``model_id``; return ``(tokenizer, pipeline)``."""
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Use 8-bit quantization to reduce memory usage
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16,
        load_in_8bit=True
    )

    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=loaded_tokenizer,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
        repetition_penalty=1.15
    )
    return loaded_tokenizer, generator


def get_text_pipeline():
    """
    Initialize or return the cached text generation pipeline.

    Tries ``MODEL_ID`` first and ``FALLBACK_MODEL_ID`` if that fails. The
    module-level ``tokenizer`` and ``text_generation_pipeline`` are assigned
    together, only after a model loaded completely, so they always refer to
    the same checkpoint.

    Returns:
        The text-generation pipeline, or None if neither model could be loaded
    """
    global tokenizer, text_generation_pipeline

    if text_generation_pipeline is not None:
        return text_generation_pipeline

    try:
        tokenizer, text_generation_pipeline = _build_text_pipeline(MODEL_ID)
    except Exception as e:
        print(f"Error loading primary model: {str(e)}")
        print(f"Falling back to {FALLBACK_MODEL_ID}")

        try:
            tokenizer, text_generation_pipeline = _build_text_pipeline(FALLBACK_MODEL_ID)
        except Exception as e2:
            print(f"Error loading fallback model: {str(e2)}")
            return None

    return text_generation_pipeline
74
+
75
+ def process_menu_text(raw_text):
76
+ """
77
+ Process raw OCR text using LLM to improve structure and readability.
78
+
79
+ Args:
80
+ raw_text: Raw text extracted from menu image
81
+
82
+ Returns:
83
+ Processed and structured menu text
84
+ """
85
+ # Get the pipeline
86
+ pipeline = get_text_pipeline()
87
+
88
+ if pipeline is None:
89
+ # Fallback to simple processing if model not available
90
+ return {
91
+ 'structured_text': raw_text,
92
+ 'menu_sections': [],
93
+ 'success': False,
94
+ 'error': "LLM model not available"
95
+ }
96
+
97
+ # Construct prompt for the LLM
98
+ prompt = f"""<|system|>
99
+ You are an AI assistant that helps structure menu text from OCR.
100
+ Your task is to clean up the text, correct obvious OCR errors, and structure it properly.
101
+ Identify menu sections, items, and prices.
102
+ Format your response as JSON with menu sections, items, and prices.
103
+ <|user|>
104
+ Here is the raw text extracted from a menu image:
105
+
106
+ {raw_text}
107
+
108
+ Please clean and structure this menu text. Format your response as JSON with the following structure:
109
+ {{
110
+ "menu_sections": [
111
+ {{
112
+ "section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
113
+ "items": [
114
+ {{
115
+ "name": "Item name",
116
+ "description": "Item description if available",
117
+ "price": "Price if available"
118
+ }}
119
+ ]
120
+ }}
121
+ ]
122
+ }}
123
+ <|assistant|>
124
+ """
125
+
126
+ try:
127
+ # Generate response from LLM
128
+ response = pipeline(prompt, return_full_text=False)[0]['generated_text']
129
+
130
+ # Extract JSON from response
131
+ response_text = response.strip()
132
+
133
+ # Find JSON in the response
134
+ json_start = response_text.find('{')
135
+ json_end = response_text.rfind('}') + 1
136
+
137
+ if json_start >= 0 and json_end > json_start:
138
+ json_str = response_text[json_start:json_end]
139
+ menu_data = json.loads(json_str)
140
+
141
+ # Reconstruct structured text
142
+ structured_text = ""
143
+ for section in menu_data.get('menu_sections', []):
144
+ structured_text += f"{section.get('section_name', 'Menu Items')}\n"
145
+ structured_text += "-" * len(section.get('section_name', 'Menu Items')) + "\n\n"
146
+
147
+ for item in section.get('items', []):
148
+ structured_text += f"{item.get('name', '')}"
149
+ if item.get('price'):
150
+ structured_text += f" - {item.get('price')}"
151
+ structured_text += "\n"
152
+
153
+ if item.get('description'):
154
+ structured_text += f" {item.get('description')}\n"
155
+
156
+ structured_text += "\n"
157
+
158
+ structured_text += "\n"
159
+
160
+ return {
161
+ 'structured_text': structured_text,
162
+ 'menu_data': menu_data,
163
+ 'success': True
164
+ }
165
+ else:
166
+ # Fallback to simple processing
167
+ return {
168
+ 'structured_text': raw_text,
169
+ 'menu_sections': [],
170
+ 'success': False,
171
+ 'error': "Failed to parse LLM response as JSON"
172
+ }
173
+
174
+ except Exception as e:
175
+ return {
176
+ 'structured_text': raw_text,
177
+ 'menu_sections': [],
178
+ 'success': False,
179
+ 'error': str(e)
180
+ }
models/text_processor_bk.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_cpp import Llama
2
+ import os
3
+ import json
4
+
5
# Location of the quantized Llama 3 GGUF weights, resolved relative to this file.
MODEL_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "../models/llama-3-8b-instruct.Q4_K_M.gguf",
)

# Nothing is downloaded here: when the weights are missing we only make sure
# the target directory exists and tell the user where to fetch the file.
if not os.path.exists(MODEL_PATH):
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    print(
        "\n".join(
            [
                f"Model not found at {MODEL_PATH}",
                "Please download the model from https://huggingface.co/TheBloke/Llama-3-8B-Instruct-GGUF",
                "and place it in the models directory",
            ]
        )
    )

# Module-level cache for the lazily constructed model; populated by get_llm().
llm = None
17
+
18
def get_llm():
    """Return the shared Llama instance, constructing it on first use.

    Returns:
        The cached model, or None when the weights file at MODEL_PATH does
        not exist (the existence check is repeated on every call until the
        model has been built).
    """
    global llm

    # Already built: hand back the cached instance.
    if llm is not None:
        return llm

    # Weights are not on disk yet: stay uninitialised so a later call can retry.
    if not os.path.exists(MODEL_PATH):
        return llm

    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=4096,        # context window
        n_gpu_layers=-1,   # use GPU if available
    )
    return llm
28
+
29
def _format_menu_sections(menu_data):
    """Render parsed menu JSON as plain text with one underlined heading per section."""
    parts = []
    for section in menu_data.get('menu_sections') or []:
        # The LLM may omit the name or emit null; fall back to a generic heading
        # instead of failing on len(None).
        heading = str(section.get('section_name') or 'Menu Items')
        parts.append(f"{heading}\n")
        parts.append("-" * len(heading) + "\n\n")

        for item in section.get('items') or []:
            entry = f"{item.get('name') or ''}"
            if item.get('price'):
                entry += f" - {item.get('price')}"
            parts.append(entry + "\n")

            if item.get('description'):
                parts.append(f" {item.get('description')}\n")

            parts.append("\n")

        parts.append("\n")

    # Single join instead of repeated string concatenation.
    return "".join(parts)


def process_menu_text(raw_text):
    """
    Process raw OCR text using LLM to improve structure and readability.

    Args:
        raw_text: Raw text extracted from menu image

    Returns:
        Dictionary that always contains 'structured_text', 'menu_sections'
        and 'success'. On success it also contains 'menu_data' (the full
        parsed JSON object); on failure it contains 'error' and
        'structured_text' is the unmodified raw_text.
    """
    llm = get_llm()

    if llm is None:
        # Fallback to simple processing if model not available
        return {
            'structured_text': raw_text,
            'menu_sections': [],
            'success': False,
            'error': "LLM model not available"
        }

    # Construct prompt for the LLM
    prompt = f"""
    You are an AI assistant that helps structure menu text from OCR.
    Below is the raw text extracted from a menu image.
    Please clean it up, correct any obvious OCR errors, and structure it properly.
    Identify menu sections, items, and prices.

    RAW MENU TEXT:
    {raw_text}

    Format your response as JSON with the following structure:
    {{
        "menu_sections": [
            {{
                "section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
                "items": [
                    {{
                        "name": "Item name",
                        "description": "Item description if available",
                        "price": "Price if available"
                    }}
                ]
            }}
        ]
    }}

    Only respond with the JSON, nothing else.
    """

    try:
        # Generate response from LLM
        # NOTE(review): max_tokens=4000 plus the prompt may exceed the 4096-token
        # context configured in get_llm() -- confirm how llama_cpp handles that.
        response = llm(prompt, max_tokens=4000, temperature=0.1, stop=["```"])

        # Extract JSON from response
        response_text = response['choices'][0]['text'].strip()

        # The model may wrap the JSON in extra prose: keep the outermost {...} span.
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1

        if json_start < 0 or json_end <= json_start:
            # Fallback to simple processing
            return {
                'structured_text': raw_text,
                'menu_sections': [],
                'success': False,
                'error': "Failed to parse LLM response as JSON"
            }

        menu_data = json.loads(response_text[json_start:json_end])

        return {
            'structured_text': _format_menu_sections(menu_data),
            'menu_data': menu_data,
            # Mirror the key the failure paths return so callers can read
            # 'menu_sections' regardless of outcome.
            'menu_sections': menu_data.get('menu_sections') or [],
            'success': True
        }

    except Exception as e:
        # Best-effort boundary: any LLM or parsing error degrades to the raw text.
        return {
            'structured_text': raw_text,
            'menu_sections': [],
            'success': False,
            'error': str(e)
        }
requirements.txt CHANGED
@@ -1,3 +1,13 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit>=1.22.0
2
+ pillow>=9.0.0
3
+ numpy>=1.22.0
4
+ torch>=2.0.0
5
+ transformers>=4.30.0
6
+ layoutlmv2>=0.1.0
7
+ pytesseract>=0.3.10
8
+ opencv-python>=4.7.0
9
+ sentence-transformers>=2.2.2
10
+ python-braille>=0.1.0
11
+ reportlab>=3.6.12
12
+
13
+
scripts/download_model.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import requests
4
+ from tqdm import tqdm
5
+ import huggingface_hub
6
+
7
+ # Add parent directory to path
8
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
def download_model():
    """
    Fetch the quantized Llama 3 GGUF file into the project's models directory.

    Returns:
        Path of the model file when it already exists or was downloaded,
        None when the download raised (manual instructions are printed).
    """
    repo_id = "TheBloke/Llama-3-8B-Instruct-GGUF"
    gguf_name = "llama-3-8b-instruct.Q4_K_M.gguf"

    # <project root>/models, where the project root is this script's parent directory.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    target_dir = os.path.join(project_root, "models")
    os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, gguf_name)

    # Nothing to do when the file is already in place.
    if os.path.exists(target_path):
        print(f"Model already exists at {target_path}")
        return target_path

    print(f"Downloading {gguf_name} from {repo_id}...")

    try:
        huggingface_hub.hf_hub_download(
            repo_id=repo_id,
            filename=gguf_name,
            local_dir=target_dir,
            local_dir_use_symlinks=False,
        )
        print(f"Model downloaded successfully to {target_path}")
        return target_path
    except Exception as e:
        # Any failure degrades to printed manual instructions.
        print(f"Error downloading model: {str(e)}")
        print("\nManual download instructions:")
        print(f"1. Go to https://huggingface.co/{repo_id}/tree/main")
        print(f"2. Download the file {gguf_name}")
        print(f"3. Place it in the models directory at {target_dir}")
        return None


if __name__ == "__main__":
    download_model()
tests/test_braille.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import time
4
+
5
+ # Add the parent directory to the path so we can import our modules
6
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
+
8
+ from models.braille_translator import text_to_braille, get_braille_metadata
9
+
10
def test_braille_translation(text):
    """
    Translate one text to Braille and report how the translation went.

    Args:
        text: Text to translate to Braille

    Returns:
        Dictionary with the original text, success flag, elapsed time, a
        Braille preview (at most 100 characters plus '...'), the word,
        character and line counts from get_braille_metadata, and an 'error'
        entry when the translation did not succeed.
    """
    started = time.time()

    try:
        outcome = text_to_braille(text, use_context=True)
        success = outcome['success']
        braille_text = outcome.get('formatted_braille', '')
        error = outcome.get('error', None)
    except Exception as exc:
        # A raised exception is reported as a failed translation.
        success, braille_text, error = False, '', str(exc)

    finished = time.time()

    metadata = get_braille_metadata(text)

    # Keep the report short: long translations are truncated to a preview.
    preview = braille_text
    if len(braille_text) > 100:
        preview = braille_text[:100] + '...'

    report = {
        'original_text': text,
        'success': success,
        'processing_time': finished - started,
        'braille_text': preview,
        'word_count': metadata['word_count'],
        'character_count': metadata['character_count'],
        'line_count': metadata['line_count'],
    }

    if not success:
        report['error'] = error

    return report
53
+
54
def run_braille_tests():
    """
    Translate a fixed set of sample menu texts and print a line per sample.

    Returns:
        List of the result dictionaries produced by test_braille_translation,
        in sample order.
    """
    simple_item = "Cheeseburger - $10.99\nServed with fries and a pickle."

    menu_section = "APPETIZERS\n-----------\nMozzarella Sticks - $7.99\nLoaded Nachos - $9.99\nBuffalo Wings - $12.99"

    # Multi-item menu with descriptions and blank separator lines.
    complex_menu = """MAIN COURSE
-------------
Grilled Salmon - $18.99
Fresh Atlantic salmon served with seasonal vegetables and rice pilaf.

Filet Mignon - $24.99
8oz center-cut filet served with mashed potatoes and asparagus.

Vegetable Pasta - $14.99
Penne pasta with seasonal vegetables in a creamy garlic sauce."""

    sample_texts = [simple_item, menu_section, complex_menu]

    results = []

    for number, sample in enumerate(sample_texts, start=1):
        print(f"\nTesting sample {number}...")
        outcome = test_braille_translation(sample)
        results.append(outcome)

        # Progress report for this sample.
        if outcome['success']:
            status = "SUCCESS"
        else:
            status = "FAILED"
        print(f"Sample {number}: {status}")
        print(f"Words: {outcome['word_count']}, Time: {outcome['processing_time']:.2f}s")
        print(f"Braille sample: {outcome['braille_text'][:50]}...")

    return results
96
+
97
if __name__ == "__main__":
    print("Testing Braille translation functionality...")
    results = run_braille_tests()

    # Summary: how many samples translated successfully.
    success_count = len([r for r in results if r['success']])
    print(f"\nSummary: {success_count}/{len(results)} tests passed")

    # Guard against dividing by zero when no samples were run.
    if results:
        total_time = sum(r['processing_time'] for r in results)
        avg_time = total_time / len(results)
        print(f"Average processing time: {avg_time:.2f} seconds")
tests/test_ocr.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import time
4
+ from PIL import Image
5
+ import numpy as np
6
+
7
+ # Add the parent directory to the path so we can import our modules
8
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
+
10
+ from utils.image_preprocessing import preprocess_image
11
+ from models.document_ai import extract_text_and_layout
12
+
13
def test_menu_extraction(image_path):
    """
    Test the OCR extraction on a single menu image.

    Loading, preprocessing and extraction all run inside one guarded section,
    so an unreadable or corrupt image is reported as a failed result instead
    of aborting the whole batch.

    Args:
        image_path: Path to the menu image

    Returns:
        Dictionary with the image path, success flag, elapsed time, extracted
        text, its length and word count, plus an 'error' entry on failure.
    """
    start_time = time.time()

    extracted_text = ''
    success = False
    error = None

    try:
        # The context manager releases the file handle even when a later step fails.
        with Image.open(image_path) as image:
            preprocessed_img = preprocess_image(image)
            result = extract_text_and_layout(preprocessed_img)

        extracted_text = ' '.join(result['words']) if 'words' in result else ''
        success = True
    except Exception as e:
        # Best-effort harness: record the failure and let the batch continue.
        extracted_text = ''
        success = False
        error = str(e)

    end_time = time.time()

    # Compile results
    test_results = {
        'image_path': image_path,
        'success': success,
        'processing_time': end_time - start_time,
        'extracted_text': extracted_text,
        'text_length': len(extracted_text),
        'word_count': len(extracted_text.split()) if extracted_text else 0
    }

    if not success:
        test_results['error'] = error

    return test_results
55
+
56
def run_batch_test(image_dir):
    """
    Run the extraction test on every PNG/JPEG file in a directory.

    Args:
        image_dir: Directory containing menu images

    Returns:
        List of result dictionaries, one per image, in os.listdir order.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    results = []

    for filename in os.listdir(image_dir):
        # Skip anything that is not an image (extension match is case-insensitive).
        if not filename.lower().endswith(image_suffixes):
            continue

        outcome = test_menu_extraction(os.path.join(image_dir, filename))
        results.append(outcome)

        # Progress line for this file.
        if outcome['success']:
            status = "SUCCESS"
        else:
            status = "FAILED"
        print(f"{filename}: {status} - {outcome['word_count']} words extracted")

    return results
79
+
80
if __name__ == "__main__":
    # Sample menus live in <project root>/assets/sample_menus.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sample_dir = os.path.join(project_root, "assets", "sample_menus")

    if not os.path.exists(sample_dir):
        print(f"Sample directory not found: {sample_dir}")
        print("Creating directory and downloading sample images...")
        os.makedirs(sample_dir, exist_ok=True)
        # No download is implemented; a note asks for images to be added manually.
        readme_path = os.path.join(sample_dir, "README.txt")
        with open(readme_path, "w") as f:
            f.write("Add sample menu images to this directory for testing.")

    results = run_batch_test(sample_dir)

    # Summary: how many images were processed successfully.
    success_count = len([r for r in results if r['success']])
    print(f"\nSummary: {success_count}/{len(results)} tests passed")

    # Averages are only meaningful when at least one image was processed.
    if results:
        total = len(results)
        avg_words = sum(r['word_count'] for r in results) / total
        avg_time = sum(r['processing_time'] for r in results) / total
        print(f"Average words extracted: {avg_words:.1f}")
        print(f"Average processing time: {avg_time:.2f} seconds")
utils/__init__.py ADDED
File without changes
utils/braille_display.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def text_to_unicode_braille(braille_text):
    """
    Return the Braille text as a single string, leaving every character as is.

    No dots-notation conversion is implemented: Unicode Braille patterns
    (U+2800..U+283F) and all other characters are passed through unchanged.
    The function exists as the single hook the display helpers call before
    rendering.

    Args:
        braille_text: Braille text (a string, or an iterable of strings)

    Returns:
        The concatenation of the input's characters, i.e. the same text
    """
    # The original branched on Braille vs. non-Braille characters but appended
    # the character unchanged in both branches; a single join is equivalent.
    return "".join(braille_text)
26
+
27
def create_braille_html(braille_text):
    """
    Create HTML to display Braille with proper styling.

    Args:
        braille_text: Braille text (either in dots or Unicode)

    Returns:
        HTML string: a styled <div> containing the text, HTML-escaped, with
        newlines rendered as <br>
    """
    # Local import so this block does not depend on the file's import section.
    from html import escape

    unicode_braille = text_to_unicode_braille(braille_text)

    # Escape first, then add <br>: untranslated characters such as '<' or '&'
    # must not be interpreted as markup. The replacement is done outside the
    # f-string because a backslash inside an f-string expression is a
    # SyntaxError before Python 3.12.
    body = escape(unicode_braille).replace('\n', '<br>')

    return f"""
    <div style="font-family: 'Courier New', monospace; font-size: 20px; line-height: 1.5;
    background-color: #f5f5f5; padding: 15px; border-radius: 5px;">
    {body}
    </div>
    """
49
+
50
def create_braille_comparison(text, braille_text):
    """
    Create a side-by-side comparison of text and its Braille representation.

    Lines are paired by position; when one side has fewer lines the missing
    cells are left empty. Both columns are HTML-escaped.

    Args:
        text: Original text
        braille_text: Braille translation

    Returns:
        HTML string (a <style> block plus a two-column table)
    """
    # Local imports so this block does not depend on the file's import section.
    from html import escape
    from itertools import zip_longest

    unicode_braille = text_to_unicode_braille(braille_text)

    # zip_longest pads the shorter side with '' so every row has two cells.
    line_pairs = zip_longest(
        text.split('\n'),
        unicode_braille.split('\n'),
        fillvalue='',
    )

    parts = ["""
    <style>
    .braille-table {
        width: 100%;
        border-collapse: collapse;
    }
    .braille-table td {
        padding: 8px;
        vertical-align: top;
        border-bottom: 1px solid #ddd;
    }
    .braille-text {
        font-family: 'Courier New', monospace;
        font-size: 20px;
        background-color: #f5f5f5;
    }
    .original-text {
        font-family: Arial, sans-serif;
    }
    </style>
    <table class="braille-table">
    <tr>
    <th>Original Text</th>
    <th>Braille Representation</th>
    </tr>
    """]

    for text_line, braille_line in line_pairs:
        # The text comes from OCR/LLM output: escape it so characters such as
        # '<' or '&' cannot break the table or inject markup.
        parts.append(f"""
    <tr>
    <td class="original-text">{escape(text_line)}</td>
    <td class="braille-text">{escape(braille_line)}</td>
    </tr>
    """)

    parts.append("</table>")

    return "".join(parts)
utils/image_processing.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from PIL import Image
4
+
5
def preprocess_image(image, target_size=(1000, 1000)):
    """
    Preprocess image for document analysis.

    The image is normalised to 3-channel RGB, resized to target_size (the
    aspect ratio is not preserved) and contrast-enhanced by applying CLAHE to
    the lightness channel in LAB space.

    Args:
        image: PIL Image object, or an image already held as a numpy array
        target_size: Tuple of (width, height) to resize to

    Returns:
        Preprocessed RGB image as numpy array
    """
    if isinstance(image, Image.Image):
        # Let Pillow normalise the mode. Palette ("P") images would otherwise
        # be read as a 2-D array of palette indices and CMYK as if it were
        # RGBA, both giving wrong colours.
        img_array = np.array(image.convert("RGB"))
    else:
        img_array = image
        # Bring raw arrays to 3 channels.
        # NOTE(review): array inputs are presumably RGB/RGBA ordered -- confirm with callers.
        if img_array.ndim == 2:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
        elif img_array.ndim == 3 and img_array.shape[2] == 1:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
        elif img_array.ndim == 3 and img_array.shape[2] == 4:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)

    # Resize image
    img_array = cv2.resize(img_array, target_size)

    # Enhance contrast on the lightness channel only, leaving the colour
    # channels (a, b) untouched.
    lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    cl = clahe.apply(l)
    enhanced_lab = cv2.merge((cl, a, b))
    enhanced_img = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)

    return enhanced_img
utils/pdf_generator.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from reportlab.lib.pagesizes import letter
4
+ from reportlab.lib import colors
5
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
6
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
7
+ from reportlab.pdfbase import pdfmetrics
8
+ from reportlab.pdfbase.ttfonts import TTFont
9
+ import io
10
+
11
# Try to register a font that supports Braille Unicode characters.
# Candidates are tried in order; the first one that exists and registers wins.
# NOTE(review): arial.ttf presumably lacks Braille glyphs -- confirm before
# relying on the Windows fallback.
font_paths = [
    "DejaVuSans.ttf",  # Common on Linux
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/System/Library/Fonts/Arial Unicode.ttf",  # Mac
    "C:\\Windows\\Fonts\\arial.ttf"  # Windows
]

# Read by the PDF builders below to choose between 'BrailleFont' and Helvetica.
font_registered = False
for font_path in font_paths:
    if not os.path.exists(font_path):
        continue
    try:
        pdfmetrics.registerFont(TTFont('BrailleFont', font_path))
    except Exception as e:
        # A broken font file must not stop the search: report it and try the next one.
        print(f"Error registering font: {str(e)}")
        continue
    font_registered = True
    break

if not font_registered:
    # Use default font if none of the above are found
    print("No suitable font found for Braille. Using default font.")
33
+
34
def create_braille_pdf(original_text, braille_text, title="Menu in Braille"):
    """
    Create a PDF file with original text and its Braille translation.

    The document contains the title, an "Original Text" section and a
    "Braille Translation" section. Each non-blank line becomes its own
    paragraph; blank lines become vertical space.

    Args:
        original_text: Original text content
        braille_text: Braille translation
        title: PDF title (passed to Paragraph as is, so it may contain
            ReportLab paragraph markup)

    Returns:
        BytesIO object containing the PDF, positioned at the start
    """
    # Local import so this block does not depend on the file's import section.
    from xml.sax.saxutils import escape

    # Create a BytesIO object to store the PDF
    buffer = io.BytesIO()

    # Letter page with one-inch (72 pt) margins on every side.
    doc = SimpleDocTemplate(
        buffer,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    # Define styles
    styles = getSampleStyleSheet()
    title_style = styles['Title']
    heading_style = styles['Heading2']
    normal_style = styles['Normal']

    # Braille uses the registered Unicode font when available, Helvetica otherwise.
    braille_style = ParagraphStyle(
        'Braille',
        parent=normal_style,
        fontName='BrailleFont' if font_registered else 'Helvetica',
        fontSize=14,
        leading=18,
        spaceAfter=12
    )

    content = [Paragraph(title, title_style), Spacer(1, 12)]

    sections = (
        ("Original Text", original_text, normal_style),
        ("Braille Translation", braille_text, braille_style),
    )
    for index, (heading, body, style) in enumerate(sections):
        if index:
            # Extra space between the two sections.
            content.append(Spacer(1, 24))

        content.append(Paragraph(heading, heading_style))
        content.append(Spacer(1, 6))

        for line in body.split('\n'):
            if line.strip():
                # Paragraph parses its text as XML-like markup, so menu text
                # such as "Fish & Chips" or "<$10" must be escaped first.
                content.append(Paragraph(escape(line), style))
            else:
                content.append(Spacer(1, 12))

    # Build the PDF
    doc.build(content)

    # Reset buffer position to the beginning
    buffer.seek(0)
    return buffer
112
+
113
def create_braille_pdf_with_comparison(original_text, braille_text, title="Menu in Braille"):
    """
    Create a PDF file with side-by-side comparison of original text and Braille.

    Lines are paired by position in a two-column table; when one side has
    fewer lines its missing cells are left blank.

    Args:
        original_text: Original text content
        braille_text: Braille translation
        title: PDF title (passed to Paragraph as is, so it may contain
            ReportLab paragraph markup)

    Returns:
        BytesIO object containing the PDF, positioned at the start
    """
    # Local imports so this block does not depend on the file's import section.
    from itertools import zip_longest
    from xml.sax.saxutils import escape

    # Create a BytesIO object to store the PDF
    buffer = io.BytesIO()

    # Letter page with one-inch (72 pt) margins on every side.
    doc = SimpleDocTemplate(
        buffer,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    # Define styles
    styles = getSampleStyleSheet()
    title_style = styles['Title']
    heading_style = styles['Heading2']
    normal_style = styles['Normal']

    # Braille uses the registered Unicode font when available, Helvetica otherwise.
    braille_style = ParagraphStyle(
        'Braille',
        parent=normal_style,
        fontName='BrailleFont' if font_registered else 'Helvetica',
        fontSize=14,
        leading=18
    )

    def _cell(line, style):
        """Return a paragraph for a non-blank line, vertical space otherwise."""
        if line.strip():
            # Paragraph parses its text as XML-like markup; escape '&', '<', '>'.
            return Paragraph(escape(line), style)
        return Spacer(1, 12)

    content = [Paragraph(title, title_style), Spacer(1, 12)]

    # Header row followed by one row per line pair.
    table_data = [
        [Paragraph("Original Text", heading_style), Paragraph("Braille Translation", heading_style)]
    ]
    line_pairs = zip_longest(
        original_text.split('\n'),
        braille_text.split('\n'),
        fillvalue='',
    )
    for original_line, braille_line in line_pairs:
        table_data.append([
            _cell(original_line, normal_style),
            _cell(braille_line, braille_style),
        ])

    # Two equal columns, each slightly narrower than half the frame width.
    table = Table(table_data, colWidths=[doc.width/2.0-12, doc.width/2.0-12])

    # Grid and grey background on the header row only; a box around the whole table.
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('GRID', (0, 0), (-1, 0), 1, colors.black),
        ('BOX', (0, 0), (-1, -1), 1, colors.black),
        ('BACKGROUND', (0, 0), (1, 0), colors.lightgrey)
    ]))

    content.append(table)

    # Build the PDF
    doc.build(content)

    # Reset buffer position to the beginning
    buffer.seek(0)
    return buffer