Upload 17 files

- README.md +61 -20
- app.py +171 -0
- app_bk.py +51 -0
- app_bk2.py +89 -0
- app_bk3.py +135 -0
- models/braille_translator.py +135 -0
- models/document_ai.py +53 -0
- models/text_processor.py +180 -0
- models/text_processor_bk.py +133 -0
- requirements.txt +13 -3
- scripts/download_model.py +50 -0
- tests/test_braille.py +107 -0
- tests/test_ocr.py +104 -0
- utils/__init__.py +0 -0
- utils/braille_display.py +111 -0
- utils/image_processing.py +39 -0
- utils/pdf_generator.py +198 -0
README.md
CHANGED
@@ -1,20 +1,61 @@
# Menu to Braille Converter

An AI-powered application that converts food menu images to Braille text for visually impaired users.

## Features

- Upload menu images
- Extract text using AI-powered document understanding (LayoutLMv2)
- Process and structure menu text using LLMs
- Convert text to Braille
- Display Braille in multiple formats (text, visual, side-by-side)
- Download as PDF in different formats

## Deployment on Hugging Face Spaces

### Option 1: Direct GitHub Repository Deployment

1. Fork this repository to your GitHub account
2. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
3. Click "Create new Space"
4. Choose "Streamlit" as the SDK
5. Connect your GitHub account and select this repository
6. Choose hardware requirements (a GPU is recommended for better performance)
7. Click "Create Space"

### Option 2: Manual Deployment

1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
2. Click "Create new Space"
3. Choose "Streamlit" as the SDK
4. Give your Space a name
5. Choose hardware requirements (a GPU is recommended for better performance)
6. Click "Create Space"
7. Clone the Space repository locally
8. Copy all files from this project into the cloned repository
9. Push the changes to the Space repository

## Hardware Requirements

- **Minimum**: CPU (2 vCPUs, 16 GB RAM)
- **Recommended**: GPU (T4 or better)

## Models Used

- **Document AI**: microsoft/layoutlmv2-base-uncased
- **Text Processing**: meta-llama/Meta-Llama-3-8B-Instruct (with fallback to mistralai/Mistral-7B-Instruct-v0.2)
- **Context Enhancement**: facebook/bart-large-cnn
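These models chain together through the repo's own modules. A minimal sketch of the end-to-end flow (the error handling from `app.py` is omitted, and `menu.jpg` is a placeholder path):

```python
from PIL import Image

from utils.image_processing import preprocess_image
from models.document_ai import extract_text_and_layout
from models.text_processor import process_menu_text
from models.braille_translator import text_to_braille

image = Image.open("menu.jpg")  # placeholder path
words = extract_text_and_layout(preprocess_image(image))["words"]
processed = process_menu_text(" ".join(words))  # falls back to the raw text on failure
result = text_to_braille(processed["structured_text"], use_context=True)
if result["success"]:
    print(result["formatted_braille"])
```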
## Local Development

1. Clone this repository
2. Install dependencies: `pip install -r requirements.txt`
3. Run the application: `streamlit run app.py`

## Future Enhancements

- Improved menu section recognition
- Support for multiple languages
- Physical Braille printer integration
- Mobile app version
app.py
ADDED
@@ -0,0 +1,171 @@
```python
import streamlit as st
from PIL import Image
import io
import numpy as np
import base64

# Import our custom modules (module name matches the committed utils/image_processing.py)
from utils.image_processing import preprocess_image
from models.document_ai import extract_text_and_layout
from models.text_processor import process_menu_text
from models.braille_translator import text_to_braille, get_braille_metadata
from utils.braille_display import create_braille_html, create_braille_comparison
from utils.pdf_generator import create_braille_pdf, create_braille_pdf_with_comparison

# Function to create a download link for a PDF
def get_pdf_download_link(pdf_bytes, filename="braille_menu.pdf", text="Download PDF"):
    """Generate a link to download the PDF file."""
    b64 = base64.b64encode(pdf_bytes.read()).decode()
    href = f'<a href="data:application/pdf;base64,{b64}" download="{filename}">{text}</a>'
    return href

# App title and description
st.title("Menu to Braille Converter")
st.write("Upload a menu image to convert it to Braille text")

# Sidebar for model settings
st.sidebar.header("Settings")
use_llm = st.sidebar.checkbox("Use LLM for text processing", value=True)
use_context = st.sidebar.checkbox("Use AI for context enhancement", value=True)
show_comparison = st.sidebar.checkbox("Show text/Braille comparison", value=True)

# Add information about the application
st.sidebar.markdown("---")
st.sidebar.subheader("About")
st.sidebar.info(
    "This application converts menu images to Braille text using AI. "
    "It extracts text from images using document AI, processes the text with LLMs, "
    "and converts to Braille."
)

# File uploader
uploaded_file = st.file_uploader("Choose a menu image...", type=["jpg", "jpeg", "png"])

# Display uploaded image and process it
if uploaded_file is not None:
    # Load and display image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Menu", use_column_width=True)

    # Add a button to process the image
    if st.button("Process Menu"):
        with st.spinner("Processing image..."):
            # Preprocess the image
            preprocessed_img = preprocess_image(image)

            # Extract text using LayoutLMv2
            try:
                result = extract_text_and_layout(preprocessed_img)

                # Display extracted words
                if result['words']:
                    raw_text = ' '.join(result['words'])

                    # Show raw text in an expandable section
                    with st.expander("Raw Extracted Text"):
                        st.text_area("Raw OCR Output", raw_text, height=150)

                    # Process text with LLM if enabled
                    if use_llm:
                        st.subheader("Processed Menu Text")
                        with st.spinner("Enhancing text with AI..."):
                            processed_result = process_menu_text(raw_text)

                        if processed_result['success']:
                            processed_text = processed_result['structured_text']
                            st.text_area("Structured Menu Text", processed_text, height=200)

                            # Store the processed result for later use
                            st.session_state.processed_text = processed_text
                            st.session_state.menu_data = processed_result.get('menu_data', {})
                        else:
                            st.warning(f"AI processing failed: {processed_result.get('error', 'Unknown error')}")
                            processed_text = raw_text
                            st.text_area("Text Output", processed_text, height=200)
                            st.session_state.processed_text = processed_text
                    else:
                        # Just use the raw text
                        st.subheader("Extracted Text")
                        processed_text = raw_text
                        st.text_area("Text Output", processed_text, height=200)
                        st.session_state.processed_text = processed_text

                    # Translate to Braille
                    st.subheader("Braille Translation")
                    with st.spinner("Translating to Braille..."):
                        braille_result = text_to_braille(processed_text, use_context=use_context)

                    if braille_result['success']:
                        # Store for download
                        st.session_state.braille_text = braille_result['formatted_braille']

                        # Display options
                        display_option = st.radio(
                            "Display format:",
                            ["Text Only", "Visual Braille", "Side-by-Side Comparison"]
                        )

                        if display_option == "Text Only":
                            # Display Braille text as plain text
                            st.text_area("Braille Output", braille_result['formatted_braille'], height=300)

                        elif display_option == "Visual Braille":
                            # Display Braille with visual representation
                            braille_html = create_braille_html(braille_result['formatted_braille'])
                            st.markdown(braille_html, unsafe_allow_html=True)

                        else:  # Side-by-Side Comparison
                            # Display side-by-side comparison
                            comparison_html = create_braille_comparison(
                                processed_text, braille_result['formatted_braille']
                            )
                            st.markdown(comparison_html, unsafe_allow_html=True)

                        # Display metadata
                        metadata = get_braille_metadata(processed_text)
                        st.info(f"Translation contains {metadata['word_count']} words, "
                                f"{metadata['character_count']} characters, "
                                f"{metadata['line_count']} lines.")

                        # Show context summary if available
                        if braille_result.get('context_summary'):
                            with st.expander("AI Context Understanding"):
                                st.write(braille_result['context_summary'])

                        # PDF Download section
                        st.subheader("Download Options")

                        pdf_option = st.selectbox(
                            "Select PDF format:",
                            ["Sequential (Text then Braille)", "Side-by-Side Comparison"]
                        )

                        pdf_title = st.text_input("PDF Title:", "Menu in Braille")

                        if st.button("Generate PDF"):
                            with st.spinner("Generating PDF..."):
                                if pdf_option == "Sequential (Text then Braille)":
                                    pdf_buffer = create_braille_pdf(
                                        processed_text,
                                        braille_result['formatted_braille'],
                                        title=pdf_title
                                    )
                                else:  # Side-by-Side Comparison
                                    pdf_buffer = create_braille_pdf_with_comparison(
                                        processed_text,
                                        braille_result['formatted_braille'],
                                        title=pdf_title
                                    )

                                # Create download link
                                st.markdown(
                                    get_pdf_download_link(pdf_buffer, f"{pdf_title.lower().replace(' ', '_')}.pdf"),
                                    unsafe_allow_html=True
                                )
                    else:
                        st.error(f"Braille translation failed: {braille_result.get('error', 'Unknown error')}")
                else:
                    st.warning("No text was extracted from the image.")

            except Exception as e:
                st.error(f"Error processing image: {str(e)}")
```
app_bk.py
ADDED
@@ -0,0 +1,51 @@
```python
import streamlit as st
from PIL import Image
import io
import numpy as np

# Import our custom modules (module name matches the committed utils/image_processing.py)
from utils.image_processing import preprocess_image
from models.document_ai import extract_text_and_layout

# App title and description
st.title("Menu to Braille Converter")
st.write("Upload a menu image to convert it to Braille text")

# File uploader
uploaded_file = st.file_uploader("Choose a menu image...", type=["jpg", "jpeg", "png"])

# Display uploaded image and process it
if uploaded_file is not None:
    # Load and display image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Menu", use_column_width=True)

    # Add a button to process the image
    if st.button("Extract Text"):
        with st.spinner("Processing image..."):
            # Preprocess the image
            st.subheader("Preprocessed Image")
            preprocessed_img = preprocess_image(image)
            st.image(preprocessed_img, caption="Preprocessed Image", use_column_width=True)

            # Extract text using LayoutLMv2
            st.subheader("Extracted Text")
            try:
                result = extract_text_and_layout(preprocessed_img)

                # Display extracted words
                if result['words']:
                    text = ' '.join(result['words'])
                    st.text_area("Extracted Text", text, height=200)
                else:
                    st.warning("No text was extracted from the image.")

            except Exception as e:
                st.error(f"Error processing image: {str(e)}")

# Placeholders for future functionality
st.subheader("Braille Translation")
st.info("Braille translation will be implemented in Phase 4")

st.subheader("Download Options")
st.info("PDF download will be implemented in Phase 5")
```
app_bk2.py
ADDED
@@ -0,0 +1,89 @@
```python
import streamlit as st
from PIL import Image
import io
import numpy as np

# Import our custom modules (module name matches the committed utils/image_processing.py)
from utils.image_processing import preprocess_image
from models.document_ai import extract_text_and_layout
from models.text_processor import process_menu_text

# App title and description
st.title("Menu to Braille Converter")
st.write("Upload a menu image to convert it to Braille text")

# Sidebar for model settings
st.sidebar.header("Settings")
use_llm = st.sidebar.checkbox("Use LLM for text processing", value=True)

# Add information about the application
st.sidebar.markdown("---")
st.sidebar.subheader("About")
st.sidebar.info(
    "This application converts menu images to Braille text using AI. "
    "It extracts text from images using document AI, processes the text with LLMs, "
    "and will convert to Braille in future versions."
)

# File uploader
uploaded_file = st.file_uploader("Choose a menu image...", type=["jpg", "jpeg", "png"])

# Display uploaded image and process it
if uploaded_file is not None:
    # Load and display image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Menu", use_column_width=True)

    # Add a button to process the image
    if st.button("Process Menu"):
        with st.spinner("Processing image..."):
            # Preprocess the image
            preprocessed_img = preprocess_image(image)

            # Extract text using LayoutLMv2
            try:
                result = extract_text_and_layout(preprocessed_img)

                # Display extracted words
                if result['words']:
                    raw_text = ' '.join(result['words'])

                    # Show raw text in an expandable section
                    with st.expander("Raw Extracted Text"):
                        st.text_area("Raw OCR Output", raw_text, height=150)

                    # Process text with LLM if enabled
                    if use_llm:
                        st.subheader("Processed Menu Text")
                        with st.spinner("Enhancing text with AI..."):
                            processed_result = process_menu_text(raw_text)

                        if processed_result['success']:
                            st.text_area("Structured Menu Text",
                                         processed_result['structured_text'],
                                         height=300)

                            # Store the processed result for later use
                            st.session_state.processed_text = processed_result['structured_text']
                            st.session_state.menu_data = processed_result.get('menu_data', {})
                        else:
                            st.warning(f"AI processing failed: {processed_result.get('error', 'Unknown error')}")
                            st.text_area("Text Output", raw_text, height=300)
                            st.session_state.processed_text = raw_text
                    else:
                        # Just use the raw text
                        st.subheader("Extracted Text")
                        st.text_area("Text Output", raw_text, height=300)
                        st.session_state.processed_text = raw_text
                else:
                    st.warning("No text was extracted from the image.")

            except Exception as e:
                st.error(f"Error processing image: {str(e)}")

# Placeholders for future functionality
st.subheader("Braille Translation")
st.info("Braille translation will be implemented in Phase 4")

st.subheader("Download Options")
st.info("PDF download will be implemented in Phase 5")
```
app_bk3.py
ADDED
@@ -0,0 +1,135 @@
```python
import streamlit as st
from PIL import Image
import io
import numpy as np

# Import our custom modules (module name matches the committed utils/image_processing.py)
from utils.image_processing import preprocess_image
from models.document_ai import extract_text_and_layout
from models.text_processor import process_menu_text
from models.braille_translator import text_to_braille, get_braille_metadata
from utils.braille_display import create_braille_html, create_braille_comparison

# App title and description
st.title("Menu to Braille Converter")
st.write("Upload a menu image to convert it to Braille text")

# Sidebar for model settings
st.sidebar.header("Settings")
use_llm = st.sidebar.checkbox("Use LLM for text processing", value=True)
use_context = st.sidebar.checkbox("Use AI for context enhancement", value=True)
show_comparison = st.sidebar.checkbox("Show text/Braille comparison", value=True)

# Add information about the application
st.sidebar.markdown("---")
st.sidebar.subheader("About")
st.sidebar.info(
    "This application converts menu images to Braille text using AI. "
    "It extracts text from images using document AI, processes the text with LLMs, "
    "and converts to Braille."
)

# File uploader
uploaded_file = st.file_uploader("Choose a menu image...", type=["jpg", "jpeg", "png"])

# Display uploaded image and process it
if uploaded_file is not None:
    # Load and display image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Menu", use_column_width=True)

    # Add a button to process the image
    if st.button("Process Menu"):
        with st.spinner("Processing image..."):
            # Preprocess the image
            preprocessed_img = preprocess_image(image)

            # Extract text using LayoutLMv2
            try:
                result = extract_text_and_layout(preprocessed_img)

                # Display extracted words
                if result['words']:
                    raw_text = ' '.join(result['words'])

                    # Show raw text in an expandable section
                    with st.expander("Raw Extracted Text"):
                        st.text_area("Raw OCR Output", raw_text, height=150)

                    # Process text with LLM if enabled
                    if use_llm:
                        st.subheader("Processed Menu Text")
                        with st.spinner("Enhancing text with AI..."):
                            processed_result = process_menu_text(raw_text)

                        if processed_result['success']:
                            processed_text = processed_result['structured_text']
                            st.text_area("Structured Menu Text", processed_text, height=200)

                            # Store the processed result for later use
                            st.session_state.processed_text = processed_text
                            st.session_state.menu_data = processed_result.get('menu_data', {})
                        else:
                            st.warning(f"AI processing failed: {processed_result.get('error', 'Unknown error')}")
                            processed_text = raw_text
                            st.text_area("Text Output", processed_text, height=200)
                            st.session_state.processed_text = processed_text
                    else:
                        # Just use the raw text
                        st.subheader("Extracted Text")
                        processed_text = raw_text
                        st.text_area("Text Output", processed_text, height=200)
                        st.session_state.processed_text = processed_text

                    # Translate to Braille
                    st.subheader("Braille Translation")
                    with st.spinner("Translating to Braille..."):
                        braille_result = text_to_braille(processed_text, use_context=use_context)

                    if braille_result['success']:
                        # Store for download
                        st.session_state.braille_text = braille_result['formatted_braille']

                        # Display options
                        display_option = st.radio(
                            "Display format:",
                            ["Text Only", "Visual Braille", "Side-by-Side Comparison"]
                        )

                        if display_option == "Text Only":
                            # Display Braille text as plain text
                            st.text_area("Braille Output", braille_result['formatted_braille'], height=300)

                        elif display_option == "Visual Braille":
                            # Display Braille with visual representation
                            braille_html = create_braille_html(braille_result['formatted_braille'])
                            st.markdown(braille_html, unsafe_allow_html=True)

                        else:  # Side-by-Side Comparison
                            # Display side-by-side comparison
                            comparison_html = create_braille_comparison(
                                processed_text, braille_result['formatted_braille']
                            )
                            st.markdown(comparison_html, unsafe_allow_html=True)

                        # Display metadata
                        metadata = get_braille_metadata(processed_text)
                        st.info(f"Translation contains {metadata['word_count']} words, "
                                f"{metadata['character_count']} characters, "
                                f"{metadata['line_count']} lines.")

                        # Show context summary if available
                        if braille_result.get('context_summary'):
                            with st.expander("AI Context Understanding"):
                                st.write(braille_result['context_summary'])
                    else:
                        st.error(f"Braille translation failed: {braille_result.get('error', 'Unknown error')}")

                    # Download options placeholder
                    st.subheader("Download Options")
                    st.info("PDF download will be implemented in Phase 5")
                else:
                    st.warning("No text was extracted from the image.")

            except Exception as e:
                st.error(f"Error processing image: {str(e)}")
```
models/braille_translator.py
ADDED
@@ -0,0 +1,135 @@
```python
import braille
from transformers import pipeline
import re

# Initialize the summarization pipeline for context understanding
summarizer = None

def get_summarizer():
    """Get or initialize the summarization model."""
    global summarizer
    if summarizer is None:
        try:
            # Use a small, efficient model for summarization
            summarizer = pipeline(
                "summarization",
                model="facebook/bart-large-cnn",
                max_length=100,
                min_length=30,
                truncation=True
            )
        except Exception as e:
            print(f"Error loading summarizer: {str(e)}")
    return summarizer

def text_to_braille(text, use_context=True):
    """
    Convert text to Braille, with optional context enhancement.

    Args:
        text: Text to convert to Braille
        use_context: Whether to use AI to enhance context understanding

    Returns:
        Dictionary with Braille text and metadata
    """
    try:
        # Basic Braille translation
        braille_text = braille.grade2(text)

        # If context enhancement is enabled
        context_summary = None
        if use_context and len(text) > 200:  # Only for longer texts
            summarizer = get_summarizer()
            if summarizer:
                try:
                    # Generate a summary to understand context
                    summary_result = summarizer(text)
                    if summary_result and len(summary_result) > 0:
                        context_summary = summary_result[0]['summary_text']
                except Exception as e:
                    print(f"Summarization error: {str(e)}")

        # Format the Braille text for better readability
        formatted_braille = format_braille_text(braille_text)

        return {
            'braille_text': braille_text,
            'formatted_braille': formatted_braille,
            'context_summary': context_summary,
            'success': True
        }
    except Exception as e:
        return {
            'braille_text': '',
            'error': str(e),
            'success': False
        }

def format_braille_text(braille_text, line_length=32):
    """
    Format Braille text for better readability.

    Args:
        braille_text: Raw Braille text
        line_length: Maximum characters per line

    Returns:
        Formatted Braille text
    """
    # Split text by existing newlines first
    paragraphs = braille_text.split('\n')
    formatted_paragraphs = []

    for paragraph in paragraphs:
        # Skip empty paragraphs
        if not paragraph.strip():
            formatted_paragraphs.append('')
            continue

        # Word wrap to line_length
        words = paragraph.split(' ')
        lines = []
        current_line = []
        current_length = 0

        for word in words:
            # If adding this word exceeds line length, start a new line
            if current_length + len(word) + (1 if current_length > 0 else 0) > line_length:
                lines.append(' '.join(current_line))
                current_line = [word]
                current_length = len(word)
            else:
                if current_length > 0:
                    current_length += 1  # Space
                current_line.append(word)
                current_length += len(word)

        # Add the last line if not empty
        if current_line:
            lines.append(' '.join(current_line))

        formatted_paragraphs.append('\n'.join(lines))

    # Join paragraphs with double newlines
    return '\n\n'.join(formatted_paragraphs)

def get_braille_metadata(text):
    """
    Get metadata about the Braille translation.

    Args:
        text: Original text

    Returns:
        Dictionary with metadata
    """
    word_count = len(re.findall(r'\b\w+\b', text))
    character_count = len(text)
    line_count = len(text.split('\n'))

    return {
        'word_count': word_count,
        'character_count': character_count,
        'line_count': line_count
    }
```
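A minimal usage sketch for this module, assuming the `braille` package named in requirements.txt exposes the `grade2` function the code above calls:

```python
from models.braille_translator import text_to_braille, get_braille_metadata

result = text_to_braille("Cheeseburger - $10.99", use_context=False)
if result["success"]:
    print(result["formatted_braille"])                  # wrapped to 32 cells per line
    print(get_braille_metadata("Cheeseburger - $10.99"))  # word/character/line counts
```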
models/document_ai.py
ADDED
@@ -0,0 +1,53 @@
```python
import torch
from transformers import LayoutLMv2Processor, LayoutLMv2ForTokenClassification
from PIL import Image
import numpy as np

# Initialize the model and processor with caching
processor = None
model = None

def get_document_ai_models():
    """Get or initialize document AI models with proper caching."""
    global processor, model
    if processor is None:
        processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
    if model is None:
        model = LayoutLMv2ForTokenClassification.from_pretrained("microsoft/layoutlmv2-base-uncased")
    return processor, model

def extract_text_and_layout(image):
    """
    Extract text and layout information using LayoutLMv2.

    Args:
        image: PIL Image object

    Returns:
        Dictionary with extracted text and layout information
    """
    # Get models with lazy loading
    processor, model = get_document_ai_models()

    # Convert numpy array to PIL Image if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image).convert("RGB")

    # Prepare inputs for the model; the processor runs Tesseract OCR
    # internally (apply_ocr=True by default), so pytesseract must be available
    encoding = processor(image, return_tensors="pt")

    # Get the input_ids (tokenized text)
    input_ids = encoding.input_ids

    # Recover words from input_ids, skipping [CLS]/[SEP] special tokens
    words = processor.tokenizer.decode(input_ids[0], skip_special_tokens=True).split()

    # Get bounding boxes (in LayoutLM's normalized 0-1000 coordinate space)
    bbox = encoding.bbox[0]

    return {
        'words': words,
        'boxes': bbox.tolist(),
        'encoding': encoding,  # Keep for future processing
    }
```
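A minimal usage sketch for this module. `menu.png` is a placeholder path; note that the processor shells out to Tesseract for OCR, and LayoutLMv2's visual backbone additionally requires detectron2 at load time:

```python
from PIL import Image
from models.document_ai import extract_text_and_layout

image = Image.open("menu.png")  # placeholder path
result = extract_text_and_layout(image)
print(result["words"][:10])  # first OCR'd words
print(result["boxes"][0])    # bounding box in the 0-1000 coordinate space
```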
models/text_processor.py
ADDED
@@ -0,0 +1,180 @@
```python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import json

# Model ID for a smaller model suitable for Spaces
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
FALLBACK_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

# Initialize with None - will be loaded on first use
tokenizer = None
text_generation_pipeline = None

def get_text_pipeline():
    """
    Initialize or return the text generation pipeline.
    Uses smaller models that work well on Spaces.
    """
    global tokenizer, text_generation_pipeline

    if text_generation_pipeline is None:
        try:
            # Try to load primary model
            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

            # Use 8-bit quantization to reduce memory usage
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
                device_map="auto",
                torch_dtype=torch.float16,
                load_in_8bit=True
            )

            # Create the pipeline
            text_generation_pipeline = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=1024,
                do_sample=True,
                temperature=0.3,
                top_p=0.95,
                repetition_penalty=1.15
            )

        except Exception as e:
            print(f"Error loading primary model: {str(e)}")
            print(f"Falling back to {FALLBACK_MODEL_ID}")

            try:
                # Fall back to Mistral model which is more widely available
                tokenizer = AutoTokenizer.from_pretrained(FALLBACK_MODEL_ID)
                model = AutoModelForCausalLM.from_pretrained(
                    FALLBACK_MODEL_ID,
                    device_map="auto",
                    torch_dtype=torch.float16,
                    load_in_8bit=True
                )

                text_generation_pipeline = pipeline(
                    "text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=1024,
                    do_sample=True,
                    temperature=0.3,
                    top_p=0.95,
                    repetition_penalty=1.15
                )
            except Exception as e2:
                print(f"Error loading fallback model: {str(e2)}")
                return None

    return text_generation_pipeline

def process_menu_text(raw_text):
    """
    Process raw OCR text using LLM to improve structure and readability.

    Args:
        raw_text: Raw text extracted from menu image

    Returns:
        Processed and structured menu text
    """
    # Get the pipeline (local name avoids shadowing transformers.pipeline)
    generator = get_text_pipeline()

    if generator is None:
        # Fallback to simple processing if model not available
        return {
            'structured_text': raw_text,
            'menu_sections': [],
            'success': False,
            'error': "LLM model not available"
        }

    # Construct prompt for the LLM
    prompt = f"""<|system|>
You are an AI assistant that helps structure menu text from OCR.
Your task is to clean up the text, correct obvious OCR errors, and structure it properly.
Identify menu sections, items, and prices.
Format your response as JSON with menu sections, items, and prices.
<|user|>
Here is the raw text extracted from a menu image:

{raw_text}

Please clean and structure this menu text. Format your response as JSON with the following structure:
{{
  "menu_sections": [
    {{
      "section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
      "items": [
        {{
          "name": "Item name",
          "description": "Item description if available",
          "price": "Price if available"
        }}
      ]
    }}
  ]
}}
<|assistant|>
"""

    try:
        # Generate response from LLM
        response = generator(prompt, return_full_text=False)[0]['generated_text']

        # Extract JSON from response
        response_text = response.strip()

        # Find JSON in the response
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1

        if json_start >= 0 and json_end > json_start:
            json_str = response_text[json_start:json_end]
            menu_data = json.loads(json_str)

            # Reconstruct structured text
            structured_text = ""
            for section in menu_data.get('menu_sections', []):
                structured_text += f"{section.get('section_name', 'Menu Items')}\n"
                structured_text += "-" * len(section.get('section_name', 'Menu Items')) + "\n\n"

                for item in section.get('items', []):
                    structured_text += f"{item.get('name', '')}"
                    if item.get('price'):
                        structured_text += f" - {item.get('price')}"
                    structured_text += "\n"

                    if item.get('description'):
                        structured_text += f"  {item.get('description')}\n"

                    structured_text += "\n"

                structured_text += "\n"

            return {
                'structured_text': structured_text,
                'menu_data': menu_data,
                'success': True
            }
        else:
            # Fallback to simple processing
            return {
                'structured_text': raw_text,
                'menu_sections': [],
                'success': False,
                'error': "Failed to parse LLM response as JSON"
            }

    except Exception as e:
        return {
            'structured_text': raw_text,
            'menu_sections': [],
            'success': False,
            'error': str(e)
        }
```
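A minimal usage sketch; the sample OCR string is illustrative:

```python
from models.text_processor import process_menu_text

raw = "APPETIZERS Mozzarella Sticks 7.99 Loaded Nachos 9.99 Buffalo Wings 12.99"
result = process_menu_text(raw)
if result["success"]:
    print(result["structured_text"])  # sections/items/prices reflowed from the JSON
else:
    print(result["error"])            # structured_text falls back to the raw input
```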
models/text_processor_bk.py
ADDED
@@ -0,0 +1,133 @@
```python
from llama_cpp import Llama
import os
import json

# Initialize the model (will download on first run)
MODEL_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/llama-3-8b-instruct.Q4_K_M.gguf")

# Check if model exists, if not provide instructions
if not os.path.exists(MODEL_PATH):
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    print(f"Model not found at {MODEL_PATH}")
    print("Please download the model from https://huggingface.co/TheBloke/Llama-3-8B-Instruct-GGUF")
    print("and place it in the models directory")

# Initialize model with lazy loading
llm = None

def get_llm():
    """Get or initialize the LLM."""
    global llm
    if llm is None and os.path.exists(MODEL_PATH):
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=4096,       # Context window
            n_gpu_layers=-1   # Use GPU if available
        )
    return llm

def process_menu_text(raw_text):
    """
    Process raw OCR text using LLM to improve structure and readability.

    Args:
        raw_text: Raw text extracted from menu image

    Returns:
        Processed and structured menu text
    """
    llm = get_llm()

    if llm is None:
        # Fallback to simple processing if model not available
        return {
            'structured_text': raw_text,
            'menu_sections': [],
            'success': False,
            'error': "LLM model not available"
        }

    # Construct prompt for the LLM
    prompt = f"""
You are an AI assistant that helps structure menu text from OCR.
Below is the raw text extracted from a menu image.
Please clean it up, correct any obvious OCR errors, and structure it properly.
Identify menu sections, items, and prices.

RAW MENU TEXT:
{raw_text}

Format your response as JSON with the following structure:
{{
  "menu_sections": [
    {{
      "section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
      "items": [
        {{
          "name": "Item name",
          "description": "Item description if available",
          "price": "Price if available"
        }}
      ]
    }}
  ]
}}

Only respond with the JSON, nothing else.
"""

    try:
        # Generate response from LLM
        response = llm(prompt, max_tokens=4000, temperature=0.1, stop=["```"])

        # Extract JSON from response
        response_text = response['choices'][0]['text'].strip()

        # Find JSON in the response
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1

        if json_start >= 0 and json_end > json_start:
            json_str = response_text[json_start:json_end]
            menu_data = json.loads(json_str)

            # Reconstruct structured text
            structured_text = ""
            for section in menu_data.get('menu_sections', []):
                structured_text += f"{section.get('section_name', 'Menu Items')}\n"
                structured_text += "-" * len(section.get('section_name', 'Menu Items')) + "\n\n"

                for item in section.get('items', []):
                    structured_text += f"{item.get('name', '')}"
                    if item.get('price'):
                        structured_text += f" - {item.get('price')}"
                    structured_text += "\n"

                    if item.get('description'):
                        structured_text += f"  {item.get('description')}\n"

                    structured_text += "\n"

                structured_text += "\n"

            return {
                'structured_text': structured_text,
                'menu_data': menu_data,
                'success': True
            }
        else:
            # Fallback to simple processing
            return {
                'structured_text': raw_text,
                'menu_sections': [],
                'success': False,
                'error': "Failed to parse LLM response as JSON"
            }

    except Exception as e:
        return {
            'structured_text': raw_text,
            'menu_sections': [],
            'success': False,
            'error': str(e)
        }
```
requirements.txt
CHANGED
@@ -1,3 +1,13 @@
```text
streamlit>=1.22.0
pillow>=9.0.0
numpy>=1.22.0
torch>=2.0.0
transformers>=4.30.0
layoutlmv2>=0.1.0
pytesseract>=0.3.10
opencv-python>=4.7.0
sentence-transformers>=2.2.2
python-braille>=0.1.0
reportlab>=3.6.12
```
scripts/download_model.py
ADDED
@@ -0,0 +1,50 @@
```python
import os
import sys
import requests
from tqdm import tqdm
import huggingface_hub

# Add parent directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

def download_model():
    """
    Download the Llama 3 model from Hugging Face.
    """
    model_name = "TheBloke/Llama-3-8B-Instruct-GGUF"
    filename = "llama-3-8b-instruct.Q4_K_M.gguf"

    # Create models directory if it doesn't exist
    models_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "models")
    os.makedirs(models_dir, exist_ok=True)

    model_path = os.path.join(models_dir, filename)

    if os.path.exists(model_path):
        print(f"Model already exists at {model_path}")
        return model_path

    print(f"Downloading {filename} from {model_name}...")

    try:
        # Download using huggingface_hub
        huggingface_hub.hf_hub_download(
            repo_id=model_name,
            filename=filename,
            local_dir=models_dir,
            local_dir_use_symlinks=False
        )

        print(f"Model downloaded successfully to {model_path}")
        return model_path

    except Exception as e:
        print(f"Error downloading model: {str(e)}")
        print("\nManual download instructions:")
        print(f"1. Go to https://huggingface.co/{model_name}/tree/main")
        print(f"2. Download the file {filename}")
        print(f"3. Place it in the models directory at {models_dir}")
        return None

if __name__ == "__main__":
    download_model()
```
tests/test_braille.py
ADDED
@@ -0,0 +1,107 @@
```python
import os
import sys
import time

# Add the parent directory to the path so we can import our modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from models.braille_translator import text_to_braille, get_braille_metadata

def test_braille_translation(text):
    """
    Test Braille translation on a given text.

    Args:
        text: Text to translate to Braille

    Returns:
        Dictionary with test results
    """
    start_time = time.time()

    # Translate to Braille
    try:
        result = text_to_braille(text, use_context=True)
        success = result['success']
        braille_text = result.get('formatted_braille', '')
        error = result.get('error', None)
    except Exception as e:
        success = False
        braille_text = ''
        error = str(e)

    end_time = time.time()

    # Get metadata
    metadata = get_braille_metadata(text)

    # Compile results
    test_results = {
        'original_text': text,
        'success': success,
        'processing_time': end_time - start_time,
        'braille_text': braille_text[:100] + '...' if len(braille_text) > 100 else braille_text,
        'word_count': metadata['word_count'],
        'character_count': metadata['character_count'],
        'line_count': metadata['line_count']
    }

    if not success:
        test_results['error'] = error

    return test_results

def run_braille_tests():
    """
    Run tests on sample menu texts.

    Returns:
        List of test results
    """
    # Sample menu texts
    sample_texts = [
        # Simple menu item
        "Cheeseburger - $10.99\nServed with fries and a pickle.",

        # Menu section
        "APPETIZERS\n-----------\nMozzarella Sticks - $7.99\nLoaded Nachos - $9.99\nBuffalo Wings - $12.99",

        # Complex menu with formatting
        """MAIN COURSE
-------------
Grilled Salmon - $18.99
Fresh Atlantic salmon served with seasonal vegetables and rice pilaf.

Filet Mignon - $24.99
8oz center-cut filet served with mashed potatoes and asparagus.

Vegetable Pasta - $14.99
Penne pasta with seasonal vegetables in a creamy garlic sauce."""
    ]

    results = []

    for i, text in enumerate(sample_texts):
        print(f"\nTesting sample {i+1}...")
        result = test_braille_translation(text)
        results.append(result)

        # Print progress
        status = "SUCCESS" if result['success'] else "FAILED"
        print(f"Sample {i+1}: {status}")
        print(f"Words: {result['word_count']}, Time: {result['processing_time']:.2f}s")
        print(f"Braille sample: {result['braille_text'][:50]}...")

    return results

if __name__ == "__main__":
    print("Testing Braille translation functionality...")
    results = run_braille_tests()

    # Print summary
    success_count = sum(1 for r in results if r['success'])
    print(f"\nSummary: {success_count}/{len(results)} tests passed")

    if results:
        avg_time = sum(r['processing_time'] for r in results) / len(results)
        print(f"Average processing time: {avg_time:.2f} seconds")
```
tests/test_ocr.py
ADDED
@@ -0,0 +1,104 @@
```python
import os
import sys
import time
from PIL import Image
import numpy as np

# Add the parent directory to the path so we can import our modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from utils.image_processing import preprocess_image
from models.document_ai import extract_text_and_layout

def test_menu_extraction(image_path):
    """
    Test the OCR extraction on a single menu image.

    Args:
        image_path: Path to the menu image

    Returns:
        Dictionary with test results
    """
    start_time = time.time()

    # Load and preprocess image
    image = Image.open(image_path)
    preprocessed_img = preprocess_image(image)

    # Extract text
    error = None
    try:
        result = extract_text_and_layout(preprocessed_img)
        extracted_text = ' '.join(result['words']) if 'words' in result else ''
        success = True
    except Exception as e:
        extracted_text = ''
        success = False
        error = str(e)

    end_time = time.time()

    # Compile results
    test_results = {
        'image_path': image_path,
        'success': success,
        'processing_time': end_time - start_time,
        'extracted_text': extracted_text,
        'text_length': len(extracted_text),
        'word_count': len(extracted_text.split()) if extracted_text else 0
    }

    if not success:
        test_results['error'] = error

    return test_results

def run_batch_test(image_dir):
    """
    Run tests on all images in a directory.

    Args:
        image_dir: Directory containing menu images

    Returns:
        List of test results
    """
    results = []

    for filename in os.listdir(image_dir):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(image_dir, filename)
            result = test_menu_extraction(image_path)
            results.append(result)

            # Print progress
            status = "SUCCESS" if result['success'] else "FAILED"
            print(f"{filename}: {status} - {result['word_count']} words extracted")

    return results

if __name__ == "__main__":
    # Test with sample menus in the assets directory
    sample_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                              "assets", "sample_menus")

    if not os.path.exists(sample_dir):
        print(f"Sample directory not found: {sample_dir}")
        print("Creating directory and downloading sample images...")
        os.makedirs(sample_dir, exist_ok=True)
        # You would add code here to download sample images
        # For now, just create a note to add sample images manually
        with open(os.path.join(sample_dir, "README.txt"), "w") as f:
            f.write("Add sample menu images to this directory for testing.")

    results = run_batch_test(sample_dir)

    # Print summary
    success_count = sum(1 for r in results if r['success'])
    print(f"\nSummary: {success_count}/{len(results)} tests passed")

    if results:
        avg_words = sum(r['word_count'] for r in results) / len(results)
        avg_time = sum(r['processing_time'] for r in results) / len(results)
        print(f"Average words extracted: {avg_words:.1f}")
        print(f"Average processing time: {avg_time:.2f} seconds")
```
utils/__init__.py
ADDED
File without changes
utils/braille_display.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def text_to_unicode_braille(braille_text):
|
2 |
+
"""
|
3 |
+
Convert Braille dots notation to Unicode Braille symbols.
|
4 |
+
|
5 |
+
Args:
|
6 |
+
braille_text: Braille text in dots notation
|
7 |
+
|
8 |
+
Returns:
|
9 |
+
Text with Unicode Braille symbols
|
10 |
+
"""
|
11 |
+
# Mapping from Braille dots to Unicode Braille patterns
|
12 |
+
# Unicode Braille patterns start at U+2800 (⠀)
|
13 |
+
unicode_base = 0x2800
|
14 |
+
|
15 |
+
# Convert each Braille character to its Unicode equivalent
|
16 |
+
unicode_braille = ""
|
17 |
+
for char in braille_text:
|
18 |
+
# Check if the character is a standard Braille pattern
|
19 |
+
if char in "⠀⠁⠂⠃⠄⠅⠆⠇⠈⠉⠊⠋⠌⠍⠎⠏⠐⠑⠒⠓⠔⠕⠖⠗⠘⠙⠚⠛⠜⠝⠞⠟⠠⠡⠢⠣⠤⠥⠦⠧⠨⠩⠪⠫⠬⠭⠮⠯⠰⠱⠲⠳⠴⠵⠶⠷⠸⠹⠺⠻⠼⠽⠾⠿":
|
20 |
+
unicode_braille += char
|
21 |
+
else:
|
22 |
+
# For non-Braille characters, keep them as is
|
23 |
+
unicode_braille += char
|
24 |
+
|
25 |
+
return unicode_braille
|
26 |
+
|
27 |
+
def create_braille_html(braille_text):
|
28 |
+
"""
|
29 |
+
Create HTML to display Braille with proper styling.
|
30 |
+
|
31 |
+
Args:
|
32 |
+
braille_text: Braille text (either in dots or Unicode)
|
33 |
+
|
34 |
+
Returns:
|
35 |
+
HTML string for displaying Braille
|
36 |
+
"""
|
37 |
+
# Convert to Unicode Braille if not already
|
38 |
+
unicode_braille = text_to_unicode_braille(braille_text)
|
39 |
+
|
40 |
+
# Create HTML with proper styling
|
41 |
+
html = f"""
|
42 |
+
<div style="font-family: 'Courier New', monospace; font-size: 20px; line-height: 1.5;
|
43 |
+
background-color: #f5f5f5; padding: 15px; border-radius: 5px;">
|
44 |
+
{unicode_braille.replace('\n', '<br>')}
|
45 |
+
</div>
|
46 |
+
"""
|
47 |
+
|
48 |
+
return html
|
49 |
+
|
def create_braille_comparison(text, braille_text):
    """
    Create a side-by-side comparison of text and its Braille representation.

    Args:
        text: Original text
        braille_text: Braille translation

    Returns:
        HTML string for displaying the comparison
    """
    # Convert to Unicode Braille
    unicode_braille = text_to_unicode_braille(braille_text)

    # Split into lines
    text_lines = text.split('\n')
    braille_lines = unicode_braille.split('\n')

    # Ensure both lists have the same length
    max_lines = max(len(text_lines), len(braille_lines))
    text_lines = text_lines + [''] * (max_lines - len(text_lines))
    braille_lines = braille_lines + [''] * (max_lines - len(braille_lines))

    # Create HTML table for comparison
    html = """
    <style>
    .braille-table {
        width: 100%;
        border-collapse: collapse;
    }
    .braille-table td {
        padding: 8px;
        vertical-align: top;
        border-bottom: 1px solid #ddd;
    }
    .braille-text {
        font-family: 'Courier New', monospace;
        font-size: 20px;
        background-color: #f5f5f5;
    }
    .original-text {
        font-family: Arial, sans-serif;
    }
    </style>
    <table class="braille-table">
        <tr>
            <th>Original Text</th>
            <th>Braille Representation</th>
        </tr>
    """

    for i in range(max_lines):
        html += f"""
        <tr>
            <td class="original-text">{text_lines[i]}</td>
            <td class="braille-text">{braille_lines[i]}</td>
        </tr>
        """

    html += "</table>"

    return html
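
A minimal rendering sketch for these helpers, assuming they are called from the Streamlit app (not part of the uploaded files; the sample Braille string is hypothetical, and `st.markdown` needs `unsafe_allow_html=True` to render raw HTML):

import streamlit as st
from utils.braille_display import create_braille_html, create_braille_comparison

# Unicode Braille encodes dots 1-8 as bits 0-7 above U+2800, e.g.
# dots 1+3+4 -> 0b1101 -> chr(0x2800 + 0b1101) == '⠍' (the letter m).
braille = "⠍⠑⠝⠥"  # hypothetical translator output for "menu"

st.markdown(create_braille_html(braille), unsafe_allow_html=True)
st.markdown(create_braille_comparison("menu", braille), unsafe_allow_html=True)
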
utils/image_processing.py
ADDED
@@ -0,0 +1,39 @@
import cv2
import numpy as np
from PIL import Image

def preprocess_image(image, target_size=(1000, 1000)):
    """
    Preprocess image for document analysis.

    Args:
        image: PIL Image object
        target_size: Tuple of (width, height) to resize to

    Returns:
        Preprocessed image as numpy array
    """
    # Convert PIL Image to numpy array if needed
    if isinstance(image, Image.Image):
        img_array = np.array(image)
    else:
        img_array = image

    # Convert to RGB if grayscale
    if len(img_array.shape) == 2:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
    elif img_array.shape[2] == 4:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)

    # Resize image
    img_array = cv2.resize(img_array, target_size)

    # Enhance contrast with CLAHE on the lightness channel
    lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    cl = clahe.apply(l)
    enhanced_lab = cv2.merge((cl, a, b))
    enhanced_img = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)

    return enhanced_img
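
A quick usage sketch for the preprocessing helper (the file name is a hypothetical local test image):

from PIL import Image
from utils.image_processing import preprocess_image

# CLAHE equalizes contrast locally on the L (lightness) channel, which
# helps OCR on dim or unevenly lit menu photos without distorting color.
img = Image.open("sample_menu.jpg")  # hypothetical test image
processed = preprocess_image(img)    # numpy array, shape (1000, 1000, 3)
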
utils/pdf_generator.py
ADDED
@@ -0,0 +1,198 @@
import os
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import io

# Try to register a font that supports Braille Unicode characters.
# Initialize the flag before the try block so it is always defined,
# even if registration raises before the loop completes.
font_registered = False
try:
    # Check for common fonts with Braille Unicode coverage
    font_paths = [
        "DejaVuSans.ttf",  # Common on Linux
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/System/Library/Fonts/Arial Unicode.ttf",  # Mac
        "C:\\Windows\\Fonts\\arial.ttf"  # Windows
    ]

    for font_path in font_paths:
        if os.path.exists(font_path):
            pdfmetrics.registerFont(TTFont('BrailleFont', font_path))
            font_registered = True
            break

    if not font_registered:
        # Fall back to the default font if none of the above are found
        print("No suitable font found for Braille. Using default font.")
except Exception as e:
    print(f"Error registering font: {str(e)}")

def create_braille_pdf(original_text, braille_text, title="Menu in Braille"):
    """
    Create a PDF file with original text and its Braille translation.

    Args:
        original_text: Original text content
        braille_text: Braille translation
        title: PDF title

    Returns:
        BytesIO object containing the PDF
    """
    # Create a BytesIO object to store the PDF
    buffer = io.BytesIO()

    # Create the PDF document
    doc = SimpleDocTemplate(
        buffer,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    # Define styles
    styles = getSampleStyleSheet()
    title_style = styles['Title']
    heading_style = styles['Heading2']
    normal_style = styles['Normal']

    # Create a custom style for Braille text
    braille_style = ParagraphStyle(
        'Braille',
        parent=normal_style,
        fontName='BrailleFont' if font_registered else 'Helvetica',
        fontSize=14,
        leading=18,
        spaceAfter=12
    )

    # Create the content
    content = []

    # Add title
    content.append(Paragraph(title, title_style))
    content.append(Spacer(1, 12))

    # Add original text section
    content.append(Paragraph("Original Text", heading_style))
    content.append(Spacer(1, 6))

    # Split original text by lines and add each as a paragraph
    for line in original_text.split('\n'):
        if line.strip():
            content.append(Paragraph(line, normal_style))
        else:
            content.append(Spacer(1, 12))

    content.append(Spacer(1, 24))

    # Add Braille section
    content.append(Paragraph("Braille Translation", heading_style))
    content.append(Spacer(1, 6))

    # Split Braille text by lines and add each as a paragraph
    for line in braille_text.split('\n'):
        if line.strip():
            content.append(Paragraph(line, braille_style))
        else:
            content.append(Spacer(1, 12))

    # Build the PDF
    doc.build(content)

    # Reset buffer position to the beginning
    buffer.seek(0)
    return buffer

def create_braille_pdf_with_comparison(original_text, braille_text, title="Menu in Braille"):
    """
    Create a PDF file with side-by-side comparison of original text and Braille.

    Args:
        original_text: Original text content
        braille_text: Braille translation
        title: PDF title

    Returns:
        BytesIO object containing the PDF
    """
    # Create a BytesIO object to store the PDF
    buffer = io.BytesIO()

    # Create the PDF document
    doc = SimpleDocTemplate(
        buffer,
        pagesize=letter,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72
    )

    # Define styles
    styles = getSampleStyleSheet()
    title_style = styles['Title']
    heading_style = styles['Heading2']
    normal_style = styles['Normal']

    # Create a custom style for Braille text
    braille_style = ParagraphStyle(
        'Braille',
        parent=normal_style,
        fontName='BrailleFont' if font_registered else 'Helvetica',
        fontSize=14,
        leading=18
    )

    # Create the content
    content = []

    # Add title
    content.append(Paragraph(title, title_style))
    content.append(Spacer(1, 12))

    # Split text into lines
    original_lines = original_text.split('\n')
    braille_lines = braille_text.split('\n')

    # Ensure both lists have the same length
    max_lines = max(len(original_lines), len(braille_lines))
    original_lines = original_lines + [''] * (max_lines - len(original_lines))
    braille_lines = braille_lines + [''] * (max_lines - len(braille_lines))

    # Create a table for side-by-side comparison
    table_data = [
        [Paragraph("Original Text", heading_style), Paragraph("Braille Translation", heading_style)]
    ]

    # Add each line as a row in the table
    for i in range(max_lines):
        original_para = Paragraph(original_lines[i], normal_style) if original_lines[i].strip() else Spacer(1, 12)
        braille_para = Paragraph(braille_lines[i], braille_style) if braille_lines[i].strip() else Spacer(1, 12)
        table_data.append([original_para, braille_para])

    # Create the table
    table = Table(table_data, colWidths=[doc.width/2.0-12, doc.width/2.0-12])

    # Style the table
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('GRID', (0, 0), (-1, 0), 1, colors.black),
        ('BOX', (0, 0), (-1, -1), 1, colors.black),
        ('BACKGROUND', (0, 0), (1, 0), colors.lightgrey)
    ]))

    content.append(table)

    # Build the PDF
    doc.build(content)

    # Reset buffer position to the beginning
    buffer.seek(0)
    return buffer
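
A hedged sketch of wiring these generators into the app's download flow; `menu_text` and `braille_text` are hypothetical variables holding the extracted menu and its translation:

import streamlit as st
from utils.pdf_generator import create_braille_pdf

pdf_buffer = create_braille_pdf(menu_text, braille_text, title="Menu in Braille")
st.download_button(
    label="Download Braille PDF",
    data=pdf_buffer,
    file_name="menu_braille.pdf",
    mime="application/pdf",
)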