|
from PIL import Image |
|
import pytesseract |
|
import easyocr |
|
import cv2 |
|
import os |
|
from io import BytesIO |
|
|
|
import matplotlib.pyplot as plt |
|
import streamlit as st |
|
|
|
# OCR backends offered in the sidebar "OCR model" select box. The first entry
# is the default selection; main() compares the chosen value against
# 'pytesseract' and treats anything else as EasyOCR.
DET_ARCHS = [
    "pytesseract",
    "easyocr",
]
|
|
|
def _run_ocr(image_bytes, det_arch):
    """Run the selected OCR backend on an encoded image and return its text.

    Args:
        image_bytes: Raw bytes of the uploaded image file.
        det_arch: Name of the backend chosen in the sidebar. ``'pytesseract'``
            selects Tesseract; any other value falls through to EasyOCR
            configured for English.

    Returns:
        The recognised text as a single string.
    """
    if det_arch == 'pytesseract':
        return pytesseract.image_to_string(Image.open(BytesIO(image_bytes)))

    reader = easyocr.Reader(['en'])
    # With detail=0 EasyOCR returns a plain list of recognised strings.
    # Join them with newlines so separate fragments are not glued together
    # into one unreadable run of characters.
    fragments = reader.readtext(image_bytes, detail=0)
    return '\n'.join(fragments)


def main():
    """Render the Streamlit OCR demo page and run OCR when requested.

    Layout: a wide page with four equal columns, of which the first shows the
    uploaded image and the second shows the OCR output. The sidebar holds the
    file uploader, the backend selector and the "Analyze image" button.
    """
    st.set_page_config(layout="wide")

    st.title("Image Text Recognition")
    st.write('\n')
    st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")

    cols = st.columns((1, 1, 1, 1))
    cols[0].subheader("Input image")
    cols[1].subheader("OCR output")

    st.sidebar.title("Document selection")
    uploaded_file = st.sidebar.file_uploader("Upload files", type=['png', 'jpeg', 'jpg'])

    # Bind `doc` unconditionally so later code never touches an undefined name.
    doc = None
    if uploaded_file is not None:
        doc = uploaded_file.read()
        cols[0].image(doc)

    st.sidebar.title("Model selection")
    det_arch = st.sidebar.selectbox("OCR model", DET_ARCHS)
    st.sidebar.write('\n')

    # Nothing more to do until the user explicitly asks for an analysis.
    if not st.sidebar.button("Analyze image"):
        return

    if doc is None:
        st.sidebar.write("Please upload an image")
        return

    with st.spinner("Loading model..."):
        recognised_text = _run_ocr(doc, det_arch)

    with st.spinner('Analyzing...'):
        cols[1].text(recognised_text)
|
|
|
|
|
# Script entry point: build the Streamlit page only when this file is run
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|