Christopher Capobianco
committed on
Commit
·
a2d3475
1
Parent(s):
e71d901
Add warning message about doc classifier
Browse files
- Home.py +1 -0
- projects/01_Document_Classifier.py +2 -1
Home.py
CHANGED
@@ -20,6 +20,7 @@ with st.container():
|
|
20 |
text_column, image_column = st.columns((3,1))
|
21 |
with text_column:
|
22 |
st.subheader("Document Classifier", divider="green")
|
|
|
23 |
st.markdown("""
|
24 |
- Used OCR text and a Random Forest classification model to predict a document's classification
|
25 |
- Trained on Real World Documents Collection at Kaggle
|
|
|
20 |
text_column, image_column = st.columns((3,1))
|
21 |
with text_column:
|
22 |
st.subheader("Document Classifier", divider="green")
|
23 |
+
st.warning("Work in Progress")
|
24 |
st.markdown("""
|
25 |
- Used OCR text and a Random Forest classification model to predict a document's classification
|
26 |
- Trained on Real World Documents Collection at Kaggle
|
projects/01_Document_Classifier.py
CHANGED
@@ -2,7 +2,6 @@ import streamlit as st
|
|
2 |
import easyocr
|
3 |
import pickle
|
4 |
import spacy
|
5 |
-
# import en_core_web_sm
|
6 |
import re
|
7 |
import os
|
8 |
import subprocess
|
@@ -75,6 +74,8 @@ def autoclassifier(images):
|
|
75 |
|
76 |
st.header('Document Classifier', divider='green')
|
77 |
|
|
|
|
|
78 |
st.markdown("#### What is OCR?")
|
79 |
st.markdown("OCR stands for Optical Character Recognition, and the technology for it has been around for over 30 years.")
|
80 |
st.markdown("In this project, we leverage the extraction of the text from an image to classify the document. I am using EasyOCR as the OCR Engine, and I do some pre-processing of the raw OCR text to improve the quality of the words used to classify the documents.")
|
|
|
2 |
import easyocr
|
3 |
import pickle
|
4 |
import spacy
|
|
|
5 |
import re
|
6 |
import os
|
7 |
import subprocess
|
|
|
74 |
|
75 |
st.header('Document Classifier', divider='green')
|
76 |
|
77 |
+
st.warning("Work in Progress")
|
78 |
+
|
79 |
st.markdown("#### What is OCR?")
|
80 |
st.markdown("OCR stands for Optical Character Recognition, and the technology for it has been around for over 30 years.")
|
81 |
st.markdown("In this project, we leverage the extraction of the text from an image to classify the document. I am using EasyOCR as the OCR Engine, and I do some pre-processing of the raw OCR text to improve the quality of the words used to classify the documents.")
|