Christopher Capobianco committed on
Commit
a2d3475
·
1 Parent(s): e71d901

Add warning message about doc classifier

Browse files
Files changed (2) hide show
  1. Home.py +1 -0
  2. projects/01_Document_Classifier.py +2 -1
Home.py CHANGED
@@ -20,6 +20,7 @@ with st.container():
20
  text_column, image_column = st.columns((3,1))
21
  with text_column:
22
  st.subheader("Document Classifier", divider="green")
 
23
  st.markdown("""
24
  - Used OCR text and a Random Forest classification model to predict a document's classification
25
  - Trained on Real World Documents Collection at Kaggle
 
20
  text_column, image_column = st.columns((3,1))
21
  with text_column:
22
  st.subheader("Document Classifier", divider="green")
23
+ st.warning("Work in Progress")
24
  st.markdown("""
25
  - Used OCR text and a Random Forest classification model to predict a document's classification
26
  - Trained on Real World Documents Collection at Kaggle
projects/01_Document_Classifier.py CHANGED
@@ -2,7 +2,6 @@ import streamlit as st
2
  import easyocr
3
  import pickle
4
  import spacy
5
- # import en_core_web_sm
6
  import re
7
  import os
8
  import subprocess
@@ -75,6 +74,8 @@ def autoclassifier(images):
75
 
76
  st.header('Document Classifier', divider='green')
77
 
 
 
78
  st.markdown("#### What is OCR?")
79
  st.markdown("OCR stands for Optical Character Recognition, and the technology for it has been around for over 30 years.")
80
  st.markdown("In this project, we leverage the extraction of the text from an image to classify the document. I am using EasyOCR as the OCR Engine, and I do some pre-processing of the raw OCR text to improve the quality of the words used to classify the documents.")
 
2
  import easyocr
3
  import pickle
4
  import spacy
 
5
  import re
6
  import os
7
  import subprocess
 
74
 
75
  st.header('Document Classifier', divider='green')
76
 
77
+ st.warning("Work in Progress")
78
+
79
  st.markdown("#### What is OCR?")
80
  st.markdown("OCR stands for Optical Character Recognition, and the technology for it has been around for over 30 years.")
81
  st.markdown("In this project, we leverage the extraction of the text from an image to classify the document. I am using EasyOCR as the OCR Engine, and I do some pre-processing of the raw OCR text to improve the quality of the words used to classify the documents.")