gagan3012 committed on
Commit
9fefb79
·
1 Parent(s): b35af0a

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +142 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_cropper import st_cropper
3
+ from PIL import Image
4
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel, DonutProcessor
5
+ import torch
6
+ import re
7
+ import pytesseract
8
+
9
+
10
@st.cache_resource
def _load_trocr(model_name):
    """Load the TrOCR processor/model pair for *model_name* once and reuse it.

    Streamlit re-executes the script on every interaction, so without the
    resource cache the weights would be re-read on each "Run OCR" click.
    NOTE(review): ``st.cache_resource`` needs Streamlit >= 1.18 - confirm the
    deployed version.

    Returns:
        A ``(processor, model, device)`` tuple; the model has already been
        moved to ``device``.
    """
    processor = TrOCRProcessor.from_pretrained(model_name)
    model = VisionEncoderDecoderModel.from_pretrained(model_name)
    # Same device selection as predict_english, so both paths use the GPU
    # when one is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    return processor, model, device


def predict_arabic(img, model_name="UBC-NLP/Qalam"):
    """Run the TrOCR-style checkpoint on a PIL image and return the text.

    Args:
        img: PIL image to recognise; it is converted to RGB before being
            handed to the processor.
        model_name: Hugging Face checkpoint identifier to load.

    Returns:
        The decoded text of the first (only) generated sequence, with
        special tokens stripped.
    """
    processor, model, device = _load_trocr(model_name)
    rgb_image = img.convert("RGB")
    pixel_values = processor(rgb_image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values.to(device), max_length=256)
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]
24
+
25
+
26
@st.cache_resource
def _load_donut(model_name):
    """Load the Donut processor/model pair for *model_name* once and reuse it.

    Streamlit re-executes the script on every interaction, so without the
    resource cache the weights would be re-read on each "Run OCR" click.
    NOTE(review): ``st.cache_resource`` needs Streamlit >= 1.18 - confirm the
    deployed version.

    Returns:
        A ``(processor, model, device)`` tuple; the model has already been
        moved to ``device``.
    """
    processor = DonutProcessor.from_pretrained(model_name)
    model = VisionEncoderDecoderModel.from_pretrained(model_name)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    return processor, model, device


def predict_english(img, model_name="naver-clova-ix/donut-base-finetuned-cord-v2"):
    """Run a Donut checkpoint on a PIL image and return its output as plain text.

    Args:
        img: PIL image to recognise; it is converted to RGB before being
            handed to the processor.
        model_name: Hugging Face checkpoint identifier to load.

    Returns:
        The generated sequence with EOS/PAD tokens and all ``<...>`` markup
        tags removed. Tags are replaced by a space (and whitespace is then
        collapsed) so that values from adjacent fields are not fused together.
    """
    processor, model, device = _load_donut(model_name)
    tokenizer = processor.tokenizer

    # The decoder is primed with the task start token of the CORD-v2 checkpoint.
    task_prompt = "<s_cord-v2>"
    decoder_input_ids = tokenizer(
        task_prompt, add_special_tokens=False, return_tensors="pt").input_ids

    image = img.convert("RGB")
    pixel_values = processor(image, return_tensors="pt").pixel_values

    outputs = model.generate(
        pixel_values.to(device),
        decoder_input_ids=decoder_input_ids.to(device),
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        # Forbid the unknown token so it never appears in the output.
        bad_words_ids=[[tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(tokenizer.eos_token, "").replace(
        tokenizer.pad_token, "")
    # Substitute a space for each tag instead of deleting it outright;
    # otherwise "<a>x</a><b>y</b>" would collapse to "xy".
    sequence = re.sub(r"<.*?>", " ", sequence)
    return " ".join(sequence.split())
59
+
60
+
61
def predict_tesseract(img, lang=None):
    """Run Tesseract OCR on an image and return the recognised text.

    Args:
        img: Either an already-decoded PIL image (what the cropper widget
            returns) or anything ``PIL.Image.open`` accepts, such as a path
            or a binary file object.
        lang: Optional Tesseract language code forwarded to pytesseract;
            ``None`` keeps pytesseract's default.

    Returns:
        The text produced by ``pytesseract.image_to_string``.
    """
    # A decoded image must be passed through as-is: Image.open() only
    # understands paths and file objects and fails on a PIL image.
    if isinstance(img, Image.Image):
        return pytesseract.image_to_string(img, lang=lang)
    # Close the underlying file once OCR has finished.
    with Image.open(img) as opened:
        return pytesseract.image_to_string(opened, lang=lang)
64
+
65
+
66
# --- Page configuration ------------------------------------------------------
st.set_option('deprecation.showfileUploaderEncoding', False)

menu_items = {
    'Get Help': 'https://www.extremelycoolapp.com/help',
    'Report a bug': "https://www.extremelycoolapp.com/bug",
    'About': "# This is a header. This is an *extremely* cool app!",
}
st.set_page_config(
    page_title="Ex-stream-ly Cool App",
    page_icon="🖊️",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items=menu_items,
)

# --- Sidebar: image upload and demo options ----------------------------------
st.header("Qalam: A Multilingual OCR System")
img_file = st.sidebar.file_uploader(label='Upload a file', type=['png', 'jpg'])
realtime_update = st.sidebar.checkbox(label="Update in Real Time", value=True)

# Only a free-form crop box is offered; None means "no fixed aspect ratio".
aspect_choice = st.sidebar.radio(label="Aspect Ratio", options=["Free"])
aspect_dict = {"Free": None}
aspect_ratio = aspect_dict[aspect_choice]

Lng = st.sidebar.selectbox(
    label="Language",
    options=["Arabic", "English", "French", "Korean", "Chinese"],
)

# Display name of the OCR backend used for each selectable language.
Models = {
    "Arabic": "Qalam",
    "English": "Donut",
    "French": "Tesseract",
    "Korean": "Donut",
    "Chinese": "Donut",
}
st.sidebar.write("# Model: ", Models[Lng])

# --- Main area: crop the upload, preview it, run OCR on demand ---------------
if img_file:
    img = Image.open(img_file)
    if not realtime_update:
        st.write("Double click to save crop")

    input_col, output_col = st.columns(2)

    with input_col:
        st.header("Select Input Image")
        # The cropper widget hands back the selected region as an image.
        cropped_img = st_cropper(
            img,
            realtime_update=realtime_update,
            box_color="#FF0000",
            aspect_ratio=aspect_ratio,
            should_resize_image=True,
        )

    with output_col:
        st.header("Output Image")
        st.image(cropped_img)
        if st.button("Run OCR"):
            # Language -> (heading to print, recogniser to call on the crop).
            ocr_routes = {
                "Arabic": ("# Arabic Text:", predict_arabic),
                "English": ("# English Text:", predict_english),
                "French": ("# French Text:", predict_tesseract),
                "Korean": ("# Korean Text:", predict_english),
                "Chinese": ("# Chinese Text:", predict_english),
            }
            route = ocr_routes.get(Lng)
            if route is not None:
                heading, recognise = route
                # Heading is written first, then the recogniser runs.
                st.write(heading)
                st.write(recognise(cropped_img))
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ streamlit-cropper
3
+ transformers
4
+ torch
5
+ pytesseract
6
+ # re is part of the Python standard library and must not be listed as a pip requirement
7
+ pillow