Spaces:

ChronoStellar
/

Indonesian_ALPR_model_comparison

Running

App Files Files Community

ChronoStellar commited on Dec 12, 2024

Commit

91073e2

verified ·

1 Parent(s): 75baf9a

Create app.py

Browse files

Files changed (1) hide show

app.py +118 -0

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import gradio as gr
+import cv2
+import joblib
+import numpy as np
+from skimage.feature import hog
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+from transformers import AutoTokenizer, AutoModelForImageTextToText
+from transformers import VisionEncoderDecoderModel, TrOCRProcessor
+import torch
+from PIL import Image
+# Paths to your models
+MODEL_TYPES = ["HOG & Logistic Regression","CRNN CTC","Fine Tuned TrOCR"]
+clf_hog = joblib.load('/content/HOG_LogRes.pkl')
+clf_crnn = tf.keras.models.load_model('/content/crnn_ctc.keras')
+num_to_char = joblib.load('./decoder.joblib')
+processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
+clf_trocr = AutoModelForImageTextToText.from_pretrained("ChronoStellar/TrOCR_IndonesianLPR")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+clf_trocr.to(device)
+# Preprocessing and prediction functions for each model
+def ocr_model_1(file_path):
+    im = cv2.imread(file_path)
+    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+    ret, im_th = cv2.threshold(im_gray, 120, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+    ctrs, hier = cv2.findContours(im_th, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    bboxes = [cv2.boundingRect(c) for c in ctrs]
+    sorted_bboxes = sorted(bboxes, key=lambda b: b[0])
+    plate_char = []
+    image_height, image_width = im.shape[:2]
+    height_threshold = image_height * 0.3
+    width_threshold = image_width * 0.3
+    for num, i_bboxes in enumerate(sorted_bboxes):
+        [x, y, w, h] = i_bboxes
+        if h > height_threshold and w < width_threshold:
+            roi = im_gray[y:y + h, x:x + w]
+            roi = cv2.resize(roi, (64, 128), interpolation=cv2.INTER_AREA)
+            roi_hog_fd = hog(roi, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(1, 1))
+            nbr = clf_hog.predict(np.array([roi_hog_fd]))
+            plate_char.append(str(nbr[0]))
+    return ''.join(plate_char)
+max_length = 9
+img_width = 200
+img_height = 50
+def decode_batch_predictions(pred):
+    input_len = np.ones(pred.shape[0]) * pred.shape[1]
+    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
+        :, :max_length
+    ]
+    output_text = []
+    for res in results:
+        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
+        res = res.replace('[UNK]', '')
+        output_text.append(res)
+    return output_text
+def ocr_model_2(file_path):
+  img = tf.io.read_file(file_path)
+  img = tf.io.decode_png(img, channels=1)
+  img = tf.image.convert_image_dtype(img, tf.float32)
+  img = tf.image.resize(img, [img_height, img_width])
+  img = tf.transpose(img, perm=[1, 0, 2])
+  img = tf.expand_dims(img, axis=0)
+  preds = clf_crnn.predict(img)
+  pred_text = decode_batch_predictions(preds)
+  return pred_text[0]
+def ocr_model_3(file_path):
+    pil_image = Image.open(file_path).convert("RGB")
+    pixel_values = processor(pil_image, return_tensors="pt").pixel_values
+    pixel_values = pixel_values.to(device)
+    clf_trocr.eval()
+    with torch.no_grad():
+        generated_ids = clf_trocr.generate(pixel_values)
+    predicted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return predicted_text
+# Master OCR function that chooses the appropriate pipeline
+def ocr(file_path, model_name):
+    if model_name == MODEL_TYPES[0]:
+        return ocr_model_1(file_path)
+    elif model_name == MODEL_TYPES[1]:
+        return ocr_model_2(file_path)
+    elif model_name == MODEL_TYPES[2]:
+        return ocr_model_3(file_path)
+# Create Gradio interface
+interface = gr.Interface(
+    fn=ocr,
+    inputs=[
+        gr.Image(type="filepath"),
+        gr.Dropdown(choices=MODEL_TYPES, label="Choose Model")
+    ],
+    outputs=gr.Textbox(label="Predicted License Plate"),
+    title="Automatic License Plate Recognition",
+    description="Provide the file path of a license plate image, choose a model, and the system will predict the text on it. These Models are all trained on the same dataset, one model might be better compared to the other",
+    examples=[
+        ['/content/B8837NR.jpg', ''],
+        ['/content/E5105OD.jpg', '']
+    ]
+)
+# Launch the Gradio app
+interface.launch()