---
license: apache-2.0
tags:
- vision
datasets:
- dmitva/the-mnist-database
inference: false
---
|
|
|
The MNIST OCR (Optical Character Recognition) model is a deep learning model trained to recognise and classify handwritten digits from 0 to 9. It is trained on the MNIST dataset, whose training set consists of 60,000 small square 28×28 pixel grayscale images of handwritten single digits (a further 10,000 images form the test set). This makes it highly accurate at recognising isolated handwritten digits written in a style similar to those in the training set.
|
|
|
 |
|
|
|
### Install Packages |
|
|
|
```sh |
|
pip install numpy opencv-python requests pillow transformers tensorflow
|
``` |
|
|
|
### Usage |
|
|
|
```python |
|
import numpy as np |
|
import cv2 |
|
import requests |
|
from PIL import Image |
|
from io import BytesIO |
|
from transformers import TFAutoModelForImageClassification, AutoFeatureExtractor |
|
|
|
class MNISTPredictor:
    """Segment handwritten digits out of an image and classify each one.

    Combines an OpenCV pipeline that isolates individual digits with a
    Hugging Face image-classification model and its feature extractor.
    """

    def __init__(self, model_name, min_contour_area=50):
        """Load the model and feature extractor for ``model_name``.

        Args:
            model_name: Identifier or path passed to ``from_pretrained``.
            min_contour_area: Contours whose area is not strictly greater
                than this value are treated as noise and skipped.
        """
        self.model = TFAutoModelForImageClassification.from_pretrained(model_name)
        self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
        self.min_contour_area = min_contour_area

    def extract_features(self, image):
        """Return one 28x28 RGB crop per detected digit, ordered left to right.

        Args:
            image: The picture to segment. PIL images of any mode are
                accepted; anything else is handed to ``np.array`` unchanged.

        Returns:
            A list of 28x28 RGB PIL images, one per contour larger than
            ``min_contour_area``. The list is empty when nothing qualifies.
        """
        # Normalise the mode first: COLOR_RGB2GRAY rejects the 2-D arrays
        # that greyscale ("L") and palette ("P") images convert to.
        if isinstance(image, Image.Image):
            image = image.convert("RGB")
        gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)

        # Smooth before thresholding so speckle does not become contours.
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)

        # Inverted binary output: strokes become white on a black background.
        thresh = cv2.adaptiveThreshold(
            blurred,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )

        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        # findContours gives no useful ordering, so sort the (x, y, w, h)
        # boxes by x to make the result follow a single row of digits as
        # written. Multi-row images are not put into reading order.
        boxes = sorted(
            cv2.boundingRect(contour)
            for contour in contours
            if cv2.contourArea(contour) > self.min_contour_area
        )

        digit_images = []
        for x, y, w, h in boxes:
            roi = thresh[y:y + h, x:x + w]
            # NOTE(review): the tight crop is stretched to 28x28 without
            # preserving aspect ratio or adding a margin - confirm this
            # matches how the model's training images were prepared.
            resized = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
            digit_images.append(Image.fromarray(resized).convert('RGB'))

        return digit_images

    def predict(self, image):
        """Predict digits in the image.

        Args:
            image: The picture to analyse; forwarded to ``extract_features``.

        Returns:
            A list with one predicted class index per detected digit (empty
            when no digit is found), or ``None`` if any step raises. The
            error is printed rather than propagated.
        """
        try:
            digit_images = self.extract_features(image)
            predictions = []
            for digit_image in digit_images:
                inputs = self.feature_extractor(images=digit_image, return_tensors="tf")
                outputs = self.model(**inputs)
                # The index of the largest logit is the predicted class.
                predictions.append(int(np.argmax(outputs.logits)))
            return predictions
        except Exception as e:
            print(f"Error during prediction: {e}")
            return None
|
|
|
def download_image(url, timeout=30):
    """Download an image from a URL.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded PIL image, or ``None`` if the request or the decoding
        fails. The error is printed rather than raised.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
        # Image.open is lazy; decode now so a corrupt payload is reported
        # here instead of surfacing later during processing.
        image.load()
        return image
    except Exception as e:
        print(f"Error downloading image: {e}")
        return None
|
|
|
def save_predictions_to_file(predictions, output_path):
    """Write the predictions to ``output_path`` as one comma-separated line.

    The file is overwritten. Any failure is printed rather than raised.
    """
    try:
        with open(output_path, 'w') as handle:
            digits_text = ', '.join(str(digit) for digit in predictions)
            handle.write(f"Predicted digits are: {digits_text}\n")
    except Exception as err:
        print(f"Error saving predictions to file: {err}")
|
|
|
def main(image_url, model_name, output_path):
    """Download an image, predict the digits in it and save the result.

    Args:
        image_url: URL of the image to analyse.
        model_name: Model identifier used to build the predictor.
        output_path: Text file the predictions are written to.

    Every failure is reported on stdout; nothing is raised to the caller.
    """
    try:
        predictor = MNISTPredictor(model_name)

        # download_image signals failure by returning None.
        image = download_image(image_url)
        if image is None:
            raise RuntimeError("Failed to download image")
        print("Image downloaded successfully.")

        # predict signals failure by returning None as well. Stop here so
        # that neither a "None" result nor a false "saved" message is shown.
        digits = predictor.predict(image)
        if digits is None:
            raise RuntimeError("Failed to predict digits")
        print(f"Predicted digits are: {digits}")

        save_predictions_to_file(digits, output_path)
        print(f"Predictions saved to {output_path}")
    except Exception as e:
        print(f"An error occurred: {e}")
|
|
|
if __name__ == "__main__":
    # Example run: a sample image, the published model and a local output file.
    image_url = "https://miro.medium.com/v2/resize:fit:720/format:webp/1*w7pBsjI3t3ZP-4Gdog-JdQ.png"
    model_name = "0xnu/mnist-ocr"
    output_path = "predictions.txt"

    main(image_url=image_url, model_name=model_name, output_path=output_path)
|
``` |
|
|
|
### Copyright |
|
|
|
(c) 2024 [Finbarrs Oketunji](https://finbarrs.eu). All Rights Reserved. |
|
|