0xnu
/

Image Classification
Keras
vision
mnist-ocr / README.md
0xnu's picture
Update README.md
6b1a29b verified
|
raw
history blame
4.3 kB
---
license: apache-2.0
tags:
- vision
datasets:
- dmitva/the-mnist-database
inference: false
---
The MNIST OCR (Optical Character Recognition) model is a deep learning model trained to recognise and classify handwritten digits from 0 to 9. It is trained on the MNIST dataset, whose training set consists of 60,000 small, square 28×28-pixel grayscale images of single handwritten digits. As a result, it is highly accurate at recognising isolated handwritten digits written in a style similar to those found in the training set.
![Training History](training_history.png "Training History")
### Install Packages
```sh
pip install numpy opencv-python requests pillow transformers tensorflow
```
### Usage
```python
import numpy as np
import cv2
import requests
from PIL import Image
from io import BytesIO
from transformers import TFAutoModelForImageClassification, AutoFeatureExtractor
class MNISTPredictor:
    """Segment an image into candidate digits and classify each one.

    The classifier and its feature extractor are both loaded from the
    pretrained checkpoint identified by ``model_name``.
    """

    def __init__(self, model_name):
        """Load the model and feature extractor for ``model_name``."""
        self.model = TFAutoModelForImageClassification.from_pretrained(model_name)
        self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)

    def extract_features(self, image, min_contour_area=50):
        """Extract features from the image for multiple digits.

        Args:
            image: A PIL image in any mode, or an RGB array-like.
            min_contour_area: Contours whose area is not larger than this
                are discarded as noise.

        Returns:
            A list of 28x28 RGB PIL images, one per retained external
            contour, in the order ``cv2.findContours`` reports them.
        """
        # COLOR_RGB2GRAY needs a multi-channel array; single-channel PIL
        # modes (L, P, 1) produce a 2-D array and would make cvtColor fail.
        if isinstance(image, Image.Image):
            image = image.convert('RGB')

        # Convert to grayscale
        gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
        # Apply Gaussian blur
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        # Apply adaptive thresholding; BINARY_INV turns dark strokes into
        # white foreground on a black background.
        thresh = cv2.adaptiveThreshold(
            blurred,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )
        # Find contours (outermost only)
        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        digit_images = []
        for contour in contours:
            # Filter small contours
            if cv2.contourArea(contour) > min_contour_area:
                x, y, w, h = cv2.boundingRect(contour)
                roi = thresh[y:y + h, x:x + w]
                resized = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
                digit_images.append(Image.fromarray(resized).convert('RGB'))
        return digit_images

    def predict(self, image):
        """Predict digits in the image.

        Returns:
            A list with the predicted class index (int) of every extracted
            digit crop, or None if any step raised; the error is printed.
        """
        try:
            digit_images = self.extract_features(image)
            predictions = []
            for digit_image in digit_images:
                inputs = self.feature_extractor(images=digit_image, return_tensors="tf")
                outputs = self.model(**inputs)
                # The index of the largest logit is the predicted class.
                predicted_class = int(np.argmax(outputs.logits))
                predictions.append(predicted_class)
            return predictions
        except Exception as e:
            # Best-effort API: report the failure and signal it with None.
            print(f"Error during prediction: {e}")
            return None
def download_image(url, timeout=10):
    """Download an image from a URL.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The opened PIL image, or None if the request or the decoding
        failed; the error is printed.
    """
    try:
        # Without a timeout, requests can wait indefinitely on a server
        # that never responds.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return Image.open(BytesIO(response.content))
    except Exception as e:
        print(f"Error downloading image: {e}")
        return None
def save_predictions_to_file(predictions, output_path):
    """Save predictions to a text file.

    Writes a single line, ``Predicted digits are: d1, d2, ...``, to
    ``output_path``. Any error is printed rather than raised.

    Args:
        predictions: Iterable of predicted digits.
        output_path: Path of the text file to (over)write.
    """
    try:
        # Build the text before opening the file: opening in 'w' mode
        # truncates immediately, so an invalid `predictions` value (such as
        # None) must fail here, before an existing file is wiped.
        line = f"Predicted digits are: {', '.join(map(str, predictions))}\n"
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(line)
    except Exception as e:
        print(f"Error saving predictions to file: {e}")
def main(image_url, model_name, output_path):
    """Download an image, predict its digits and save them to a file.

    Errors are reported on stdout rather than raised.

    Args:
        image_url: URL of the image to classify.
        model_name: Identifier of the pretrained model to load.
        output_path: Path of the text file that receives the predictions.
    """
    try:
        predictor = MNISTPredictor(model_name)

        # Download image
        image = download_image(image_url)
        if image is None:
            raise RuntimeError("Failed to download image")
        print("Image downloaded successfully.")

        # Predict digits
        digits = predictor.predict(image)
        if digits is None:
            # predict() signals failure with None; stop here rather than
            # printing and "saving" a non-result.
            raise RuntimeError("Failed to predict digits")
        print(f"Predicted digits are: {digits}")

        # Save predictions to file
        save_predictions_to_file(digits, output_path)
        print(f"Predictions saved to {output_path}")
    except Exception as e:
        print(f"An error occurred: {e}")
if __name__ == "__main__":
    # Script entry point: classify the digits of a sample image with the
    # published checkpoint and store the result next to the script.
    main(
        image_url="https://miro.medium.com/v2/resize:fit:720/format:webp/1*w7pBsjI3t3ZP-4Gdog-JdQ.png",
        model_name="0xnu/mnist-ocr",
        output_path="predictions.txt",
    )
```
### Copyright
(c) 2024 [Finbarrs Oketunji](https://finbarrs.eu). All Rights Reserved.