---
license: apache-2.0
tags:
- vision
datasets:
- dmitva/the-mnist-database
inference: false
---

The MNIST OCR (Optical Character Recognition) model is a deep learning model trained to recognise and classify handwritten digits from 0 to 9. This model is trained on the MNIST dataset, which consists of 60,000 small square 28×28 pixel grayscale images of handwritten single digits, making it highly accurate for recognising written, isolated digits in a similar style to those found in the training set.

![Training History](training_history.png "Training History")

### Install Packages

```sh
pip install numpy opencv-python requests pillow transformers tensorflow
```

### Usage

```python
import numpy as np
import cv2
import requests
from PIL import Image
from io import BytesIO
from transformers import TFAutoModelForImageClassification, AutoFeatureExtractor


class MNISTPredictor:
    """Segment an image into candidate digit crops and classify each crop."""

    def __init__(self, model_name):
        """Load the classifier and its feature extractor.

        Args:
            model_name: Identifier passed unchanged to ``from_pretrained`` for
                both the model and the feature extractor.
        """
        self.model = TFAutoModelForImageClassification.from_pretrained(model_name)
        self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)

    def extract_features(self, image, min_area=50):
        """Cut the image into one 28x28 crop per detected external contour.

        Args:
            image: A PIL image, or anything ``np.array`` turns into an RGB
                array. PIL images are converted to RGB first.
            min_area: Contours whose area is not greater than this are
                discarded as noise. Adjust this threshold as needed.

        Returns:
            A list of 28x28 RGB PIL images, ordered left to right by the
            x coordinate of their bounding box. Crops from multi-row layouts
            are not grouped by row.
        """
        # Palette ("P") and grayscale ("L") PIL images become 2-D arrays,
        # which cv2.COLOR_RGB2GRAY rejects; normalise them to RGB up front.
        if isinstance(image, Image.Image):
            image = image.convert("RGB")

        gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        # Inverted threshold: pixels darker than their neighbourhood become
        # 255, so dark strokes on a light background end up as foreground.
        thresh = cv2.adaptiveThreshold(
            blurred,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )
        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        boxes = [
            cv2.boundingRect(contour)
            for contour in contours
            if cv2.contourArea(contour) > min_area
        ]
        # findContours does not return contours in reading order; sort so the
        # predictions line up with the digits as written on a single line.
        boxes.sort(key=lambda box: (box[0], box[1]))

        digit_images = []
        for x, y, w, h in boxes:
            roi = thresh[y:y + h, x:x + w]
            resized = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
            digit_images.append(Image.fromarray(resized).convert("RGB"))
        return digit_images

    def predict(self, image):
        """Predict digits in the image.

        Args:
            image: Input accepted by ``extract_features``.

        Returns:
            A list with one predicted class index per extracted crop (empty
            when no contour passes the area filter), or ``None`` if any step
            raised; the error is printed in that case.
        """
        try:
            predictions = []
            for digit_image in self.extract_features(image):
                inputs = self.feature_extractor(images=digit_image, return_tensors="tf")
                outputs = self.model(**inputs)
                predictions.append(int(np.argmax(outputs.logits)))
            return predictions
        except Exception as e:
            print(f"Error during prediction: {e}")
            return None


def download_image(url, timeout=30):
    """Download an image from a URL.

    Args:
        url: Address of the image.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            one, a stalled server would block the call indefinitely.

    Returns:
        The opened PIL image, or ``None`` if the request or decoding failed;
        the error is printed in that case.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return Image.open(BytesIO(response.content))
    except Exception as e:
        print(f"Error downloading image: {e}")
        return None


def save_predictions_to_file(predictions, output_path):
    """Save predictions to a text file.

    Args:
        predictions: Iterable of predicted digits.
        output_path: Destination file; overwritten if it exists.

    Returns:
        ``True`` if the file was written, ``False`` otherwise; the error is
        printed in the failure case.
    """
    try:
        # Build the text before opening the file so that bad input (e.g.
        # ``None``) fails without truncating an existing file.
        text = f"Predicted digits are: {', '.join(map(str, predictions))}\n"
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(text)
        return True
    except Exception as e:
        print(f"Error saving predictions to file: {e}")
        return False


def main(image_url, model_name, output_path):
    """Download an image, predict its digits and write them to a file.

    Args:
        image_url: URL of the image to analyse.
        model_name: Model identifier handed to ``MNISTPredictor``.
        output_path: File that receives the predictions.
    """
    try:
        predictor = MNISTPredictor(model_name)

        # Download image
        image = download_image(image_url)
        if image is None:
            raise RuntimeError("Failed to download image")
        print("Image downloaded successfully.")

        # Predict digits
        digits = predictor.predict(image)
        if digits is None:
            # predict() already printed the cause; stop instead of reporting
            # and "saving" a non-result.
            raise RuntimeError("Failed to predict digits")
        print(f"Predicted digits are: {digits}")

        # Save predictions to file; only report success if the write worked.
        if save_predictions_to_file(digits, output_path):
            print(f"Predictions saved to {output_path}")
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    image_url = "https://miro.medium.com/v2/resize:fit:720/format:webp/1*w7pBsjI3t3ZP-4Gdog-JdQ.png"
    model_name = "0xnu/mnist-ocr"
    output_path = "predictions.txt"
    main(image_url, model_name, output_path)
```

### Copyright

(c) 2024 [Finbarrs Oketunji](https://finbarrs.eu). All Rights Reserved.