|
import os |
|
import pickle |
|
import random |
|
import shutil |
|
|
|
import cv2 |
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
|
|
from data.dataset import get_transform |
|
|
|
|
|
def summarize_dataset(data: dict):
    """Print author and image counts for the train and test splits.

    ``data`` must provide ``'train'`` and ``'test'`` entries, each a mapping
    from an author key to a sized collection of that author's samples.
    """
    train_split = data['train']
    test_split = data['test']
    print(f"Training authors: {len(train_split)} \t Testing authors: {len(test_split)}")

    # Total number of samples across all authors of each split.
    training_images = sum(len(samples) for samples in train_split.values())
    testing_images = sum(len(samples) for samples in test_split.values())
    print(f"Training images: {training_images} \t Testing images: {testing_images}")
|
|
|
|
|
def compare_data(path_a: str, path_b: str):
    """Interactively compare samples of random authors from two datasets.

    Both pickles are loaded and summarized. Then, repeatedly, a random author
    of the first training split is picked; if the author also exists in the
    second training split, the first ten images of that author from each
    dataset are stacked horizontally and shown in two OpenCV windows.

    Press ``q`` or ``Esc`` in a window to stop; any other key draws the next
    random author.

    Args:
        path_a: Path to the first pickled dataset.
        path_b: Path to the second pickled dataset.
    """
    # NOTE: pickle.load can execute arbitrary code - only open trusted files.
    with open(path_a, 'rb') as f:
        data_a = pickle.load(f)
    summarize_dataset(data_a)

    with open(path_b, 'rb') as f:
        data_b = pickle.load(f)
    summarize_dataset(data_b)

    # Author keys are normalised to int so both datasets are comparable
    # regardless of how each one spells its keys.
    training_a = {int(k): v for k, v in data_a['train'].items()}
    training_b = {int(k): v for k, v in data_b['train'].items()}

    authors_a = list(training_a)

    # Without at least one shared author the sampling loop below could never
    # show anything and would print "not found" forever.
    if not any(author in training_b for author in authors_a):
        print("No common authors between the two datasets")
        return

    try:
        while True:
            author = random.choice(authors_a)

            if author not in training_b:
                print(f"Author: {author} not found in second dataset")
                continue

            author_images_a = [np.array(im_dict["img"]) for im_dict in training_a[author]]
            author_images_b = [np.array(im_dict["img"]) for im_dict in training_b[author]]

            vis_a = np.hstack(author_images_a[:10])
            vis_b = np.hstack(author_images_b[:10])

            cv2.imshow("Author a", vis_a)
            cv2.imshow("Author b", vis_b)

            # Mask to the low byte so the key code compares reliably.
            key = cv2.waitKey(0) & 0xFF
            if key in (27, ord('q')):  # 27 == Esc
                break
    finally:
        cv2.destroyAllWindows()
|
|
|
|
|
def show_dataset(path: str, samples: int = 10, author_id: str = '013'):
    """Browse the training split of a pickled dataset in OpenCV windows.

    First every image of ``author_id`` is shown one at a time, then each
    author is shown as a horizontal strip of up to ``samples`` images.
    A key press advances to the next image / author.

    Args:
        path: Path to the pickled dataset.
        samples: Maximum number of images per author in the strip view.
        author_id: Key of the author whose images are shown individually
            first. If the key is not in the training split this step is
            skipped with a notice instead of raising ``KeyError``.
    """
    # NOTE: pickle.load can execute arbitrary code - only open trusted files.
    with open(path, 'rb') as f:
        data = pickle.load(f)
    summarize_dataset(data)

    training = data['train']

    if author_id in training:
        for im_dict in training[author_id]:
            img = np.array(im_dict["img"]).astype(np.uint8)
            cv2.imshow('image', img)
            cv2.waitKey(0)
    else:
        print(f"Author: {author_id} not found in dataset")

    for author, author_samples in training.items():
        author_images = [np.array(im_dict["img"]).astype(np.uint8) for im_dict in author_samples]

        shown = author_images[:samples]
        if not shown:
            # np.hstack raises on an empty sequence; nothing to display.
            print(f"Author: {author} has no images to show")
            continue

        vis = np.hstack(shown)
        print(f"Author: {author}")
        # Drop the previous windows so a strip of a different size gets a
        # freshly sized window.
        cv2.destroyAllWindows()
        cv2.imshow("vis", vis)
        cv2.waitKey(0)
|
|
|
|
|
def test_transform(path: str):
    """Visually check that the dataset transform can be inverted losslessly.

    For every training image the grayscale transform from ``get_transform``
    is applied, the result is mapped back to 8-bit pixel values, and the
    restored image is compared against the original. Per image this shows
    the original, the restored image, a combined strip (restored, original,
    mismatch mask), and a histogram of both images. Close the histogram
    figure and press a key in an OpenCV window to move on.

    Args:
        path: Path to the pickled dataset.
    """
    # NOTE: pickle.load can execute arbitrary code - only open trusted files.
    with open(path, 'rb') as f:
        data = pickle.load(f)
    summarize_dataset(data)

    training = data['train']
    transform = get_transform(grayscale=True)

    for author in training.values():
        for image_dict in author:
            original_image = image_dict['img'].convert('L')
            transformed_image = transform(original_image).detach().numpy()

            # Inverse of a [-1, 1] normalisation (assumed from this formula;
            # confirm against get_transform). Round before casting: a bare
            # astype(uint8) truncates, so floating-point round-off such as
            # 99.99999 would become 99 and be reported as a wrong pixel.
            restored = ((transformed_image + 1) / 2) * 255
            restored_image = np.clip(np.rint(restored), 0, 255).astype(np.uint8)
            restored_image = np.squeeze(restored_image)
            original_image = np.array(original_image)

            # White where the round trip changed a pixel, black elsewhere.
            wrong_pixels = (original_image != restored_image).astype(np.uint8) * 255

            combined = np.hstack((restored_image, original_image, wrong_pixels))

            cv2.imshow("original", original_image)
            cv2.imshow("restored", restored_image)
            cv2.imshow("combined", combined)

            fig, ax = plt.subplots(1, 2)
            ax[0].hist(original_image.flatten())
            ax[1].hist(restored_image.flatten())
            plt.show()
            # Release the figure so one is not kept alive per image.
            plt.close(fig)

            cv2.waitKey(0)
|
|
|
def dump_words(data_path: str = "../files/IAM-32.pickle", p: str = '.', p_mark: str = 'point'):
    """Write every training image whose label contains ``p`` to a debug folder.

    Images are saved as ``0.png``, ``1.png``, ... inside
    ``../saved_images/debug/<p_mark>``. An existing folder of that name is
    deleted first, so each run starts from a clean directory.

    Args:
        data_path: Path to the pickled dataset. Forward slashes are used so
            the default works on POSIX as well as on Windows.
        p: Substring to look for in the sample labels.
        p_mark: Folder name used for the dumped images (a file-system safe
            name for ``p``).
    """
    # NOTE: pickle.load can execute arbitrary code - only open trusted files.
    with open(data_path, 'rb') as f:
        data = pickle.load(f)

    training = data['train']

    target_folder = f"../saved_images/debug/{p_mark}"

    if os.path.exists(target_folder):
        shutil.rmtree(target_folder)

    # makedirs also creates missing parent folders, which os.mkdir would not.
    os.makedirs(target_folder)

    count = 0

    for author_samples in training.values():
        for im_dict in author_samples:
            # Filter on the label first so only matching images are converted.
            if p not in str(im_dict["label"]):
                continue

            img = np.array(im_dict["img"]).astype(np.uint8)
            cv2.imwrite(os.path.join(target_folder, f"{count}.png"), img)
            count += 1
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the transform round-trip check on the IAM-32 pickle.
    dataset_path = "../files/IAM-32.pickle"
    test_transform(dataset_path)
|
|
|
|
|
|