Spaces:

offry
/

Arcaid

Runtime error

App Files Files Community

Arcaid / app.py

offry

Update app.py

0c2a598 almost 2 years ago

raw

history blame

16.1 kB

	import os
	import pickle
	from operator import itemgetter

	import cv2
	import gradio as gr
	import kornia.filters
	import kornia.filters
	import scipy.ndimage
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import numpy as np
	import matplotlib.pyplot as plt
	import random
	import zipfile
	# from skimage.transform import resize
	from torchvision import transforms, models


	def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
	    """Create a 3x3 convolution layer without a bias term.

	    The padding is tied to ``dilation`` so that, at ``stride=1``, the output
	    keeps the spatial size of the input.

	    Args:
	        in_planes: number of input channels.
	        out_planes: number of output channels.
	        stride: convolution stride.
	        groups: number of channel groups.
	        dilation: kernel dilation; the same value is used as padding.

	    Returns:
	        The configured ``nn.Conv2d`` module.
	    """
	    options = {
	        "kernel_size": 3,
	        "stride": stride,
	        "padding": dilation,
	        "groups": groups,
	        "bias": False,
	        "dilation": dilation,
	    }
	    return nn.Conv2d(in_planes, out_planes, **options)


	def conv1x1(in_planes, out_planes, stride=1):
	    """Create a bias-free 1x1 (pointwise) convolution layer.

	    Args:
	        in_planes: number of input channels.
	        out_planes: number of output channels.
	        stride: convolution stride.

	    Returns:
	        The configured ``nn.Conv2d`` module.
	    """
	    pointwise = nn.Conv2d(
	        in_planes,
	        out_planes,
	        kernel_size=1,
	        stride=stride,
	        bias=False,
	    )
	    return pointwise


	class DoubleConv(nn.Module):
	    """Residual block: (3x3 conv => InstanceNorm => ReLU) followed by
	    (3x3 conv => InstanceNorm), a skip connection and a final ReLU.

	    Args:
	        in_channels: channels of the input tensor.
	        out_channels: channels of the output tensor.
	        mid_channels: channels between the two convolutions; falls back to
	            ``out_channels`` when omitted (or falsy).

	    When ``in_channels != out_channels`` the skip path goes through a 1x1
	    convolution plus BatchNorm so it can be added to the main path.
	    """

	    def __init__(self, in_channels, out_channels, mid_channels=None):
	        super().__init__()
	        mid_channels = mid_channels or out_channels
	        norm_layer = nn.BatchNorm2d

	        # First stage. ``bn1`` is registered but not called in ``forward``;
	        # it still contributes entries to the module's state_dict.
	        self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False)
	        self.bn1 = norm_layer(mid_channels)
	        self.inst1 = nn.InstanceNorm2d(mid_channels)
	        self.relu = nn.ReLU(inplace=True)

	        # Second stage. ``bn2`` is likewise registered but unused in ``forward``.
	        self.conv2 = nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False)
	        self.bn2 = norm_layer(out_channels)
	        self.inst2 = nn.InstanceNorm2d(out_channels)

	        # Projection for the skip path, only needed when the channel count changes.
	        if in_channels == out_channels:
	            self.downsample = None
	        else:
	            self.downsample = nn.Sequential(
	                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
	                nn.BatchNorm2d(out_channels),
	            )

	    def forward(self, x):
	        """Apply both convolution stages and add the (possibly projected) input."""
	        shortcut = x if self.downsample is None else self.downsample(x)

	        y = self.relu(self.inst1(self.conv1(x)))
	        y = self.inst2(self.conv2(y))

	        y += shortcut
	        return self.relu(y)


	class Down(nn.Module):
	    """Encoder step: 2x2 max-pooling followed by a ``DoubleConv`` block.

	    Args:
	        in_channels: channels of the input tensor.
	        out_channels: channels produced by the ``DoubleConv`` block.
	    """

	    def __init__(self, in_channels, out_channels):
	        super().__init__()
	        stages = [
	            nn.MaxPool2d(2),
	            DoubleConv(in_channels, out_channels),
	        ]
	        self.maxpool_conv = nn.Sequential(*stages)

	    def forward(self, x):
	        """Pool ``x`` and run it through the convolution block."""
	        pooled_and_convolved = self.maxpool_conv(x)
	        return pooled_and_convolved


	class Up(nn.Module):
	    """Decoder step: upscale ``x1``, concatenate it with the skip tensor
	    ``x2`` along the channel axis and apply a ``DoubleConv`` block.

	    Args:
	        in_channels: channels of the concatenated tensor fed to the
	            convolution block.
	        out_channels: channels produced by the convolution block.
	        bilinear: when true, upscale with bilinear interpolation; otherwise
	            use a stride-2 transposed convolution that halves the channel
	            count, or no upscaling at all when ``in_channels`` equals
	            ``out_channels``.
	    """

	    def __init__(self, in_channels, out_channels, bilinear=True):
	        super().__init__()

	        if bilinear:
	            # Interpolation keeps the channel count, so the block narrows it
	            # through a reduced middle width.
	            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
	            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
	            return

	        if in_channels != out_channels:
	            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
	        else:
	            self.up = nn.Identity()
	        self.conv = DoubleConv(in_channels, out_channels)

	    def forward(self, x1, x2):
	        """Upscale ``x1``, pad it to the size of ``x2``, concatenate and convolve."""
	        upscaled = self.up(x1)

	        # Size mismatch along dimensions 2 and 3 between skip and upscaled tensors.
	        gap_rows = x2.size(2) - upscaled.size(2)
	        gap_cols = x2.size(3) - upscaled.size(3)
	        left = gap_cols // 2
	        top = gap_rows // 2

	        # F.pad takes the amounts for the last dimension first.
	        upscaled = F.pad(upscaled, [left, gap_cols - left, top, gap_rows - top])

	        merged = torch.cat([x2, upscaled], dim=1)
	        return self.conv(merged)


	class OutConv(nn.Module):
	    """Output projection: a single 1x1 convolution (with bias).

	    Args:
	        in_channels: channels of the input tensor.
	        out_channels: channels of the projected output.
	    """

	    def __init__(self, in_channels, out_channels):
	        super().__init__()
	        projection = nn.Conv2d(in_channels, out_channels, kernel_size=1)
	        self.conv = projection

	    def forward(self, x):
	        """Project ``x`` to ``out_channels`` channels."""
	        projected = self.conv(x)
	        return projected

	class GaussianLayer(nn.Module):
	    """Single-channel blur: a bias-free convolution whose weight is
	    initialised with a Gaussian kernel.

	    The weight stays a regular (trainable) parameter stored under the
	    state_dict key ``seq.0.weight``, so a loaded checkpoint replaces the
	    initial kernel.

	    Args:
	        kernel_size: side length of the square kernel. Must be odd so the
	            kernel has a centre tap. Defaults to 5.
	        sigma: standard deviation handed to
	            ``scipy.ndimage.gaussian_filter``. Defaults to 1.

	    Raises:
	        ValueError: if ``kernel_size`` is not a positive odd integer.
	    """

	    def __init__(self, kernel_size=5, sigma=1):
	        super(GaussianLayer, self).__init__()
	        if kernel_size < 1 or kernel_size % 2 == 0:
	            raise ValueError("kernel_size must be a positive odd integer")
	        self.kernel_size = kernel_size
	        self.sigma = sigma
	        self.seq = nn.Sequential(
	            # Padding of half the kernel keeps the spatial size unchanged.
	            nn.Conv2d(1, 1, kernel_size, stride=1, padding=kernel_size // 2, bias=False)
	        )

	        self.weights_init()

	    def forward(self, x):
	        """Blur the single-channel input ``x``."""
	        return self.seq(x)

	    def weights_init(self):
	        """Overwrite the convolution weight with a centred Gaussian kernel.

	        The kernel is the Gaussian-filtered response of a unit impulse. The
	        impulse sits on the centre tap; the previous position ``[3, 3]`` on
	        a 5x5 grid was off-centre and produced an asymmetric kernel that
	        shifts the image.
	        """
	        centre = self.kernel_size // 2
	        impulse = np.zeros((self.kernel_size, self.kernel_size))
	        impulse[centre, centre] = 1
	        kernel = torch.from_numpy(scipy.ndimage.gaussian_filter(impulse, sigma=self.sigma))
	        with torch.no_grad():
	            for _, param in self.named_parameters():
	                # Broadcasts the 2-D kernel into the (1, 1, k, k) weight and
	                # casts it to the parameter's dtype.
	                param.copy_(kernel)

	def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
	    """Return a bias-free 3x3 ``nn.Conv2d`` padded by ``dilation``.

	    NOTE: this re-binds the ``conv3x3`` name already defined earlier in
	    this file; both definitions build the same layer.

	    Args:
	        in_planes: number of input channels.
	        out_planes: number of output channels.
	        stride: convolution stride.
	        groups: number of channel groups.
	        dilation: kernel dilation, reused as the padding amount.
	    """
	    layer = nn.Conv2d(
	        in_planes,
	        out_planes,
	        kernel_size=3,
	        stride=stride,
	        padding=dilation,
	        groups=groups,
	        bias=False,
	        dilation=dilation,
	    )
	    return layer

	class Decoder(nn.Module):
	    """Container for the decoder layers used by ``Resnet_with_skip``.

	    The class defines no ``forward``; the owning model calls the
	    sub-modules one by one. Several sub-modules (``instance1``,
	    ``upsample4_conv``, ``gn2``, ``instance3``) are not called by
	    ``Resnet_with_skip.forward_decode`` in this file, but they are kept so
	    the set of state_dict keys stays unchanged.
	    """

	    def __init__(self):
	        super(Decoder, self).__init__()
	        # Skip-connected upscaling path (transposed-convolution variant of Up).
	        self.up1 = Up(2048, 1024, False)
	        self.up2 = Up(1024, 512, False)
	        self.up3 = Up(512, 256, False)
	        self.conv2d_2_1 = conv3x3(256, 128)
	        self.gn1 = nn.GroupNorm(4, 128)
	        self.instance1 = nn.InstanceNorm2d(128)
	        self.up4 = Up(128, 64, False)
	        self.upsample4 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
	        self.upsample4_conv = DoubleConv(64, 64, 64 // 2)
	        self.up_ = Up(128, 128, False)
	        self.conv2d_2_2 = conv3x3(128, 6)
	        self.instance2 = nn.InstanceNorm2d(6)
	        self.gn2 = nn.GroupNorm(3, 6)
	        # Built once. Previously a second GaussianLayer was constructed
	        # further down and replaced this one under the same attribute name.
	        self.gaussian_blur = GaussianLayer()
	        self.up5 = Up(6, 3, False)
	        self.conv2d_2_3 = conv3x3(3, 1)
	        self.instance3 = nn.InstanceNorm2d(1)

	        # Eight 3x3 kernels, one per neighbour direction: 1.0 on the centre
	        # tap and a random weight in [-1, 0] on a single neighbour.
	        # NOTE(review): the third kernel draws its weight from
	        # uniform(uniform(-1, 0), -0.0), unlike the other seven; kept as is.
	        self.kernel = nn.Parameter(torch.tensor(
	            [[[0.0, 0.0, 0.0], [0.0, 1.0, random.uniform(-1.0, 0.0)], [0.0, 0.0, 0.0]],
	             [[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, random.uniform(-1.0, 0.0)]],
	             [[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, random.uniform(random.uniform(-1.0, 0.0), -0.0), 0.0]],
	             [[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [random.uniform(-1.0, 0.0), 0.0, 0.0]],
	             [[0.0, 0.0, 0.0], [random.uniform(-1.0, 0.0), 1.0, 0.0], [0.0, 0.0, 0.0]],
	             [[random.uniform(-1.0, 0.0), 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]],
	             [[0.0, random.uniform(-1.0, 0.0), 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]],
	             [[0.0, 0.0, random.uniform(-1.0, 0.0)], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], ],
	        ).unsqueeze(1))

	        # The convolution is declared with as many output channels as there
	        # are kernels, matching the (8, 1, 3, 3) weight it actually uses.
	        # Its weight is tied to ``self.kernel`` (the very same Parameter), so
	        # the state_dict keeps both ``kernel`` and ``nms_conv.weight``.
	        self.nms_conv = nn.Conv2d(1, self.kernel.shape[0], kernel_size=3, stride=1, padding=1,
	                                  bias=False, groups=1)
	        self.nms_conv.weight = self.kernel


	class Resnet_with_skip(nn.Module):
	    """Wraps a ResNet-style classifier and adds a skip-connected decoder.

	    ``forward`` returns both the classifier output of the wrapped model and
	    a single-channel reconstruction decoded from its intermediate features.

	    Args:
	        model: backbone exposing ``conv1``, ``bn1``, ``relu``, ``maxpool``
	            and ``layer1`` .. ``layer4``, and callable on an image batch.
	    """

	    def __init__(self, model):
	        super().__init__()
	        self.model = model
	        self.decoder = Decoder()

	    def forward_pred(self, image):
	        """Return the wrapped model's output for ``image``."""
	        return self.model(image)

	    def forward_decode(self, image):
	        """Decode ``image`` into a single-channel map with thinned responses.

	        The backbone features are decoded through the skip-connected
	        up-blocks; the result is blurred, its gradient direction is
	        quantised to multiples of 45 degrees, and the directional filter
	        responses along that direction and its opposite weight the decoded
	        map before a gamma adjustment.
	        """
	        import math

	        backbone = self.model
	        dec = self.decoder

	        # --- encoder: keep every intermediate map for the skip connections
	        stem = backbone.relu(backbone.bn1(backbone.conv1(image)))
	        pooled = backbone.maxpool(stem)
	        feat1 = backbone.layer1(pooled)
	        feat2 = backbone.layer2(feat1)
	        feat3 = backbone.layer3(feat2)
	        feat4 = backbone.layer4(feat3)

	        # --- decoder: each step merges with the matching encoder map
	        decoded = dec.up1(feat4, feat3)
	        decoded = dec.up2(decoded, feat2)
	        decoded = dec.up3(decoded, feat1)
	        decoded = F.relu(dec.gn1(dec.conv2d_2_1(decoded)))
	        decoded = dec.up4(decoded, pooled)
	        decoded = dec.upsample4(decoded)
	        decoded = dec.up_(decoded, stem)
	        decoded = F.relu(dec.instance2(dec.conv2d_2_2(decoded)))
	        # The last skip connection uses the untouched network input.
	        decoded = dec.up5(decoded, image)
	        drawing = F.relu(dec.conv2d_2_3(decoded))

	        # --- gradient direction of the blurred map, in degrees,
	        #     rounded to the nearest multiple of 45
	        blurred = dec.gaussian_blur(drawing)
	        grads = kornia.filters.spatial_gradient(blurred, normalized=False)
	        grad_x, grad_y = grads[:, :, 0], grads[:, :, 1]
	        angle = torch.atan2(grad_y, grad_x)
	        angle = 180.0 * angle / math.pi
	        angle = torch.round(angle / 45) * 45

	        # --- non-maximal suppression: pick, per pixel, the directional
	        #     response along the gradient and along the opposite direction
	        responses = dec.nms_conv(blurred)
	        octant = angle / 45
	        along_idx = (octant % 8).long()
	        against_idx = ((octant + 4) % 8).long()
	        along = torch.gather(responses, 1, along_idx)
	        against = torch.gather(responses, 1, against_idx)
	        both = torch.stack([along, against], 1)

	        # Smaller of the two responses; values not above 0.01 are set to 0.01.
	        floor = nn.Threshold(0.01, 0.01)
	        weight = floor(both.min(dim=1)[0])

	        magnitude = torch.mul(drawing, weight)
	        return kornia.enhance.adjust_gamma(magnitude, 2.0)

	    def forward(self, image):
	        """Return ``(prediction, reconstruction)`` for ``image``."""
	        reconstruction = self.forward_decode(image)
	        prediction = self.forward_pred(image)
	        return prediction, reconstruction


	def create_retrieval_figure(res):
	    """Plot the retrieved images ranked 1..10 and list their item UUIDs.

	    The first entry of ``res`` (rank 0) is skipped; entries ranked 1 to 10
	    are read from ``dataset.zip`` and drawn on a 2x5 grid.

	    Args:
	        res: iterable of image paths ordered from most to least similar
	            (the keys when a dict is passed). Each plotted path needs at
	            least five ``/``-separated components; components 3 and 4
	            form the member name inside ``dataset.zip``, and the file
	            name must contain ``itemUUID_<id>_photoUUID``.

	    Returns:
	        ``(fig, names)``: the matplotlib figure and a text block with one
	        ``"Top <rank> item UUID is <id>"`` line per plotted image.
	    """
	    fig = plt.figure(figsize=[10 * 3, 10 * 3])
	    cols = 5
	    rows = 2
	    ax_query = fig.add_subplot(rows, 1, 1)
	    plt.rcParams['figure.facecolor'] = 'white'
	    ax_query.axis('off')
	    ax_query.set_title('Top 10 most similar scarabs', fontsize=40)

	    # Only ranks 1..10 are ever drawn, so nothing beyond them is touched.
	    ranked_paths = list(res)[1:11]
	    if not ranked_paths:
	        return fig, ""

	    lines = []
	    # One archive handle for all images, closed when the block exits
	    # (previously a new ZipFile was opened per image and never closed).
	    with zipfile.ZipFile('dataset.zip', 'r') as archive:
	        for rank, image_path in enumerate(ranked_paths, start=1):
	            parts = image_path.split("/")
	            folder, file_name = parts[3], parts[4]
	            raw = archive.read(folder + "/" + file_name)
	            image = cv2.imdecode(np.frombuffer(raw, np.uint8), 1)

	            # Draw through the axes object instead of pyplot's implicit
	            # "current axes" state.
	            ax = fig.add_subplot(rows, cols, rank)
	            ax.axis('off')
	            ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
	            ax.set_title('Top {}'.format(rank), fontsize=40)

	            item_uuid = file_name.split("_photoUUID")[0].split("itemUUID_")[1]
	            lines.append("Top " + str(rank) + " item UUID is " + item_uuid + "\n")
	    return fig, "".join(lines)

	def knn_calc(image_name, query_feature, features):
	    """Return the negated mean cosine similarity to a stored feature.

	    Args:
	        image_name: key of the stored feature inside ``features``.
	        query_feature: feature tensor of the query image.
	        features: mapping from image name to feature tensor.

	    Returns:
	        A Python float; smaller values mean the two features are more
	        similar (cosine similarity is taken along dimension 1, averaged and
	        negated).
	    """
	    stored_feature = features[image_name]
	    similarity = torch.nn.functional.cosine_similarity(query_feature, stored_feature, dim=1)
	    return -similarity.mean().item()

	checkpoint_path = "multi_label.pth.tar"

	# Multi-label classifier + decoder. The backbone is built without
	# pretrained weights: load_state_dict() below is strict, so every parameter
	# and buffer is overwritten by the checkpoint and a download of ImageNet
	# weights would be thrown away immediately.
	resnet = models.resnet101()
	num_ftrs = resnet.fc.in_features
	resnet.fc = nn.Linear(num_ftrs, 13)
	model = Resnet_with_skip(resnet)
	checkpoint = torch.load(checkpoint_path, map_location="cpu")
	model.load_state_dict(checkpoint)
	# Dropping the last child of ``model`` removes the decoder, which leaves the
	# ResNet (including its ``fc`` layer) as the embedding network. The modules
	# are shared with ``model``, not copied.
	embedding_model_test = torch.nn.Sequential(*(list(model.children())[:-1]))

	# Period classifier: same architecture with a 5-way head, fully loaded from
	# its own checkpoint.
	periods_model = models.resnet101()
	periods_model.fc = nn.Linear(num_ftrs, 5)
	periods_checkpoint = torch.load("periods.pth.tar", map_location="cpu")
	periods_model.load_state_dict(periods_checkpoint)

	# NOTE: pickle.load can execute arbitrary code; these two files must only
	# ever come from a trusted source (they ship with the app).
	with open('query_images_paths.pkl', 'rb') as fp:
	    query_images_paths = pickle.load(fp)

	with open('features.pkl', 'rb') as fp:
	    features = pickle.load(fp)



	# Both networks are only used for inference.
	model.eval()
	periods_model.eval()

	# Input pipeline: fixed 224x224 size, 3-channel grayscale, values scaled
	# from [0, 1] to [-1, 1] by (x - 0.5) / 0.5.
	transform = transforms.Compose([
	    transforms.Resize((224, 224)),
	    transforms.Grayscale(num_output_channels=3),
	    transforms.ToTensor(),
	    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
	])

	# Inverse of the normalisation above, done in two steps:
	# first x / 2 (i.e. multiply by 0.5), then add 0.5.
	invTrans = transforms.Compose([
	    transforms.Normalize(mean=[0.0] * 3, std=[1 / 0.5] * 3),
	    transforms.Normalize(mean=[-0.5] * 3, std=[1.0] * 3),
	])

	# Output order of the 13-way multi-label head.
	labels = [
	    'ankh', 'anthropomorphic', 'bands', 'beetle', 'bird', 'circles', 'cross',
	    'duck', 'head', 'ibex', 'lion', 'sa', 'snake',
	]

	# Output order of the 5-way period head.
	periods_labels = ["MB1", "MB2", "LB", "Iron1", 'Iron2']

	def predict(inp):
	    """Run classification, period prediction, drawing generation and retrieval.

	    Args:
	        inp: the query image as a PIL image.

	    Returns:
	        A 5-tuple:
	        * dict with a single entry mapping the ``&``-joined names of all
	          labels whose sigmoid score is at least 0.8 to ``1.0`` (the key is
	          the empty string when no label passes);
	        * dict mapping each period label to its softmax probability;
	        * the generated drawing as an ``(H, W, 3)`` numpy array;
	        * the matplotlib figure with the most similar images;
	        * a text block listing the item UUIDs of those images.
	    """
	    score_threshold = 0.8
	    batch = transform(inp).unsqueeze(0)

	    # All three forward passes are inference only, so none of them should
	    # build an autograd graph (the embedding pass used to run outside
	    # no_grad).
	    with torch.no_grad():
	        classification, reconstruction = model(batch)
	        periods_classification = periods_model(batch)
	        feature = embedding_model_test(batch)

	    # Generated drawing: replicate the single channel to three channels,
	    # invert, undo the input normalisation and move channels last.
	    recon_tensor = reconstruction[0].repeat(3, 1, 1)
	    recon_tensor = invTrans(kornia.enhance.invert(recon_tensor))
	    plot_recon = recon_tensor.permute(1, 2, 0).detach().numpy()

	    # Multi-label decision: every label whose score reaches the threshold.
	    scores = torch.sigmoid(classification)[0]
	    active_labels = [name for name, score in zip(labels, scores) if score >= score_threshold]
	    # Plain Python floats, which is what the Label output component documents.
	    confidences = {"&".join(active_labels): 1.0}

	    periods_prediction = torch.nn.functional.softmax(periods_classification[0], dim=0)
	    periods_confidences = {
	        name: float(prob) for name, prob in zip(periods_labels, periods_prediction)
	    }

	    # Retrieval: rank every stored image by its distance to the query.
	    dists = {
	        image_name: knn_calc(image_name, feature, features)
	        for image_name in query_images_paths
	    }
	    res = dict(sorted(dists.items(), key=itemgetter(1)))
	    fig, names = create_retrieval_figure(res)
	    return confidences, periods_confidences, plot_recon, fig, names


	# Build the web UI around ``predict`` and start serving it. The five outputs
	# match the five values returned by ``predict``, in order.
	gr.Interface(
	    fn=predict,
	    inputs=gr.Image(type="pil"),
	    title="ArcAid: Analysis of Archaeological Artifacts using Drawings",
	    description="Easily classify artifacts, retrieve similar ones and generate drawings. "
	                "https://arxiv.org/abs/2211.09480.",
	    # examples=['anth.jpg', 'beetle_snakes.jpg', 'bird.jpg', 'cross.jpg', 'ibex.jpg',
	    #           'lion.jpg', 'lion2.jpg', 'sa.jpg'],
	    outputs=[gr.Label(num_top_classes=1), gr.Label(num_top_classes=1), "image", 'plot', 'text'],
	).launch(share=True, enable_queue=True)