Initial commit
- .gitignore +3 -0
- app.py +101 -0
- flagged/content/tmpu5ej5fhy.jpg +0 -0
- flagged/log.csv +2 -0
- flagged/style/tmpesg98402.jpg +0 -0
- model.py +354 -0
- requirements.txt +2 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+NeuralStyleTransfer.ipynb
+images
+__pycache__
app.py
ADDED
@@ -0,0 +1,101 @@
+import gradio as gr
+from model import NeuralStyleTransfer
+
+
+def model_fn(
+    style,
+    content,
+    extractor="inception_v3",
+    n_content_layers=3,
+    n_style_layers=2,
+    epochs=4,
+    learning_rate=60.0,
+    steps_per_epoch=100,
+    style_weight=1e-2,
+    content_weight=1e-4,
+    var_weight=1e-12,
+):
+    # Build the style-transfer model from the two uploaded images.
+    model = NeuralStyleTransfer(
+        style_image=style,
+        content_image=content,
+        extractor=extractor,
+        n_content_layers=n_content_layers,
+        n_style_layers=n_style_layers,
+        display=False,  # no matplotlib previews on the server
+    )
+
+    # Forward the hyperparameters chosen in the UI instead of hard-coding them.
+    return model.fit_style_transfer(
+        epochs=int(epochs),
+        learning_rate=learning_rate,
+        steps_per_epoch=int(steps_per_epoch),
+        style_weight=style_weight,
+        content_weight=content_weight,
+        var_weight=var_weight,
+    )
+
+
+def hugging_face():
+    demo = gr.Interface(
+        fn=model_fn,
+        inputs=[
+            # Pass file paths because NeuralStyleTransfer._load_img reads the
+            # images from disk with tf.io.read_file.
+            gr.Image(type="filepath", label="style"),
+            gr.Image(type="filepath", label="content"),
+            gr.Dropdown(
+                ["inception_v3", "vgg19", "resnet50", "mobilenet_v2"],
+                value="inception_v3",
+                label="extractor",
+                info="Feature extractor to use.",
+            ),
+            gr.Slider(
+                1,
+                5,
+                value=3,
+                label="n_content_layers",
+                info="Number of content layers to use.",
+            ),
+            gr.Slider(
+                1,
+                5,
+                value=2,
+                label="n_style_layers",
+                info="Number of style layers to use.",
+            ),
+            gr.Slider(
+                2, 20, value=4, label="epochs", info="Number of epochs to train for."
+            ),
+            gr.Slider(
+                1, 100, value=60, label="learning_rate", info="Initial learning rate."
+            ),
+            gr.Slider(
+                1,
+                100,
+                value=100,
+                label="steps_per_epoch",
+                info="Number of steps per epoch.",
+            ),
+            gr.Slider(
+                1e-4,
+                1e-2,
+                value=1e-2,
+                label="style_weight",
+                info="Weight of the style loss.",
+            ),
+            gr.Slider(
+                1e-4,
+                1e-2,
+                value=1e-4,
+                label="content_weight",
+                info="Weight of the content loss.",
+            ),
+            gr.Slider(
+                1e-12,
+                1e-9,
+                value=1e-12,
+                label="var_weight",
+                info="Weight of the total variation loss.",
+            ),
+        ],
+        outputs="image",
+    )
+
+    return demo
+
+
+if __name__ == "__main__":
+    demo = hugging_face()
+    demo.launch(share=True)
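For a quick check outside Gradio, model_fn can also be called directly with two image files. A minimal smoke-test sketch, assuming the dependencies from requirements.txt are installed and that style.jpg and content.jpg are hypothetical local files:

    from app import model_fn

    # Run a short optimization so the smoke test finishes quickly.
    stylized = model_fn(
        style="style.jpg",      # hypothetical path to a style image
        content="content.jpg",  # hypothetical path to a content image
        epochs=2,
        steps_per_epoch=10,
    )
    stylized.save("stylized.jpg")  # fit_style_transfer returns a PIL image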
flagged/content/tmpu5ej5fhy.jpg
ADDED
flagged/log.csv
ADDED
@@ -0,0 +1,2 @@
+style,content,extractor,n_content_layers,n_style_layers,epochs,learning_rate,steps_per_epoch,style_weight,content_weight,var_weight,output,flag,username,timestamp
+/home/shailja/Courses/Notes/Projects/StyleTransfer/flagged/style/tmpesg98402.jpg,/home/shailja/Courses/Notes/Projects/StyleTransfer/flagged/content/tmpu5ej5fhy.jpg,inception_v3,3,2,4,60,100,0.01,0.00039,5.8e-11,,,,2023-05-28 08:18:26.193609
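Flagged submissions accumulate in this CSV. A small inspection sketch using only the standard library, assuming it is run from the repository root:

    import csv

    # Print the extractor and key hyperparameters of each flagged run.
    with open("flagged/log.csv", newline="") as f:
        for row in csv.DictReader(f):
            print(row["extractor"], row["epochs"], row["style_weight"], row["timestamp"])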
flagged/style/tmpesg98402.jpg
ADDED
model.py
ADDED
@@ -0,0 +1,354 @@
+import tensorflow as tf
+import matplotlib.pyplot as plt
+from keras import backend as K
+
+
+class NeuralStyleTransfer:
+    def __init__(
+        self,
+        style_image,
+        content_image,
+        extractor,
+        n_style_layers=5,
+        n_content_layers=5,
+        display=True,
+    ):
+        # load the feature extractor (ImageNet weights, no classification head)
+        extractors = {
+            "inception_v3": tf.keras.applications.InceptionV3,
+            "vgg19": tf.keras.applications.VGG19,
+            "resnet50": tf.keras.applications.ResNet50,
+            "mobilenet_v2": tf.keras.applications.MobileNetV2,
+        }
+        if isinstance(extractor, tf.keras.Model):
+            self.feature_extractor = extractor
+        elif extractor in extractors:
+            self.feature_extractor = extractors[extractor](
+                include_top=False, weights="imagenet"
+            )
+        else:
+            raise ValueError(f"Feature extractor '{extractor}' not found")
+
+        # freeze the model
+        self.feature_extractor.trainable = False
+
+        # define the style and content depth
+        self.n_style_layers = n_style_layers
+        self.n_content_layers = n_content_layers
+
+        self.style_image = self._load_img(style_image)
+        self.content_image = self._load_img(content_image)
+
+        if display:
+            self.show_images_with_objects(
+                [self.style_image, self.content_image],
+                ["Style Image", "Content Image"],
+            )
+
+    def tensor_to_image(self, tensor):
+        """Converts a tensor to a PIL image."""
+        if len(tensor.shape) > 3:
+            assert tensor.shape[0] == 1
+            tensor = tensor[0]
+        return tf.keras.preprocessing.image.array_to_img(tensor)
+
+    def _load_img(self, image):
+        """Loads an image from disk and resizes it so its longest side is 512 px."""
+        max_dim = 512
+
+        image = tf.io.read_file(image)
+        image = tf.image.decode_image(image, channels=3, expand_animations=False)
+        image = tf.image.convert_image_dtype(image, tf.float32)
+
+        shape = tf.cast(tf.shape(image)[:-1], tf.float32)
+        long_dim = tf.reduce_max(shape)
+        scale = max_dim / long_dim
+
+        new_shape = tf.cast(shape * scale, tf.int32)
+
+        image = tf.image.resize(image, new_shape)
+        image = image[tf.newaxis, :]
+        image = tf.image.convert_image_dtype(image, tf.uint8)
+
+        return image
+
+    def imshow(self, image, title=None):
+        """Displays an image with a corresponding title."""
+        if len(image.shape) > 3:
+            image = tf.squeeze(image, axis=0)
+
+        plt.imshow(image)
+        if title:
+            plt.title(title)
+
+    def show_images_with_objects(self, images, titles=[]):
+        """Displays a row of images with corresponding titles."""
+        if len(images) != len(titles):
+            return
+
+        plt.figure(figsize=(20, 12))
+        for idx, (image, title) in enumerate(zip(images, titles)):
+            plt.subplot(1, len(images), idx + 1)
+            plt.xticks([])
+            plt.yticks([])
+            self.imshow(image, title)
+
+    def _preprocess_image(self, image):
+        # scale pixel values from [0, 255] to [-1, 1]
+        image = tf.cast(image, dtype=tf.float32)
+        image = (image / 127.5) - 1.0
+
+        return image
+
+    def get_output_layers(self):
+        # get all the layers which contain "conv" in their name
+        all_layers = [
+            layer.name
+            for layer in self.feature_extractor.layers
+            if "conv" in layer.name
+        ]
+
+        # style layers: the first n conv layers
+        style_layers = all_layers[: self.n_style_layers]
+
+        # content layers: counted backwards from the second-to-last conv layer
+        content_layers = all_layers[-2 : -self.n_content_layers - 2 : -1]
+
+        return content_layers + style_layers
+
+    def build(self, layers_name):
+        # replace the extractor with a multi-output model over the chosen layers
+        output_layers = [
+            self.feature_extractor.get_layer(name).output for name in layers_name
+        ]
+
+        self.feature_extractor = tf.keras.Model(
+            self.feature_extractor.input, output_layers
+        )
+
+    def _loss(self, target_img, features_img, type):
+        """
+        Calculates one term of the style-transfer loss.
+
+        target_img: the target (style or content) features
+        features_img: the generated image features (style or content)
+        """
+        loss = tf.reduce_mean(tf.square(features_img - target_img))
+
+        if type == "content":
+            return 0.5 * loss
+
+        return loss
+
+    def _gram_matrix(self, input_tensor):
+        """
+        Calculates the Gram matrix and divides by the number of locations.
+
+        input_tensor: a conv-layer output of shape (batch, height, width, channels)
+        """
+        result = tf.linalg.einsum("bijc,bijd->bcd", input_tensor, input_tensor)
+        input_shape = tf.shape(input_tensor)
+        num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
+        return result / num_locations
+
+    def get_features(self, image, type):
+        preprocessed_image = self._preprocess_image(image)
+
+        outputs = self.feature_extractor(preprocessed_image)
+
+        if type == "style":
+            # style outputs come after the content outputs; reduce them to Gram matrices
+            outputs = outputs[self.n_content_layers :]
+            features = [self._gram_matrix(style_output) for style_output in outputs]
+        elif type == "content":
+            features = outputs[: self.n_content_layers]
+
+        return features
+
+    def _style_content_loss(
+        self,
+        style_targets,
+        style_outputs,
+        content_targets,
+        content_outputs,
+        style_weight,
+        content_weight,
+    ):
+        """
+        Calculates the total loss of the style transfer.
+
+        style_targets: the style features of the style image
+        style_outputs: the style features of the generated image
+        content_targets: the content features of the content image
+        content_outputs: the content features of the generated image
+        style_weight: the weight of the style loss
+        content_weight: the weight of the content loss
+        """
+        # add the loss of each layer
+        style_loss = style_weight * tf.add_n(
+            [
+                self._loss(style_target, style_output, type="style")
+                for style_target, style_output in zip(style_targets, style_outputs)
+            ]
+        )
+        content_loss = content_weight * tf.add_n(
+            [
+                self._loss(content_target, content_output, type="content")
+                for content_target, content_output in zip(
+                    content_targets, content_outputs
+                )
+            ]
+        )
+        return style_loss + content_loss
+
+    def _grad_loss(
+        self,
+        generated_image,
+        style_target,
+        content_target,
+        style_weight,
+        content_weight,
+        var_weight,
+    ):
+        """Calculates the gradients of the loss with respect to the generated image."""
+        with tf.GradientTape() as tape:
+            style_features = self.get_features(generated_image, type="style")
+            content_features = self.get_features(generated_image, type="content")
+            loss = self._style_content_loss(
+                style_target,
+                style_features,
+                content_target,
+                content_features,
+                style_weight,
+                content_weight,
+            )
+
+            # total variation regularization keeps the generated image smooth
+            loss += var_weight * tf.image.total_variation(generated_image)
+        grads = tape.gradient(loss, generated_image)
+        return grads, loss
+
+    def _update_image_with_style(
+        self,
+        generated_image,
+        style_target,
+        content_target,
+        style_weight,
+        content_weight,
+        optimizer,
+        var_weight,
+    ):
+        grads, loss = self._grad_loss(
+            generated_image,
+            style_target,
+            content_target,
+            style_weight,
+            content_weight,
+            var_weight,
+        )
+
+        optimizer.apply_gradients([(grads, generated_image)])
+
+        # keep pixel values in the displayable range
+        generated_image.assign(
+            tf.clip_by_value(generated_image, clip_value_min=0.0, clip_value_max=255.0)
+        )
+        return loss
+
+    def fit_style_transfer(
+        self,
+        epochs=10,
+        learning_rate=80,
+        steps_per_epoch=100,
+        style_weight=1e-2,
+        content_weight=1e-4,
+        show_interval=10,
+        var_weight=0.0,
+    ):
+        """
+        Runs the optimization and returns the stylized image.
+
+        epochs: the number of epochs to train for
+        learning_rate: the initial learning rate of the optimizer (default: 80)
+        steps_per_epoch: the number of optimization steps per epoch
+        style_weight: the weight of the style loss
+        content_weight: the weight of the content loss
+        show_interval: the interval (in steps) at which to snapshot the generated image
+        var_weight: the weight of the total variation loss
+        """
+        style_image = self.style_image
+        content_image = self.content_image
+
+        content_and_style_layers = self.get_output_layers()
+
+        # build the model with the layers we need to extract the features from
+        K.clear_session()
+        self.build(content_and_style_layers)
+
+        style_features = self.get_features(style_image, type="style")
+        content_features = self.get_features(content_image, type="content")
+
+        optimizer = tf.optimizers.Adam(
+            tf.keras.optimizers.schedules.ExponentialDecay(
+                initial_learning_rate=learning_rate, decay_steps=100, decay_rate=0.80
+            )
+        )
+
+        # start the optimization from the content image
+        generated_image = tf.Variable(tf.cast(content_image, tf.float32))
+
+        images = []
+        display_image = None
+
+        for epoch in range(epochs):
+            for step in range(steps_per_epoch):
+                loss = self._update_image_with_style(
+                    generated_image,
+                    style_features,
+                    content_features,
+                    style_weight,
+                    content_weight,
+                    optimizer,
+                    var_weight,
+                )
+
+                # snapshot intermediate results every `show_interval` steps
+                if (step + 1) % show_interval == 0:
+                    images.append(self.tensor_to_image(generated_image))
+
+            display_image = self.tensor_to_image(generated_image)
+            images.append(display_image)
+
+        # return the final stylized image (a PIL image, as expected by the Gradio app)
+        return display_image
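The style term hinges on the Gram matrix computed in _gram_matrix: it correlates feature channels over all spatial positions, so it captures texture statistics while discarding layout. A standalone sketch of the same computation, with random features standing in for a real conv activation:

    import tensorflow as tf

    # Fake conv activation: batch of 1, a 4x4 spatial grid, 8 channels.
    features = tf.random.uniform((1, 4, 4, 8))

    # Same einsum as NeuralStyleTransfer._gram_matrix, normalized by the
    # number of spatial locations (4 * 4 = 16).
    gram = tf.linalg.einsum("bijc,bijd->bcd", features, features) / 16.0
    print(gram.shape)  # (1, 8, 8): one channel-by-channel correlation matrix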
requirements.txt
ADDED
@@ -0,0 +1,2 @@
+tensorflow-cpu
+gradio