Skip this worksheet and proceed to the next one!
This one contains some imports and auxiliary code (e.g. color correction and dithering) used by other worksheets.
import numpy as np
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
from PIL import Image, ImageChops
import ipywidgets as widgets
color_params = {
    'grayscale_matrix': [0.3, 0.3, 0.3, 0.0,
                         0.3, 0.3, 0.3, 0.0,
                         0.3, 0.3, 0.3, 0.0],
    'red_component': [0, 0.11568627450980393, 0.3254901960784314, 0.23137254901960785,
                      0.4196078431372549, 0.027450980392156862, 1],
    'green_component': [0, 0.4588235294117647, 0.6294117647058823, 0.9176470588235294,
                        0.3411764705882353, 0.7647058823529411, 1],
    'blue_component': [0, 0.19215686274509805, 0.692156862745098, 0.3843137254901961,
                       1, 0.9490196078431372, 1],
    'color_matrix': [1.3935000000000002, -0.35750000000000004, -0.03599999999999999, 0,
                     -0.10650000000000001, 1.1425, -0.03599999999999999, 0,
                     -0.10650000000000001, -0.35750000000000004, 1.4640000000000002, 0]
}
def component_transfer(grayscale: np.ndarray, component: list) -> np.ndarray:
    # Piecewise-linear transfer: map each grayscale value through the given component table
    n = len(component) - 1

    def precompute_transfer(x):
        if x == 1:
            return component[-1]
        k = int(x * n)
        x_new = component[k] + (x - k / n) * n * (component[k + 1] - component[k])
        return round(x_new * 255)

    # Precompute the transfer for all 256 possible values and apply it as a lookup table
    transfer_lookup = {x: precompute_transfer(x / 255) for x in range(256)}
    gradientmap = np.vectorize(transfer_lookup.get)(grayscale)
    return gradientmap
def recolor_image(source_img: Image) -> Image:
    # Apply the grayscale matrix (each channel becomes the same weighted sum of R, G, B)
    grayscale = source_img.convert('RGB', color_params['grayscale_matrix'])
    grayscale_arr = np.array(grayscale)
    # Map each channel through its own transfer curve to build a gradient map
    gradientmap_arr = np.zeros(grayscale_arr.shape, dtype=np.uint)
    gradientmap_arr[:, :, 0] = component_transfer(grayscale_arr[:, :, 0], color_params['red_component'])
    gradientmap_arr[:, :, 1] = component_transfer(grayscale_arr[:, :, 1], color_params['green_component'])
    gradientmap_arr[:, :, 2] = component_transfer(grayscale_arr[:, :, 2], color_params['blue_component'])
    gradientmap = Image.fromarray(gradientmap_arr.astype(np.uint8))
    # Blend the gradient map with the grayscale image and apply the final color matrix
    blended = ImageChops.multiply(gradientmap, grayscale)
    result_img = blended.convert('RGB', color_params['color_matrix'])
    return result_img
# Some images can contain gradient banding. Let's add a grain effect:
def dither_image(img: np.ndarray, noise_factor: float = 1):
    # Add uniform noise of amplitude noise_factor/256 and clip the result to [0, 1]
    img += (np.random.random(img.shape) - 0.5) * (noise_factor / 256)
    img[img < 0] = 0
    img[img > 1] = 1
    return img
# Finally, let's write a method for running inference with the network:
def render_image(model, input_space, width=1920, height=1080):
    pred = model.predict(input_space, batch_size=65536)
    # Normalize the predictions to [0, 1] per channel
    pred_normed = (pred - pred.min(0)) / (pred.ptp(0) + 1e-10)
    img_arr = pred_normed.reshape(height, width, 3)
    # Dither to hide gradient banding
    img_arr = dither_image(img_arr, noise_factor=4)
    img_arr = (img_arr * 255).astype(np.uint8)
    img = Image.fromarray(img_arr)
    img = recolor_image(img)
    return img
# Now let's define a simple model-building method
def build_network_demo(width=4, depth=4, variance=400, seed=42):
    if not (width > 0 and depth > 0 and variance > 0):
        raise ValueError('width, depth and variance must be positive')
    tf.random.set_seed(seed)
    input_shape = (5,)  # number of parameters in the input space defined above
    initializer = keras.initializers.VarianceScaling(scale=variance, mode='fan_in',
                                                     distribution='normal', seed=seed)
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(depth):
        layer = keras.layers.Dense(width, kernel_initializer=initializer, activation='tanh')
        layer_output = layer(x)
        # DenseNet-style connectivity: concatenate each layer's output with its input
        x = keras.layers.Concatenate()([x, layer_output])
    bottleneck_initializer = keras.initializers.GlorotNormal(seed)
    bottleneck = keras.layers.Dense(3,  # the number of channels in an RGB image
                                    activation='tanh',
                                    kernel_initializer=bottleneck_initializer)(x)
    model = keras.Model(inputs=inputs, outputs=bottleneck)
    return model
def build_input_space_demo(x_resolution=1920, y_resolution=1080, alpha=0.5, beta=0.5, mask=None) -> np.ndarray:
    x_pos = np.linspace(-1, 1, x_resolution)
    y_pos = np.linspace(-1, 1, y_resolution)
    x_pos, y_pos = np.meshgrid(x_pos, y_pos)
    # Now that we have x_pos and y_pos, let's compute an example of F:
    f = np.sqrt(x_pos ** 2 + y_pos ** 2)
    # The results are reshaped so they are more convenient to concatenate later
    x_pos = x_pos.reshape(-1, 1)
    y_pos = y_pos.reshape(-1, 1)
    f = f.reshape(-1, 1)
    alpha_filled = np.full((x_pos.shape[0], 1), alpha)
    beta_filled = np.full((x_pos.shape[0], 1), beta)
    input_space = np.concatenate((x_pos, y_pos, alpha_filled, beta_filled, f), axis=1)
    if mask is not None:
        # Modulate alpha and beta per pixel with the mask
        input_space[:, 2:-1] *= (0.5 - mask) * 3
    return input_space
alpha_slider = widgets.FloatSlider(
    value=0.5, min=-1, max=1, step=0.05,
    description='alpha:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    # readout_format='d'
)
beta_slider = widgets.FloatSlider(
    value=0.5, min=-1, max=1, step=0.05,
    description='beta:',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    # readout_format='d'
)
variance_slider = widgets.IntSlider(
    value=400, min=1, max=1000, step=1,
    description='variance:',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
)
width_slider = widgets.IntSlider(
    value=4, min=2, max=16, step=1,
    description='width:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
depth_slider = widgets.IntSlider(
    value=4, min=2, max=16, step=1,
    description='depth:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
seed_slider = widgets.IntSlider(
    value=42, min=0, max=10000, step=1,
    description='seed:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
Technical fundamentals
Our Art Generator is based on simple feedforward neural networks. Each feedforward network consists of multiple layers of neurons. Each neuron takes some inputs, computes a weighted sum of them, applies some activation function to this sum, and passes the result as output.
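As a rough illustration (a minimal NumPy sketch with made-up weight values, not the generator's actual code), a single tanh neuron looks like this:

import numpy as np

def neuron(inputs: np.ndarray, weights: np.ndarray, bias: float) -> float:
    # weighted sum of the inputs followed by a tanh activation
    return np.tanh(np.dot(inputs, weights) + bias)

# made-up example values, just to show the shape of the computation
print(neuron(np.array([0.2, -0.7, 0.5]), np.array([1.0, 0.3, -2.0]), bias=0.1))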

Our images are therefore parametrized by the weights of the neural network; this approach is commonly known as a Compositional Pattern Producing Network (CPPN). Usually, a CPPN maps the position of a pixel to its color:
$$F(x, y) \rightarrow (r, g, b)$$
In that case, the image evolves by changing the CPPN itself. However, this is a slow process, and it doesn't let us perform GPU-optimized batch inference, so we introduced a set of additional virtual input parameters:
$$F(x, y, a, b, f) \rightarrow (r, g, b)$$
Now we can manipulate the image without changing the neural network itself! This enables things like generating videos, enforcing visual patterns across the image, and much more.
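For example, here is a minimal sketch (reusing the helper functions defined above; the resolution and parameter values are arbitrary) that renders the same fixed-weight network twice with different alpha values: only the inputs change, the weights do not.

model = build_network_demo(seed=7)  # weights are fixed once the model is built

# Two input spaces that differ only in the virtual parameter alpha
space_a = build_input_space_demo(x_resolution=320, y_resolution=180, alpha=-0.5)
space_b = build_input_space_demo(x_resolution=320, y_resolution=180, alpha=0.5)

# The very same network produces two different images
img_a = render_image(model, space_a, width=320, height=180)
img_b = render_image(model, space_b, width=320, height=180)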
Image rendering
The images produced by our generator are essentially landscapes of the mapping functions of (fairly simple) feed-forward neural networks! Play around with the parameter sliders below, then run the cell beneath them to see the result!
widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
demo_input_space = build_input_space_demo(alpha=alpha_slider.value, beta=beta_slider.value)
demo_model = build_network_demo(width=width_slider.value, depth=depth_slider.value,
                                seed=seed_slider.value, variance=variance_slider.value)
demo_image = render_image(demo_model, demo_input_space)
plt.figure(figsize=(19, 10))
plt.imshow(demo_image)
plt.axis('off')
plt.show()
What is the input space?
There are 5 float inputs corresponding to each pixel in the image:

- x_pos - this input is linearly proportional to the X coordinate of the current pixel on the image (exact values depend on other image generation parameters that are out of the scope of this demo).
- y_pos - the same as x_pos, but corresponds to the Y axis.
- alpha - this input has a constant value across all the pixels in the image and is used by the animation engine.
- beta - the same as alpha.
- f - this one allows us to apply some general patterns to images by mapping a function of x_pos and y_pos: $f = F(x\_pos, y\_pos)$. For example, $f = \sqrt{x\_pos^{2} + y\_pos^{2}}$ produces circular-shaped patterns.
# Now let's fill x_pos and y_pos with values from [-1, 1]
x_pos = np.linspace(-1, 1, 1920)
y_pos = np.linspace(-1, 1, 1080)
# Now let's combine them into an x_resolution-by-y_resolution grid.
# In our particular case that means we simply repeat each column and row
# x_resolution and y_resolution times respectively
x_pos, y_pos = np.meshgrid(x_pos, y_pos)
# Now that we have x_pos and y_pos, let's compute an example of F:
f = np.sqrt(x_pos ** 2 + y_pos ** 2)
fig, ax = plt.subplots(1, 3)
fig.set_size_inches(20, 15)
ax[0].imshow(x_pos)
ax[1].imshow(y_pos)
ax[2].imshow(f)
plt.axis('off')
plt.show()
# The results are reshaped so they are more convenient to concatenate later
x_pos = x_pos.reshape(-1, 1)
y_pos = y_pos.reshape(-1, 1)
f = f.reshape(-1, 1)
# Finally, we need to repeat our alpha and beta for every pixel
alpha_filled = np.full((x_pos.shape[0], 1), alpha_slider.value)
beta_filled = np.full((x_pos.shape[0], 1), beta_slider.value)
input_space = np.concatenate((x_pos, y_pos, alpha_filled, beta_filled, f), axis=1)
# The resulting array is flattened across its spatial dimensions,
# which makes batch inference more convenient later
print(input_space.shape)
After combining the pieces, we get the following function:
def build_input_space(width=1920, height=1080, alpha=0.5, beta=0.5) -> np.ndarray:
    x_pos = np.linspace(-1, 1, width)
    y_pos = np.linspace(-1, 1, height)
    x_pos, y_pos = np.meshgrid(x_pos, y_pos)
    # Now that we have x_pos and y_pos, let's compute an example of F:
    ### DEFAULT OPTION ###
    f = np.sqrt(x_pos ** 2 + y_pos ** 2)
    ### PLAIN SUM ###
    # f = x_pos + y_pos
    ### VERTICAL SQUEEZE ###
    # f = np.sqrt(4 * x_pos ** 2 + y_pos ** 2)
    ### HORIZONTAL SQUEEZE ###
    # f = np.sqrt(x_pos ** 2 + 2 * y_pos ** 2)
    ### SUM OF ABSOLUTE VALUES ###
    # f = np.abs(x_pos) + np.abs(y_pos)
    # The results are reshaped so they are more convenient to concatenate later
    x_pos = x_pos.reshape(-1, 1)
    y_pos = y_pos.reshape(-1, 1)
    f = f.reshape(-1, 1)
    alpha_filled = np.full((x_pos.shape[0], 1), alpha)
    beta_filled = np.full((x_pos.shape[0], 1), beta)
    input_space = np.concatenate((x_pos, y_pos, alpha_filled, beta_filled, f), axis=1)
    return input_space
Manipulating the input space
Let's try to change our pattern function F in the cell above and see what happens!
widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
demo_input_space = build_input_space(alpha=alpha_slider.value, beta=beta_slider.value)
demo_model = build_network_demo(width=width_slider.value, depth=depth_slider.value,
                                seed=seed_slider.value, variance=variance_slider.value)
demo_image = render_image(demo_model, demo_input_space)
plt.figure(figsize=(19, 10))
plt.imshow(demo_image)
plt.axis('off')
plt.show()
Defining neural network architecture

# Now let's define a simple model-building method
def build_network(width=4, depth=4, variance=400, seed=42):
    if not (width > 0 and depth > 0 and variance > 0):
        raise ValueError('width, depth and variance must be positive')
    tf.random.set_seed(seed)
    input_shape = (5,)  # number of parameters in the input space defined above
    initializer = keras.initializers.VarianceScaling(scale=variance, mode='fan_in',
                                                     distribution='normal', seed=seed)
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for layer_idx in range(depth):
        layer = keras.layers.Dense(width, kernel_initializer=initializer, activation='tanh')
        layer_output = layer(x)
        ### DENSENET ###
        # x = keras.layers.Concatenate()([x, layer_output])
        ### STANDARD CONNECTIVITY ###
        x = layer(x)
        ### RESNET CONNECTIVITY ###
        # if layer_idx == 0:
        #     x = keras.layers.Dense(width, kernel_initializer=initializer, activation='tanh')(x)
        # x = keras.layers.Add()([x, layer_output])
        # x = keras.layers.Activation('tanh')(x)
        ### VERY DENSE DENSENET :D ###
        # x = layer(x)
        # x = keras.layers.Concatenate()([x, layer_output])
    bottleneck_initializer = keras.initializers.GlorotNormal(seed)
    bottleneck = keras.layers.Dense(3,  # the number of channels in an RGB image
                                    activation='tanh',
                                    kernel_initializer=bottleneck_initializer)(x)
    model = keras.Model(inputs=inputs, outputs=bottleneck)
    return model

sample_network = build_network()
sample_network.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) [(None, 5)] 0
dense_10 (Dense) (None, 4) 24
dense_11 (Dense) (None, 4) 20
dense_12 (Dense) (None, 4) 20
dense_13 (Dense) (None, 4) 20
dense_14 (Dense) (None, 3) 15
=================================================================
Total params: 99
Trainable params: 99
Non-trainable params: 0
_________________________________________________________________
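As a sanity check on the parameter count with the standard connectivity: the first Dense layer has 5·4 weights + 4 biases = 24 parameters, each of the three following layers has 4·4 + 4 = 20, and the RGB output layer has 4·3 + 3 = 15, so 24 + 3·20 + 15 = 99 in total, matching the summary above.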
Try the network you've defined just above:
widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
demo_input_space = build_input_space(alpha=alpha_slider.value, beta=beta_slider.value)
sample_network = build_network(width=width_slider.value, depth=depth_slider.value,
                               seed=seed_slider.value, variance=variance_slider.value)
demo_image = render_image(sample_network, demo_input_space)
plt.figure(figsize=(19, 10))
plt.imshow(demo_image)
plt.axis('off')
plt.show()
Image masks
Additionally, alpha and beta can be modulated by auxiliary "mask" images. Let's download some examples:
import urllib.request

urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/1.png', 'mask_6.png')
urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/mask.png', 'mask_5.png')
urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/dl_old_logo.jpg', 'mask_4.png')
urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/bill3.png', 'mask_2.png')
urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/goose.png', 'mask_1.png')
urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/homer2.jpg', 'mask_3.png')
widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
mask_img = Image.open('mask_5.png')
x_resolution = mask_img.width
y_resolution = mask_img.height
mask_arr = np.array(mask_img.convert('L')).reshape([-1, 1]) / 255
# replace with build_input_space(...) to use the custom-defined input space
demo_input_space = build_input_space_demo(x_resolution=x_resolution, y_resolution=y_resolution,
                                          alpha=alpha_slider.value, beta=beta_slider.value,
                                          mask=mask_arr)
sample_network = build_network_demo(width=width_slider.value, depth=depth_slider.value,
                                    seed=seed_slider.value, variance=variance_slider.value)
demo_image = render_image(sample_network, demo_input_space,
                          width=x_resolution, height=y_resolution)
fig, ax = plt.subplots(1, 2)
fig.set_size_inches(20, 20)
ax[0].imshow(mask_img)
ax[1].imshow(demo_image)
ax[0].axis('off')
ax[1].axis('off')
plt.show()
Video generation
Previously we used constant alpha and beta values. Now it's time to animate them!
import math

def animate_alpha_beta(init_alpha=0.5, init_beta=0.5):
    # Walk around a circle through [init_alpha; init_beta] centered at the origin
    radius = (init_alpha ** 2 + init_beta ** 2) ** 0.5
    angle = math.atan2(init_beta, init_alpha)
    num_frames = 10
    alphas = [radius * math.cos(angle + i * math.pi / num_frames * 2) for i in range(num_frames)]
    betas = [radius * math.sin(angle + i * math.pi / num_frames * 2) for i in range(num_frames)]
    return alphas, betas
What we did here is essentially generate a circle starting at [alpha; beta]. Now let's generate a few images whose parameters follow this circle:
widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
sample_alphas, sample_betas = animate_alpha_beta(init_alpha=alpha_slider.value, init_beta=beta_slider.value)
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(sample_alphas, sample_betas)
ax.plot(sample_alphas[0], sample_betas[0], 'ro')
ax.set_aspect('equal')
ax.grid(True, which='both')
ax.spines['left'].set_position('zero')
ax.spines['right'].set_color('none')
ax.yaxis.tick_left()
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_color('none')
ax.xaxis.tick_bottom()
plt.show()
for a, b in zip(sample_alphas, sample_betas):
    # replace with build_network(...) to use your previously defined custom architecture
    sample_network = build_network_demo(width=width_slider.value, depth=depth_slider.value,
                                        seed=seed_slider.value, variance=variance_slider.value)
    # replace with build_input_space(...) to use your previously defined custom method
    demo_input_space = build_input_space_demo(alpha=a, beta=b)
    demo_image = render_image(sample_network, demo_input_space)
    # or enable the mask:
    # demo_input_space = build_input_space_demo(x_resolution=x_resolution, y_resolution=y_resolution,
    #                                           alpha=a, beta=b, mask=mask_arr)
    # demo_image = render_image(sample_network, demo_input_space, width=x_resolution, height=y_resolution)
    plt.figure(figsize=(19, 10))
    plt.imshow(demo_image)
    plt.axis('off')
    plt.show()
widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
demo_input_space = build_input_space(alpha=alpha_slider.value, beta=beta_slider.value, width=40, height=40)
demo_model = build_network_demo(width=width_slider.value, depth=depth_slider.value,
                                seed=seed_slider.value, variance=variance_slider.value)
demo_image = render_image(demo_model, demo_input_space, width=40, height=40)
plt.figure(figsize=(19, 10))
plt.imshow(demo_image)
plt.axis('off')
plt.show()
x_pos = np.arange(-5, 5, 0.25)
y_pos = np.arange(-5, 5, 0.25)
x_pos, y_pos = np.meshgrid(x_pos, y_pos)
z = np.array(demo_image.convert('L'), dtype=np.float32)
z /= z.max()
colortuple = ('y', 'g')
colors = np.empty(x_pos.shape, dtype=str)
for y in range(len(y_pos)):
    for x in range(len(x_pos)):
        colors[x, y] = colortuple[(x + y) % len(colortuple)]
# Plot the surface.
ax = plt.figure(figsize=(10, 10)).add_subplot(projection='3d')
surf = ax.plot_surface(x_pos, y_pos, z, facecolors=colors, linewidth=0, antialiased=True)
plt.show()


Thank you for your attention!
Further reading:
- Adversarial Generation of Continuous Images https://arxiv.org/pdf/2011.12026.pdf
- Fourier features https://bmild.github.io/fourfeat/
- Neural Radiance Fields https://www.matthewtancik.com/nerf