Skip this worksheet and proceed to the next one!

This one contains imports and auxiliary code (e.g. color correction and dithering) used by the other worksheets.

import numpy as np
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
from PIL import Image, ImageChops
import ipywidgets as widgets
color_params = {
    'grayscale_matrix': [0.3, 0.3, 0.3, 0.0, 0.3, 0.3, 0.3, 0.0, 0.3, 0.3, 0.3, 0.0],
    'red_component': [0, 0.11568627450980393, 0.3254901960784314, 0.23137254901960785, 0.4196078431372549,
                      0.027450980392156862, 1],
    'green_component': [0, 0.4588235294117647, 0.6294117647058823, 0.9176470588235294, 0.3411764705882353,
                        0.7647058823529411, 1],
    'blue_component': [0, 0.19215686274509805, 0.692156862745098, 0.3843137254901961, 1, 0.9490196078431372, 1],
    'color_matrix': [1.3935000000000002, -0.35750000000000004, -0.03599999999999999, 0,
                     -0.10650000000000001, 1.1425, -0.03599999999999999, 0,
                     -0.10650000000000001, -0.35750000000000004, 1.4640000000000002, 0]
}
def component_transfer(grayscale: np.ndarray, component: list) -> np.ndarray:
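    # Piecewise-linear transfer: map each 8-bit grayscale value through the control points given in `component`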
    n = len(component) - 1

    def precompute_transfer(x):
        if x == 1:
            return component[-1]
        k = int(x * n)
        x_new = component[k] + (x - k / n) * n * (component[k + 1] - component[k])
        return round(x_new * 255)

    transfer_lookup = {x: precompute_transfer(x / 255) for x in range(256)}
    gradientmap = np.vectorize(transfer_lookup.get)(grayscale)
    return gradientmap
def recolor_image(source_img: Image) -> Image:
    grayscale = source_img.convert('RGB', color_params['grayscale_matrix'])
    grayscale_arr = np.array(grayscale)
    gradientmap_arr = np.zeros(grayscale_arr.shape, dtype=np.uint)
    gradientmap_arr[:, :, 0] = component_transfer(grayscale_arr[:, :, 0], color_params['red_component'])
    gradientmap_arr[:, :, 1] = component_transfer(grayscale_arr[:, :, 1], color_params['green_component'])
    gradientmap_arr[:, :, 2] = component_transfer(grayscale_arr[:, :, 2], color_params['blue_component'])
    gradientmap = Image.fromarray(gradientmap_arr.astype(np.uint8))
    blended = ImageChops.multiply(gradientmap, grayscale)
    result_img = blended.convert('RGB', color_params['color_matrix'])
    return result_img
# Some images may contain gradient banding. Let's add a grain effect to break it up:
def dither_image(img: np.ndarray, noise_factor: float = 1):
    img += (np.random.random(img.shape) - 0.5) * (noise_factor / 256)
    img[img < 0] = 0
    img[img > 1] = 1
    return img
# Finally, let's write a function for running inference with the network:
def render_image(model, input_space, width=1920, height=1080):
    pred = model.predict(input_space, batch_size=65536)
    # Normalize the predictions per channel to [0, 1]
    pred_normed = (pred - pred.min(0)) / (pred.ptp(0) + 1e-10)
    img_arr = pred_normed.reshape(height, width, 3)
    img_arr = dither_image(img_arr, noise_factor=4)
    img_arr = (img_arr * 255).astype(np.uint8)
    img = Image.fromarray(img_arr)
    img = recolor_image(img)
    return img
# Now let's define a simple model building method
def build_network_demo(width=4, depth=4, variance=400, seed=42):
    if not (width > 0 and depth > 0 and variance > 0):
        raise ValueError('width, depth and variance must all be positive')
    tf.random.set_seed(seed)
    input_shape = (5,)  # number of parameters in input space defined above
    initializer = keras.initializers.VarianceScaling(scale=variance,
                                                     mode='fan_in',
                                                     distribution='normal',
                                                     seed=seed)
    inputs = keras.Input(shape=input_shape)
    x = inputs
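    # DenseNet-style skip connections: each layer's output is concatenated with all previous features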
    for _ in range(depth):
        layer = keras.layers.Dense(width, kernel_initializer=initializer, activation='tanh')
        layer_output = layer(x)
        x = keras.layers.Concatenate()([x, layer_output])
    bottleneck_initializer = keras.initializers.GlorotNormal(seed)
    bottleneck = keras.layers.Dense(3,  # The number of channels in RGB image
                                    activation='tanh',
                                    kernel_initializer=bottleneck_initializer)(x)
    model = keras.Model(inputs=inputs, outputs=bottleneck)
    return model
def build_input_space_demo(x_resolution=1920, y_resolution=1080, alpha=0.5, beta=0.5, mask=None) -> np.ndarray:
    x_pos = np.linspace(-1, 1, x_resolution)
    y_pos = np.linspace(-1, 1, y_resolution)
    x_pos, y_pos = np.meshgrid(x_pos, y_pos)
    # Now that we have x_pos and y_pos, let's compute an example of F:
    f = np.sqrt(x_pos ** 2 + y_pos ** 2)
    # Result is reshaped so it will be more convenient to concatenate later
    x_pos = x_pos.reshape(-1, 1)
    y_pos = y_pos.reshape(-1, 1)
    f = f.reshape(-1, 1)
    alpha_filled = np.full((x_pos.shape[0], 1), alpha)
    beta_filled = np.full((x_pos.shape[0], 1), beta)
    input_space = np.concatenate(np.array((x_pos, y_pos, alpha_filled, beta_filled, f)), axis=1)
    if mask is not None:
        # Modulate alpha and beta per pixel: dark mask pixels amplify them, bright pixels flip their sign
        input_space[:, 2:-1] *= (0.5 - mask) * 3
    return input_space
alpha_slider = widgets.FloatSlider(
    value=0.5,
    min=-1,
    max=1,
    step=0.05,
    description='alpha:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    # readout_format='d'
)
beta_slider = widgets.FloatSlider(
    value=0.5,
    min=-1,
    max=1,
    step=0.05,
    description='beta:',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    # readout_format='d'
)
variance_slider = widgets.IntSlider(
    value=400,
    min=1,
    max=1000,
    step=1,
    description='variance:',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
)
width_slider = widgets.IntSlider(
    value=4,
    min=2,
    max=16,
    step=1,
    description='width:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
depth_slider = widgets.IntSlider(
    value=4,
    min=2,
    max=16,
    step=1,
    description='depth:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
seed_slider = widgets.IntSlider(
    value=42,
    min=0,
    max=10000,
    step=1,
    description='seed:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

Technical fundamentals

Our Art Generator is based on simple feedforward neural networks. Each feedforward network consists of multiple layers of neurons. Each neuron takes some inputs, computes a weighted sum of them, applies some activation function to this sum, and passes the result as output.
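
For illustration, here is a minimal NumPy sketch of a single neuron (the input values, weights, bias, and tanh activation below are made-up examples, not taken from the generator):

import numpy as np
neuron_inputs = np.array([0.2, -0.5, 0.7])    # example inputs to the neuron
neuron_weights = np.array([0.1, 0.4, -0.3])   # example weights, one per input
neuron_bias = 0.05
neuron_output = np.tanh(neuron_weights @ neuron_inputs + neuron_bias)  # weighted sum, then activation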

Therefore, our images are parametrized by the weights of the neural network. This approach is commonly known as a Compositional Pattern Producing Network (CPPN). Usually, CPPNs map the position of a pixel to its color:

$$F(x, y) \rightarrow (r, g, b)$$

In that case, the image evolves by changing the CPPN itself. However, this is quite a slow process, and it doesn't allow us to perform GPU-optimized batch inference, so we introduced a set of additional virtual input parameters:

$$F(x, y, a, b, f) \rightarrow (r, g, b)$$

Now we can manipulate the image without changing the neural network itself! This allows us to do awesome things such as generating videos, enforcing visual patterns across the image, and much more.

Image rendering

The images produced by our generator are essentially landscapes of the mapping functions of (fairly simple) feed-forward neural networks! Play around with the parameter sliders below, then run the cell underneath them to see the result!

widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
demo_input_space = build_input_space_demo(alpha=alpha_slider.value, beta=beta_slider.value)
demo_model = build_network_demo(width=width_slider.value, depth=depth_slider.value, seed=seed_slider.value, variance=variance_slider.value)
demo_image = render_image(demo_model, demo_input_space)
plt.figure(figsize=(19, 10))
plt.imshow(demo_image)
plt.axis('off')
plt.show()
32/32 [==============================] - 1s 18ms/step

What is the input space?

There are 5 float inputs corresponding to each pixel in the image:

  • x_pos - this input is proportional to the X coordinate of the current pixel in the image (the exact values depend on other image generation parameters that are out of scope for this demo).
  • y_pos - the same as x_pos but corresponds to the Y-axis.
  • alpha - this input has a constant value across all the pixels in the image and is used by the animation engine.
  • beta - the same as for alpha.
  • f - this one allows us to apply general patterns to images by computing some function of x_pos and y_pos, $f = F(x\_pos, y\_pos)$. For example, $f = \sqrt{x\_pos^{2} + y\_pos^{2}}$ produces circular-shaped patterns.
# Now let's fill x_pos and y_pos with [-1, 1] values
x_pos = np.linspace(-1, 1, 1920)
y_pos = np.linspace(-1, 1, 1080)
# Now let's combine them into an x_resolution-by-y_resolution grid
# In our particular case that means we simply repeat each column and row
# x_resolution and y_resolution times respectively
x_pos, y_pos = np.meshgrid(x_pos, y_pos)
# Now that we have x_pos and y_pos, let's compute an example of F:
f = np.sqrt(x_pos ** 2 + y_pos ** 2)
fig, ax = plt.subplots(1, 3)
fig.set_size_inches(20, 15)
ax[0].imshow(x_pos)
ax[1].imshow(y_pos)
ax[2].imshow(f)
plt.axis('off')
plt.show()
# Result is reshaped so it will be more convenient to concatenate later
x_pos = x_pos.reshape(-1, 1)
y_pos = y_pos.reshape(-1, 1)
f = f.reshape(-1, 1)
# Finally, we need to repeat our alpha and beta
alpha_filled = np.full((x_pos.shape[0], 1), alpha_slider.value)
beta_filled = np.full((x_pos.shape[0], 1), beta_slider.value)
input_space = np.concatenate(np.array((x_pos, y_pos, alpha_filled, beta_filled, f)), axis=1)
# The resulting array has one row per pixel (the spatial grid is flattened)
# so it will be more convenient to perform batch inference later
print(input_space.shape)
(2073600, 5)

After combining the pieces, we get the following function:

def build_input_space(width=1920, height=1080, alpha=0.5, beta=0.5) -> np.ndarray:
    x_pos = np.linspace(-1, 1, width)
    y_pos = np.linspace(-1, 1, height)
    x_pos, y_pos = np.meshgrid(x_pos, y_pos)

    # Now that we have x_pos and y_pos, let's compute an example of F:
    ### DEFAULT OPTION ###
    f = np.sqrt(x_pos ** 2 + y_pos ** 2)
    ### PLAIN SUM ###
    # f = x_pos + y_pos
    ### VERTICAL SQUEEZE ###
    # f = np.sqrt(4 * x_pos ** 2 + y_pos ** 2)
    ### HORIZONTAL SQUEEZE ###
    # f = np.sqrt(x_pos ** 2 + 2 * y_pos ** 2)
    ### SUM OF MODULOS ###
    # f = np.abs(x_pos) + np.abs(y_pos)

    # Result is reshaped so it will be more convenient to concatenate later
    x_pos = x_pos.reshape(-1, 1)
    y_pos = y_pos.reshape(-1, 1)
    f = f.reshape(-1, 1)
    alpha_filled = np.full((x_pos.shape[0], 1), alpha)
    beta_filled = np.full((x_pos.shape[0], 1), beta)
    input_space = np.concatenate(np.array((x_pos, y_pos, alpha_filled, beta_filled, f)), axis=1)
    return input_space

Manipulating the input space

Let's try to change our pattern function F in the cell above and see what happens!
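
For instance, one more pattern you could try in place of f (an extra example, not one of the presets listed in the function above) is a radial sine, which produces concentric ripples:

f = np.sin(4 * np.pi * np.sqrt(x_pos ** 2 + y_pos ** 2))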

widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
demo_input_space = build_input_space(alpha=alpha_slider.value, beta=beta_slider.value)
demo_model = build_network_demo(width=width_slider.value, depth=depth_slider.value, seed=seed_slider.value, variance=variance_slider.value)
demo_image = render_image(demo_model, demo_input_space)
plt.figure(figsize=(19, 10))
plt.imshow(demo_image)
plt.axis('off')
plt.show()
32/32 [==============================] - 1s 16ms/step

Defining the neural network architecture

# Now let's define a simple model building method
def build_network(width=4, depth=4, variance=400, seed=42):
    if not (width > 0 and depth > 0 and variance > 0):
        raise ValueError('width, depth and variance must all be positive')
    tf.random.set_seed(seed)
    input_shape = (5,)  # number of parameters in input space defined above
    initializer = keras.initializers.VarianceScaling(scale=variance,
                                                     mode='fan_in',
                                                     distribution='normal',
                                                     seed=seed)
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for layer_idx in range(depth):
        layer = keras.layers.Dense(width, kernel_initializer=initializer, activation='tanh')
        layer_output = layer(x)

        ### DENSENET ###
        # x = keras.layers.Concatenate()([x, layer_output])
        ### STANDARD CONNECTIVITY ###
        x = layer(x)
        ### RESNET CONNECTIVITY ###
        # if layer_idx == 0:
        #     x = keras.layers.Dense(width, kernel_initializer=initializer, activation='tanh')(x)
        # x = keras.layers.Add()([x, layer_output])
        # x = keras.layers.Activation('tanh')(x)
        ### VERY DENSE DENSENET :D ###
        # x = layer(x)
        # x = keras.layers.Concatenate()([x, layer_output])

    bottleneck_initializer = keras.initializers.GlorotNormal(seed)
    bottleneck = keras.layers.Dense(3,  # The number of channels in RGB image
                                    activation='tanh',
                                    kernel_initializer=bottleneck_initializer)(x)
    model = keras.Model(inputs=inputs, outputs=bottleneck)
    return model


sample_network = build_network()
sample_network.summary()
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) [(None, 5)] 0

dense_10 (Dense) (None, 4) 24

dense_11 (Dense) (None, 4) 20

dense_12 (Dense) (None, 4) 20

dense_13 (Dense) (None, 4) 20

dense_14 (Dense) (None, 3) 15

=================================================================
Total params: 99
Trainable params: 99
Non-trainable params: 0
_________________________________________________________________

Try the network you've defined just above:

widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
demo_input_space = build_input_space(alpha=alpha_slider.value, beta=beta_slider.value)
sample_network = build_network(width=width_slider.value, depth=depth_slider.value, seed=seed_slider.value, variance=variance_slider.value)
demo_image = render_image(sample_network, demo_input_space)
plt.figure(figsize=(19, 10))
plt.imshow(demo_image)
plt.axis('off')
plt.show()
32/32 [==============================] - 0s 5ms/step

Image masks

Additionally, alpha and beta can be augmented with auxiliary "mask" images. Let's download some examples:
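
As a rough sketch of how a mask is applied (following the scaling used in build_input_space_demo above, where the alpha and beta columns are multiplied by (0.5 - mask) * 3), dark mask pixels amplify alpha and beta, mid-gray pixels zero them out, and bright pixels flip their sign:

mask_values = np.array([0.0, 0.5, 1.0]).reshape(-1, 1)  # sample mask intensities: black, gray, white
alpha_beta = np.full((3, 2), 0.5)                        # alpha and beta columns for three pixels
print(alpha_beta * (0.5 - mask_values) * 3)              # dark: [0.75 0.75], gray: [0. 0.], bright: [-0.75 -0.75]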

import urllib

urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/1.png',
                           'mask_6.png')
urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/mask.png',
                           'mask_5.png')
urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/dl_old_logo.jpg',
                           'mask_4.png')
urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/bill3.png',
                           'mask_2.png')
urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/goose.png',
                           'mask_1.png')
urllib.request.urlretrieve('https://datalore-samples.s3-eu-west-1.amazonaws.com/workshop/homer2.jpg',
                           'mask_3.png')
('mask_3.png', <http.client.HTTPMessage at 0x7f53366c4280>)
widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
mask_img = Image.open('mask_5.png')
x_resolution = mask_img.width
y_resolution = mask_img.height
mask_arr = (np.array(mask_img.convert('L'))).reshape([-1, 1]) / 255

# replace with build_input_space(...) to use the custom-defined input space
demo_input_space = build_input_space_demo(x_resolution=x_resolution, y_resolution=y_resolution,
                                          alpha=alpha_slider.value,
                                          beta=beta_slider.value, mask=mask_arr)
                                          
sample_network = build_network_demo(width=width_slider.value, depth=depth_slider.value, seed=seed_slider.value,
                                    variance=variance_slider.value)
demo_image = render_image(sample_network, demo_input_space, width=x_resolution, height=y_resolution)
fig, ax = plt.subplots(1, 2)
fig.set_size_inches(20, 20)
ax[0].imshow(mask_img)
ax[1].imshow(demo_image)
ax[0].axis('off')
ax[1].axis('off')
plt.show()
39/39 [==============================] - 0s 8ms/step

Video generation

Previously we used constant alpha and beta values. Now it's time to animate them!

import math
def animate_alpha_beta(init_alpha=0.5, init_beta = 0.5):
    radius = (init_alpha ** 2 + init_beta ** 2) ** 0.5
    angle = math.atan2(init_beta, init_alpha)
    num_frames = 10
    alphas = [radius * math.cos(angle + i * math.pi / num_frames * 2) for i in range(num_frames)]
    betas = [radius * math.sin(angle + i * math.pi / num_frames * 2) for i in range(num_frames)]
    return alphas, betas

What we did here is essentially generate a circle passing through [alpha; beta]. Now let's generate a few images whose parameters follow this circle:
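
In other words, with $r$ and $\theta$ taken from the initial point, the frames follow

$$(\alpha_i, \beta_i) = \left(r \cos\left(\theta + \frac{2\pi i}{N}\right),\ r \sin\left(\theta + \frac{2\pi i}{N}\right)\right), \quad i = 0, \dots, N-1$$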

widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
sample_alphas, sample_betas = animate_alpha_beta(init_alpha=alpha_slider.value, init_beta=beta_slider.value)
fig, ax = plt.subplots(figsize=(8,8))
ax.plot(sample_alphas, sample_betas)
ax.plot(sample_alphas[0], sample_betas[0], 'ro')
ax.set_aspect('equal')
ax.grid(True, which='both')
ax.spines['left'].set_position('zero')
ax.spines['right'].set_color('none')
ax.yaxis.tick_left()
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_color('none')
ax.xaxis.tick_bottom()
plt.show()
for a, b in zip(sample_alphas, sample_betas):
    # replace with build_network(...) to use your previously defined custom architecture
    sample_network = build_network_demo(width=width_slider.value, depth=depth_slider.value, seed=seed_slider.value, variance=variance_slider.value)
    
    # replace with build_input_space(...) to use your previously defined custom method 
    demo_input_space = build_input_space_demo(alpha=a, beta=b)
    demo_image = render_image(sample_network, demo_input_space)

    # or enable the mask:
    # demo_input_space = build_input_space_demo(x_resolution=x_resolution, y_resolution=y_resolution, alpha=a, beta=b, mask=mask_arr)
    # demo_image = render_image(sample_network, demo_input_space, width=x_resolution, height=y_resolution)
    
    plt.figure(figsize=(19, 10))
    plt.imshow(demo_image)
    plt.axis('off')
    plt.show()
32/32 [==============================] - 0s 8ms/step
32/32 [==============================] - 0s 7ms/step
32/32 [==============================] - 0s 7ms/step
32/32 [==============================] - 0s 7ms/step
32/32 [==============================] - 0s 7ms/step
32/32 [==============================] - 0s 8ms/step
32/32 [==============================] - 0s 8ms/step
32/32 [==============================] - 0s 8ms/step
32/32 [==============================] - 0s 8ms/step
32/32 [==============================] - 0s 7ms/step
widgets.VBox([widgets.HBox([alpha_slider, beta_slider]), widgets.HBox([width_slider, depth_slider]), widgets.HBox([variance_slider, seed_slider])])
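# Render a small 40x40 version of the image so we can inspect its brightness as a 3D surface in the next cell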
demo_input_space = build_input_space(alpha=alpha_slider.value, beta=beta_slider.value, width=40, height=40)
demo_model = build_network_demo(width=width_slider.value, depth=depth_slider.value, seed=seed_slider.value, variance=variance_slider.value)
demo_image = render_image(demo_model, demo_input_space, width=40, height=40)
plt.figure(figsize=(19, 10))
plt.imshow(demo_image)
plt.axis('off')
plt.show()
1/1 [==============================] - 0s 49ms/step
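# Build a 40x40 coordinate grid matching the image above and plot its grayscale intensity as a 3D surface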
x_pos = np.arange(-5, 5, 0.25)
y_pos = np.arange(-5, 5, 0.25)
x_pos, y_pos = np.meshgrid(x_pos, y_pos)
z = np.array(demo_image.convert('L'), dtype=np.float32)
z /= z.max()

colortuple = ('y', 'g')
colors = np.empty(x_pos.shape, dtype=str)
for y in range(len(y_pos)):
    for x in range(len(x_pos)):
        colors[x, y] = colortuple[(x + y) % len(colortuple)]

# Plot the surface.
ax = plt.figure(figsize=(10, 10)).add_subplot(projection='3d')
surf = ax.plot_surface(x_pos, y_pos, z, facecolors=colors, linewidth=0, antialiased=True)

plt.show()

Thank you for your attention!

Further reading: