Neural style transfer with Keras.

It is preferable to run this script on a GPU, for speed.

Style transfer consists in generating an image with the same “content” as a base image, but with the “style” of a different picture (typically artistic).

This is achieved through the optimization of a loss function that has 3 components: “style loss”, “content loss”, and “total variation loss”:

• The total variation loss imposes local spatial continuity between the pixels of the combination image, giving it visual coherence.

• The style loss is where the deep learning kicks in — that one is defined using a deep convolutional neural network. Precisely, it consists in a sum of L2 distances between the Gram matrices of the representations of the style reference image and the generated image, extracted from different layers of a convnet (trained on ImageNet). The general idea is to capture color/texture information at different spatial scales (fairly large scales — defined by the depth of the layer considered).

• The content loss is an L2 distance between the features of the base image (extracted from a deep layer) and the features of the combination image, keeping the generated image close enough to the original one.

library(keras)
library(purrr)
library(R6)

# Parameters --------------------------------------------------------------

base_image_path <- "neural-style-base-img.png"
style_reference_image_path <- "neural-style-style.jpg"
iterations <- 10

# these are the weights of the different loss components
total_variation_weight <- 1
style_weight <- 1
content_weight <- 0.025

# dimensions of the generated picture.
# Load the base image once so its original width/height can be read;
# the original code referenced `img` without ever defining it, and had
# two assignments fused onto a single line (a syntax error).
img <- image_load(base_image_path)
width <- img$size[[1]]
height <- img$size[[2]]
img_nrows <- 400
# preserve the base image's aspect ratio at the fixed output height
img_ncols <- as.integer(width * img_nrows / height)

# Functions ---------------------------------------------------------------

# util function to open, resize and format pictures into appropriate tensors
# Open an image file, resize it to the generated-picture dimensions,
# and format it as a 4D tensor (batch of one) ready for VGG16 input.
# imagenet_preprocess_input applies the ImageNet mean-pixel centering
# (and, presumably, the RGB -> BGR reordering undone by
# deprocess_image — confirm against the keras docs).
preprocess_image <- function(path){
  arr <- image_to_array(
    image_load(path, target_size = c(img_nrows, img_ncols))
  )
  batch <- array_reshape(arr, c(1, dim(arr)))
  imagenet_preprocess_input(batch)
}

# util function to convert a tensor into a valid image
# also turn BGR into RGB.
# Convert a (1, rows, cols, 3) preprocessed tensor back into an RGB
# image array with values in [0, 1], suitable for as.raster().
# Undoes the ImageNet mean-pixel centering and the BGR channel order.
deprocess_image <- function(x){
  img <- x[1,,,]

  # remove the zero-centering by the ImageNet mean pixel (BGR order)
  mean_pixel <- c(103.939, 116.779, 123.68)
  for (ch in 1:3) {
    img[,,ch] <- img[,,ch] + mean_pixel[ch]
  }

  # reorder channels BGR -> RGB
  img <- img[,,c(3,2,1)]

  # clip to [0, 255], truncate to whole pixel values, rescale to [0, 1]
  img <- pmin(pmax(img, 0), 255)
  img[] <- as.integer(img)/255
  img
}

# Defining the model ------------------------------------------------------

# get tensor representations of our two fixed images
base_image <- k_variable(preprocess_image(base_image_path))
style_reference_image <- k_variable(preprocess_image(style_reference_image_path))

# placeholder that will hold the generated image being optimized
combination_image <- k_placeholder(c(1, img_nrows, img_ncols, 3))

# stack base, style and combination images along the batch axis so a
# single forward pass computes features for all three at once
input_tensor <- k_concatenate(
  list(base_image, style_reference_image, combination_image),
  axis = 1
)

# VGG16 with pre-trained ImageNet weights; the classifier head is
# dropped since only the convolutional feature maps are needed
model <- application_vgg16(
  input_tensor = input_tensor,
  weights = "imagenet",
  include_top = FALSE
)

# lookup table: layer name -> symbolic output tensor of that layer
layer_ids <- vapply(model$layers, function(layer) layer$name, character(1))
output_dict <- map(model$layers, ~.x$output) %>% set_names(layer_ids)

# compute the neural style loss
# first we need to define 4 util functions

# the gram matrix of an image tensor (feature-wise outer product)

# Gram matrix of an image tensor: flatten each feature channel into a
# row, then take the feature-wise outer product (channels x channels).
gram_matrix <- function(x){
  flat <- k_batch_flatten(
    k_permute_dimensions(x, pattern = c(3, 1, 2))
  )
  k_dot(flat, k_transpose(flat))
}

# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image

# The "style loss" maintains the style of the reference image in the
# generated image. It is the normalized squared distance between the
# Gram matrices (which capture style) of the two feature maps.
style_loss <- function(style, combination){
  gram_style <- gram_matrix(style)
  gram_comb <- gram_matrix(combination)

  n_channels <- 3
  map_size <- img_nrows * img_ncols

  # standard Gatys et al. normalization factor
  k_sum(k_square(gram_style - gram_comb)) / (4 * n_channels^2 * map_size^2)
}

# an auxiliary loss function
# designed to maintain the "content" of the
# base image in the generated image

# Auxiliary loss keeping the "content" of the base image in the
# generated image: squared L2 distance between the two feature maps.
content_loss <- function(base, combination){
  diff <- combination - base
  k_sum(k_square(diff))
}

# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent

# Third loss: total variation, penalizing differences between
# neighboring pixels to keep the generated image locally coherent.
total_variation_loss <- function(x){
  rows <- img_nrows
  cols <- img_ncols

  # overlapping views of the image, shifted by one pixel
  center <- x[, 1:(rows - 1L), 1:(cols - 1L), ]
  below <- x[, 2:(rows), 1:(cols - 1L), ]
  right <- x[, 1:(rows - 1L), 2:(cols), ]

  dy <- k_square(center - below)
  dx <- k_square(center - right)
  k_sum(k_pow(dy + dx, 1.25))
}

# combine these loss functions into a single scalar
loss <- k_variable(0.0)

# content loss: compare deep-layer features of the base image (batch
# index 1) with those of the combination image (batch index 3)
layer_features <- output_dict$block4_conv2
base_image_features <- layer_features[1,,,]
combination_features <- layer_features[3,,,]
loss <- loss + content_weight * content_loss(base_image_features,
                                             combination_features)

# style loss: accumulated over several conv layers, comparing the
# style reference image (batch index 2) with the combination (index 3)
feature_layers <- c('block1_conv1', 'block2_conv1',
                    'block3_conv1', 'block4_conv1',
                    'block5_conv1')

for(layer_name in feature_layers){
  layer_features <- output_dict[[layer_name]]
  style_reference_features <- layer_features[2,,,]
  combination_features <- layer_features[3,,,]
  sl <- style_loss(style_reference_features, combination_features)
  loss <- loss + ((style_weight / length(feature_layers)) * sl)
}

# total variation loss applies to the generated image alone
loss <- loss + (total_variation_weight *
                  total_variation_loss(combination_image))

# get the gradients of the generated image wrt the loss
grads <- k_gradients(loss, combination_image)[[1]]

# one backend function returning both the loss and its gradients
f_outputs <- k_function(list(combination_image), list(loss, grads))

# Evaluate loss and gradients for a flat pixel vector in one pass.
# Returns a list with the scalar loss and a flat gradient vector.
eval_loss_and_grads <- function(image){
  image <- array_reshape(image, c(1, img_nrows, img_ncols, 3))
  outs <- f_outputs(list(image))
  list(
    loss_value = outs[[1]],
    grad_values = array_reshape(outs[[2]], dim = length(outs[[2]]))
  )
}

# Loss and gradients evaluator.
#
# This Evaluator class makes it possible
# to compute loss and gradients in one pass
# while retrieving them via two separate functions,
# "loss" and "grads". This is done because optim()
# requires separate functions for loss and gradients,
# but computing them separately would be inefficient.
Evaluator <- R6Class(
  "Evaluator",
  public = list(
    loss_value = NULL,
    grad_values = NULL,

    initialize = function() {
      self$loss_value <- NULL
      self$grad_values <- NULL
    },

    # compute loss AND gradients; cache the gradients for grads()
    loss = function(x){
      loss_and_grad <- eval_loss_and_grads(x)
      self$loss_value <- loss_and_grad$loss_value
      self$grad_values <- loss_and_grad$grad_values
      self$loss_value
    },

    # return the gradients cached by the preceding loss() call, then
    # clear the cache. The `grads = function(x){` header was missing
    # in the original, which made the class definition a syntax error
    # and broke the evaluator$grads call in the optimization loop.
    grads = function(x){
      grad_values <- self$grad_values
      self$loss_value <- NULL
      self$grad_values <- NULL
      grad_values
    }
  )
)

evaluator <- Evaluator$new()

# Run optimization (L-BFGS-B, via base R optim()) over the pixels of
# the generated image so as to minimize the neural style loss.
dms <- c(1, img_nrows, img_ncols, 3)

# start from uniform random noise in [-128, 127]
x <- array(data = runif(prod(dms), min = 0, max = 255) - 128, dim = dms)

for(i in seq_len(iterations)){

  # Run L-BFGS; evaluator$loss caches gradients for evaluator$grads
  opt <- optim(
    array_reshape(x, dim = length(x)), fn = evaluator$loss, gr = evaluator$grads,
    method = "L-BFGS-B",
    control = list(maxit = 15)
  )

  # Print loss value
  print(opt$value)

  # decode the image: carry the optimized pixels into the next
  # iteration (x) and reshape a copy for display. The original had
  # these assignments swallowed by a trailing comment, so x was
  # never updated between iterations.
  image <- x <- opt$par
  image <- array_reshape(image, dms)

  # plot
  im <- deprocess_image(image)
  plot(as.raster(im))
}