python实现图像风格转化-人工智能图像处理

什么是图像风格转换？如上图可见，一键变成艺术大师。所谓图片风格迁移，是指利用程序算法学习著名画作的风格，然后再把这种风格应用到另外一张图片上的技术。基本原理如下图可见：我们知道，卷积神经网络(CNN)具有很强的图像特征(feature/representation)提取能力，如上图所示。对于内容图片，深层网络(d和e)提取的是高维特征，同时也丢弃了细节信息；浅层网络(a, b和c)提...

大龙剑神

2645人浏览 · 2020-04-04 17:41:14

大龙剑神 · 2020-04-04 17:41:14 发布

在这里插入图片描述
什么是图像风格转换？如上图可见，一键变成艺术大师。

所谓图片风格迁移，是指利用程序算法学习著名画作的风格，然后再把这种风格应用到另外一张图片上的技术。

基本原理

如下图可见：
在这里插入图片描述
我们知道，卷积神经网络(CNN)具有很强的图像特征(feature/representation)提取能力，如上图所示。

对于内容图片，深层网络(d和e)提取的是高维特征，同时也丢弃了细节信息；浅层网络(a, b和c)提取的是低维特征，图片的细节大多都保留下来了。

对于风格图片，通过包含多层的特征相关性(Gram矩阵)，可获得多尺度图像风格的重构，捕获其纹理信息。这样构建的网络可以忽略图像的具体细节，保留风格。

为了将内容图片和风格图片融合在一起(见下图)，我们应该使风格化结果图(初始为一张白噪声图片)的特征同时与内容图片和风格图片的特征之间的距离最小化，最终获取我们所需的风格化结果图。
在这里插入图片描述
代码：


# coding = utf-8

import tensorflow as tf
import numpy as np

slim = tf.contrib.slim


def arg_scope(weight_decay=0.0005):
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.conv2d_transpose],
                        activation_fn=None,
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        biases_initializer=tf.zeros_initializer()):
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], padding='SAME') as arg_sc:
            return arg_sc


def img_scale(x, scale):
    weight = x.get_shape()[1].value
    height = x.get_shape()[2].value

    try:
        out = tf.image.resize_nearest_neighbor(x, size=(weight*scale, height*scale))
    except:
        out = tf.image.resize_images(x, size=[weight*scale, height*scale])
    return out

# net = slim.conv2d(net, 4096, [1, 1], scope='fc7')

def res_module(x, outchannel, name):
    with tf.variable_scope(name_or_scope=name):
        out1 = slim.conv2d(x, outchannel, [3, 3], stride=1, scope='conv1')
        out1 = relu(out1)
        out2 = slim.conv2d(out1, outchannel, [3, 3], stride=1, scope='conv2')
        out2 = relu(out2)

        return x+out2

def instance_norm(x):
    epsilon = 1e-9

    mean, var = tf.nn.moments(x, [1, 2], keep_dims=True)

    return tf.div(tf.subtract(x, mean), tf.sqrt(tf.add(var, epsilon)))

def relu(x):
    return tf.nn.relu(x)

def gen_net(imgs, reuse, name, is_train=True):
    imgs_shape = tf.shape(imgs)
    imgs = tf.pad(imgs, [[0, 0], [10, 10], [10, 10], [0, 0]], mode='REFLECT')
    with tf.variable_scope(name, reuse=reuse) as vs:
        # encoder : three convs layers
        out1 = slim.conv2d(imgs, 32, [9, 9], scope='conv1')
        out1 = relu(instance_norm(out1))

        out2 = slim.conv2d(out1, 64, [3, 3], stride=2, scope='conv2')
        out2 = instance_norm(out2)
        # out2 = relu(img_scale(out2, 0.5))

        out2 = slim.conv2d(out2, 128, [3, 3], stride=2, scope='conv3')
        out2 = instance_norm(out2)
        # out2 = relu(img_scale(out2, 0.5))

        # transform
        out3 = res_module(out2, 128, name='residual1')
        out3 = res_module(out3, 128, name='residual2')
        out3 = res_module(out3, 128, name='residual3')
        out3 = res_module(out3, 128, name='residual4')
        # decoder
        out4 = img_scale(out3, 2)
        out4 = slim.conv2d(out4, 64, [3, 3], stride=1, scope='conv4')
        out4 = relu(instance_norm(out4))
        # out4 = img_scale(out4, 128)

        out4 = img_scale(out4, 2)
        out4 = slim.conv2d(out4, 32, [3, 3], stride=1, scope='conv5')
        out4 = relu(instance_norm(out4))
        # out4 = img_scale(out4, 256)

        out = slim.conv2d(out4, 3, [9, 9], scope='conv6')
        out = tf.nn.tanh(instance_norm(out))

        variables = tf.contrib.framework.get_variables(vs)

        out = (out + 1) * 127.5

        height = out.get_shape()[1].value  # if is_train else tf.shape(out)[0]
        width = out.get_shape()[2].value  # if is_train else tf.shape(out)[1]

        # out = tf.slice(out, [0, 10, 10, 0], tf.stack([-1, height - 20, width - 20, -1]))
        out = tf.image.crop_to_bounding_box(out, 10, 10, height-20, width-20)
        # out = tf.reshape(out, imgs_shape)

    return out, variables


"""caculate the loss"""
import vgg_simple as vgg
import os


def styleloss(f1, f2, f3, f4):
    gen_f, _, style_f = tf.split(f1, 3, 0)
    size = tf.size(gen_f)
    style_loss = tf.nn.l2_loss(gram(gen_f) - gram(style_f))*2 / tf.to_float(size)

    gen_f, _, style_f = tf.split(f2, 3, 0)
    size = tf.size(gen_f)
    style_loss += tf.nn.l2_loss(gram(gen_f) - gram(style_f)) * 2 / tf.to_float(size)

    gen_f, _, style_f = tf.split(f3, 3, 0)
    size = tf.size(gen_f)
    style_loss += tf.nn.l2_loss(gram(gen_f) - gram(style_f)) * 2 / tf.to_float(size)

    gen_f, _, style_f = tf.split(f4, 3, 0)
    size = tf.size(gen_f)
    style_loss += tf.nn.l2_loss(gram(gen_f) - gram(style_f)) * 2 / tf.to_float(size)

    return style_loss

def gram(layer):
    shape = tf.shape(layer)
    num_images = shape[0]
    width = shape[1]
    height = shape[2]
    num_filters = shape[3]
    filters = tf.reshape(layer, tf.stack([num_images, -1, num_filters]))
    grams = tf.matmul(filters, filters, transpose_a=True) / tf.to_float(width * height * num_filters)

    return grams

if __name__ == '__main__':
    with tf.device('/cpu:0'):

        a = [[1., 2.], [3., 4.], [5, 6]]
        b, c, e = tf.split(a, 3, 0)

        with tf.Session() as sess:
            c, d, g = sess.run([b, c, e])

完整模型、训练和转化代码，关注公众号“人工智能实战及常见代码分享”，后台回复“风格迁移”获取
在这里插入图片描述