生成对抗神经网络
生成对抗神经网络
对抗生成网络,其实就像周伯通,左手打右手,不用陪练,自己就能练成绝世武功!
刚参加工作时,顶头上级是跟我年纪相仿的年轻人,据说家里有点关系。
有一天交给我一个任务,让我写个部门工作总结,我这刚参加工作,哪会写这个啊?他说没关系,写完给他看,他帮我。
![学习对抗神经网络 学习对抗神经网络](http://www.sinaimg.cn/uc/myshow/blog/misc/gif/E___6742EN00SIGG.gif)
![学习对抗神经网络 学习对抗神经网络](http://www.sinaimg.cn/uc/myshow/blog/misc/gif/E___6759EN00SIGG.gif)
接下来的一段时间就反复往他办公室跑,每次写完交上去,第二天拿回来时都能给我指出点问题,提出点修改意见,反反复复半个月。好在最终还是写出来了,而且获得了上级的肯定!
多年后,一次闲聊,说起当年写总结。他说那时他也刚参加工作,他也不会写。
![学习对抗神经网络 学习对抗神经网络](http://www.sinaimg.cn/uc/myshow/blog/misc/gif/E___6690EN00SIGG.gif)
以上故事是现编的。这里的“我”就是生成器(generator),我的领导就是判别器(discriminator),往年的《工作总结》就是数据集。判别器看了往年的《工作总结》,知道什么好,什么不好,再看我写的,心里就大概有数了:我写的哪里不好,然后提出改进方向。这个过程中,“我”在学习,“领导”其实也在学习。
下面是模仿老师的程序生成的玫瑰,训练了1000个epoch,可以看出效果差很多,可能是因为老师为了让我们都能跑起来,不敢搞大计算量,设计的CNN太简单。
改过的代码(win7,python3.5,tensorflow1.1.0):
import numpy as np
import tensorflow as tf
import pickle
import matplotlib.pyplot as plt
import os os.environ["CUDA_VISIBLE_DEVICES"]="0"#gpu参与运算,没有gpu设到"-1" %matplotlib inline
获得数据
def get_inputs(noise_dim, image_height, image_width, image_depth):
    """Create the two float32 placeholders that feed the GAN graph.

    Args:
        noise_dim: Length of each noise vector given to the generator.
        image_height: Height of a real image.
        image_width: Width of a real image.
        image_depth: Number of channels of a real image.

    Returns:
        A tuple ``(inputs_real, inputs_noise)``: the placeholder for real
        images, shaped ``[None, image_height, image_width, image_depth]``,
        and the placeholder for noise, shaped ``[None, noise_dim]``.
    """
    real_shape = [None, image_height, image_width, image_depth]
    noise_shape = [None, noise_dim]

    inputs_real = tf.placeholder(tf.float32, real_shape, name='inputs_real')
    inputs_noise = tf.placeholder(tf.float32, noise_shape, name='inputs_noise')
    return inputs_real, inputs_noise
生成器
def get_generator(noise_img, output_dim, is_train=True, alpha=0.01):
    """Build the generator: noise vectors in, tanh-activated images out.

    The noise is projected to a 4 x 4 x 512 tensor and then upsampled by five
    stride-2 transposed convolutions with 'same' padding:
    4 -> 8 -> 16 -> 32 -> 64 -> 128 pixels per side, with
    1024, 512, 256, 128 and finally ``output_dim`` channels.

    Args:
        noise_img: Noise tensor of shape ``[batch, noise_dim]``.
        output_dim: Number of channels of the generated image.
        is_train: Python bool. ``True`` builds the training graph and creates
            the variables; ``False`` reuses the existing "generator" variables
            (``reuse=(not is_train)``), runs batch normalization in inference
            mode and skips dropout.
        alpha: Slope of the leaky ReLU for negative inputs.

    Returns:
        The generated images, ``tanh(logits)``, so values lie in (-1, 1).
        Real images must be rescaled to the same range before training.
    """
    def _normalize_activate(tensor):
        # Batch norm -> leaky ReLU -> dropout.
        tensor = tf.layers.batch_normalization(tensor, training=is_train)
        tensor = tf.maximum(alpha * tensor, tensor)
        # Dropout is a training-time regularizer only; applying it while
        # sampling (is_train=False) would randomly zero 20% of activations.
        if is_train:
            tensor = tf.nn.dropout(tensor, keep_prob=0.8)
        return tensor

    with tf.variable_scope("generator", reuse=(not is_train)):
        # Fully connected projection: noise_dim -> 4 x 4 x 512.
        net = tf.layers.dense(noise_img, 4 * 4 * 512)
        net = tf.reshape(net, [-1, 4, 4, 512])
        net = _normalize_activate(net)

        # 4x4x512 -> 8x8x1024 -> 16x16x512 -> 32x32x256 -> 64x64x128.
        # Layers are created in the same order as before, so the
        # auto-generated variable names are unchanged.
        for filters in (1024, 512, 256, 128):
            net = tf.layers.conv2d_transpose(net, filters, 3, strides=2, padding='same')
            net = _normalize_activate(net)

        # 64x64x128 -> 128x128xoutput_dim; no batch norm on the output layer.
        logits = tf.layers.conv2d_transpose(net, output_dim, 3, strides=2, padding='same')
        outputs = tf.tanh(logits)
        return outputs
判别器
def get_discriminator(inputs_img, reuse=False, alpha=0.01):
    """Build the discriminator: images in, one real/fake logit per image out.

    Five stride-2 convolutions with 'same' padding halve the spatial size each
    time (128 -> 64 -> 32 -> 16 -> 8 -> 4 for a 128 x 128 input) with
    64, 128, 256, 512 and 512 channels. The final reshape to ``4*4*512``
    features therefore expects 128 x 128 inputs.

    Args:
        inputs_img: Image tensor of shape ``[batch, height, width, depth]``.
        reuse: Passed to ``tf.variable_scope``; use ``True`` for the second
            call so that real and fake images share the same weights.
        alpha: Slope of the leaky ReLU for negative inputs.

    Returns:
        A tuple ``(logits, outputs)`` where ``outputs = sigmoid(logits)``.
    """
    def _leaky_dropout(tensor):
        # Leaky ReLU followed by dropout (keep probability 0.8).
        tensor = tf.maximum(alpha * tensor, tensor)
        return tf.nn.dropout(tensor, keep_prob=0.8)

    with tf.variable_scope("discriminator", reuse=reuse):
        # 128x128xdepth -> 64x64x64; the first layer has no batch norm.
        net = tf.layers.conv2d(inputs_img, 64, 3, strides=2, padding='same')
        net = _leaky_dropout(net)

        # 64x64x64 -> 32x32x128 -> 16x16x256 -> 8x8x512 -> 4x4x512.
        # Every block now gets its activation and dropout: the original
        # assigned the 8x8x512 block's activation/dropout to the wrong
        # variable, so that block fed the next conv without either.
        for filters in (128, 256, 512, 512):
            net = tf.layers.conv2d(net, filters, 3, strides=2, padding='same')
            net = tf.layers.batch_normalization(net, training=True)
            net = _leaky_dropout(net)

        # 4x4x512 -> flat 8192 features -> a single logit.
        flatten = tf.reshape(net, (-1, 4 * 4 * 512))
        logits = tf.layers.dense(flatten, 1)
        outputs = tf.sigmoid(logits)
        return logits, outputs
目标函数:
def get_loss(inputs_real, inputs_noise, image_depth, smooth=0.1):
    """Build the generator and discriminator losses.

    Args:
        inputs_real: Placeholder holding the real images.
        inputs_noise: Placeholder holding the generator's noise input.
        image_depth: Number of channels the generator should produce.
        smooth: Label smoothing; "real" targets are ``1 - smooth``, not 1.

    Returns:
        A tuple ``(g_loss, d_loss)``. ``g_loss`` is the mean sigmoid
        cross-entropy between the discriminator's logits on generated images
        and the smoothed "real" label. ``d_loss`` is the sum of the
        cross-entropy on real images (smoothed "real" label) and on generated
        images (label 0).
    """
    def _mean_xent(logits, labels):
        # Mean sigmoid cross-entropy over the batch.
        per_example = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels)
        return tf.reduce_mean(per_example)

    fake_images = get_generator(inputs_noise, image_depth, is_train=True)
    real_logits, real_probs = get_discriminator(inputs_real)
    # Second call shares the discriminator weights created by the first.
    fake_logits, fake_probs = get_discriminator(fake_images, reuse=True)

    real_target = 1 - smooth

    # Generator: wants its fakes to be classified as real.
    g_loss = _mean_xent(fake_logits, tf.ones_like(fake_probs) * real_target)

    # Discriminator: real images -> "real", generated images -> "fake".
    d_loss_real = _mean_xent(real_logits, tf.ones_like(real_probs) * real_target)
    d_loss_fake = _mean_xent(fake_logits, tf.zeros_like(fake_probs))
    d_loss = tf.add(d_loss_real, d_loss_fake)

    return g_loss, d_loss
优化器
def get_optimizer(g_loss, d_loss, beta1=0.4, learning_rate=0.001):
    """Create one Adam training op for the generator and one for the discriminator.

    Args:
        g_loss: Generator loss tensor.
        d_loss: Discriminator loss tensor.
        beta1: Adam's exponential decay rate for the first-moment estimate.
        learning_rate: Adam learning rate.

    Returns:
        A tuple ``(g_opt, d_opt)`` of training ops. Each op updates only the
        trainable variables whose name starts with "generator" or
        "discriminator" respectively.
    """
    train_vars = tf.trainable_variables()
    g_vars = [var for var in train_vars if var.name.startswith("generator")]
    d_vars = [var for var in train_vars if var.name.startswith("discriminator")]

    # Run the ops collected in UPDATE_OPS (where batch normalization registers
    # its moving-average updates) before each optimizer step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        # beta1 was previously accepted but never forwarded, so Adam silently
        # used its own default instead of the caller's value.
        g_opt = tf.train.AdamOptimizer(learning_rate, beta1=beta1).minimize(g_loss, var_list=g_vars)
        d_opt = tf.train.AdamOptimizer(learning_rate, beta1=beta1).minimize(d_loss, var_list=d_vars)

    return g_opt, d_opt
def plot_images(samples):
    """Draw the given samples side by side in one row of grayscale images.

    Args:
        samples: Sequence of numpy arrays, one per image. 2-D arrays are
            drawn as they are; anything else is reshaped to 128 x 128 as
            before (e.g. flat 16384-pixel vectors or 128 x 128 x 1 arrays).
    """
    n_cols = len(samples)
    if n_cols == 0:
        # Nothing to draw; plt.subplots cannot create zero columns.
        return

    # One column per sample instead of a hard-coded 3. squeeze=False keeps
    # `axes` a 2-D array even for a single sample, so indexing row 0 is safe.
    fig, axes = plt.subplots(nrows=1, ncols=n_cols, sharex=True, sharey=True,
                             figsize=(50, 15), squeeze=False)
    for img, ax in zip(samples, axes[0]):
        pixels = img if img.ndim == 2 else img.reshape((128, 128))
        ax.imshow(pixels, cmap='Greys_r')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    fig.tight_layout(pad=0)
def show_generator_output(sess, n_images, inputs_noise, output_dim):
    """Sample ``n_images`` images from the generator in inference mode.

    Args:
        sess: Session holding the trained generator variables.
        n_images: Number of images to generate.
        inputs_noise: The generator's noise placeholder; its last dimension
            gives the noise vector length.
        output_dim: Number of channels of the generated images.

    Returns:
        A numpy array of generated images. For single-channel output the
        trailing channel axis is removed; multi-channel output is returned
        with its channel axis intact.
    """
    noise_dim = inputs_noise.get_shape().as_list()[-1]
    # Uniform noise in [-1, 1), one vector per requested image.
    examples_noise = np.random.uniform(-1, 1, size=[n_images, noise_dim])

    # NOTE: every call builds another copy of the generator ops (sharing the
    # existing variables) through get_generator(..., False).
    generator_op = get_generator(inputs_noise, output_dim, False)
    samples = sess.run(generator_op, feed_dict={inputs_noise: examples_noise})

    # np.squeeze(samples, -1) raises when the last axis is not 1, so only
    # drop the channel axis for single-channel images.
    if samples.shape[-1] == 1:
        return np.squeeze(samples, -1)
    return samples
定义参数
# Hyper-parameters
# Batch size: 64 ran out of memory. The GPU was suspected at first, but it
# also overflowed with the GPU switched off, so a smaller batch is used.
batch_size = 32
noise_size = 100
epochs = 1000
n_samples = 3
beta1 = 0.4
learning_rate = 0.001
图片预处理
把所有图片放到一个目录里,供python 读取,转成黑白格式并存成数据集
图片需要统一到128*128大小
from PIL import Image
import os.path
import glob
images = []
# Load every JPEG in the folder, convert it to grayscale and store it as a
# column vector. (The MNIST batches this replaces were numpy arrays of
# shape (64, 784).)
for jpgfile in glob.glob("E:\\lianhua\\*.jpg"):
    # The context manager closes the image file once the pixels have been
    # copied into a numpy array; previously the handle was never closed.
    with Image.open(jpgfile) as img:
        pixels = np.array(img.convert('L'))
    images.append(pixels.reshape(-1, 1))
image = np.array(images)
print(image.shape)
# One row per image: 128 * 128 = 16384 pixels.
image = image.reshape(-1, 16384)
# Divide by 255.0 to scale the pixel values into [0, 1].
image = image / 255.0
batch
不使用mnist数据集,需要自己写一个getBatch方法
def getBatch(image, batch_size, steps):
    """Return the mini-batch for training step ``steps``, cycling over the data.

    The dataset is split into ``image.shape[0] // batch_size`` consecutive,
    non-overlapping batches; step ``steps`` selects batch number
    ``steps % number_of_batches``. Samples left over after the last full
    batch are never returned.

    Args:
        image: Numpy array whose first axis indexes the samples.
        batch_size: Number of samples per batch.
        steps: Global step counter used to pick the batch.

    Returns:
        The slice of ``image`` holding ``batch_size`` consecutive samples.

    Raises:
        ValueError: If ``image`` holds fewer than ``batch_size`` samples.
    """
    turns = image.shape[0] // batch_size  # number of full batches in the data
    if turns == 0:
        raise ValueError(
            "dataset has {} samples, fewer than batch_size={}".format(image.shape[0], batch_size))
    num = steps % turns  # index of the batch to return
    # Convert the batch index to a sample offset. Slicing from `num` itself
    # made consecutive batches overlap and never reached most of the data.
    start = num * batch_size
    return image[start:start + batch_size]
训练
def train(noise_size, data_shape, batch_size, n_samples):
    """Build the GAN graph and train it on the module-level ``image`` dataset.

    Besides its arguments this function reads the module-level names
    ``image`` (the dataset), ``epochs``, ``beta1`` and ``learning_rate``.
    Every 200 steps it records the losses, samples and plots ``n_samples``
    generator images, saves the generator's trainable variables to
    ``./checkpoints/generator.ckpt`` and prints a progress line.

    Args:
        noise_size: Length of the generator's noise vector.
        data_shape: ``[batch, height, width, depth]`` of the real images;
            only the last three entries are used.
        batch_size: Number of images per training step.
        n_samples: Number of images to sample and plot at each report.

    Returns:
        A list of ``(discriminator_loss, generator_loss)`` tuples, one per
        report.
    """
    import os  # local import: only needed to create the checkpoint directory

    checkpoint_path = './checkpoints/generator.ckpt'
    losses = []  # (d_loss, g_loss) recorded at every report
    steps = 0

    inputs_real, inputs_noise = get_inputs(noise_size, data_shape[1], data_shape[2], data_shape[3])
    g_loss, d_loss = get_loss(inputs_real, inputs_noise, data_shape[-1])
    g_train_opt, d_train_opt = get_optimizer(g_loss, d_loss, beta1, learning_rate)

    # Build the Saver once. Creating a new one at every report added a fresh
    # set of save/restore ops to the graph each time.
    g_vars = [var for var in tf.trainable_variables() if var.name.startswith("generator")]
    saver = tf.train.Saver(var_list=g_vars)
    # Make sure the target directory exists before the first save.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # To resume a previous run, call saver.restore(sess, checkpoint_path)
        # here, after the initializer.

        for e in range(epochs):
            # `image` is the module-level dataset (replaces the MNIST reader).
            for _ in range(image.shape[0] // batch_size):
                steps += 1
                batch = getBatch(image, batch_size, steps)
                batch_images = batch.reshape(
                    (batch_size, data_shape[1], data_shape[2], data_shape[3]))
                # Rescale from [0, 1] to [-1, 1] to match the generator's tanh output.
                batch_images = batch_images * 2 - 1

                batch_noise = np.random.uniform(-1, 1, size=(batch_size, noise_size))
                feed = {inputs_real: batch_images, inputs_noise: batch_noise}

                # One generator step, then one discriminator step.
                sess.run(g_train_opt, feed_dict=feed)
                sess.run(d_train_opt, feed_dict=feed)

                if steps % 200 == 0:
                    train_loss_d, train_loss_g = sess.run([d_loss, g_loss], feed_dict=feed)
                    losses.append((train_loss_d, train_loss_g))

                    # Sample images, save the generator, then plot and report.
                    samples = show_generator_output(sess, n_samples, inputs_noise, data_shape[-1])
                    saver.save(sess, checkpoint_path)
                    plot_images(samples)
                    print("Epoch {}/{}....".format(e+1, epochs),
                          "Discriminator Loss: {:.4f}....".format(train_loss_d),
                          "Generator Loss: {:.4f}....". format(train_loss_g))

    return losses
万事俱备,可以开工了
# Build and train inside a fresh graph. The image size was changed from
# 28 to 128: data_shape is [batch, 128, 128, 1].
with tf.Graph().as_default():
    train(
        noise_size=noise_size,
        data_shape=[-1, 128, 128, 1],
        batch_size=batch_size,
        n_samples=n_samples,
    )
运行结果:
Epoch 12/1000…. Discriminator Loss: 0.3733…. Generator Loss: 5.5305….
Epoch 24/1000…. Discriminator Loss: 0.3808…. Generator Loss: 6.1425….
Epoch 36/1000…. Discriminator Loss: 0.3637…. Generator Loss: 5.7996….
Epoch 48/1000…. Discriminator Loss: 0.3698…. Generator Loss: 5.5932….
Epoch 59/1000…. Discriminator Loss: 0.3606…. Generator Loss: 5.5278….
总结:
生成对抗网络如下图所示,其中G网络的loss计算是通过D网络来表征的。
1. 对于G网络,希望其生成的图像A_out,经过D网络的“审判”之后,可以让D网络输出1,也就是真。
因此,G_loss = D_out_A与1之间的交叉熵
2. 对于D网络,可以看做是两个部分,第一个,有图像B输入网络,输出D_out_B,第二个是图像A_out输入,输出D_out_A。对于第一部分loss的计算是D_out_B与1之间的交叉熵,原因分析:图像B本来就是目标图像,图像A是噪音。第二部分,loss为D_out_A与0之间的交叉熵,原因分析:D网络就是认为图像A_out不是目标图像。
参考:https://blog.csdn.net/atyzy/article/details/77891589