2020年10月16号更新:

应该是由于判别器本身的网络结构导致的(印证了猜想1)。

在训练ACGAN时,梯度也出现了问题(warning),我将判别器由卷积层换成了全连接层,再次运行就能正常训练了,详见这篇文章:

什么是SGAN

在原始GAN架构的基础上,将判别器的二分类(真实样本 or 生成样本)改为多分类(共N+1类:N个真实类别 + 1个fake类别),便得到了SGAN(Semi-Supervised GAN)。

此时,分类器(同时也是判别器)的表现更好,并且生成器生成的图片的质量也更高。

如何实现SGAN

生成器模型无需改动,需要改动的是判别模型以及后续的训练过程

导入相关函数

1
2
3
4
5
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical

准备数据

1
(train_images,train_labels),(_,_)=tf.keras.datasets.mnist.load_data()
1
train_images.shape
(60000, 28, 28)
1
train_images.dtype
dtype('uint8')
1
train_images=train_images.reshape(train_images.shape[0],28,28,1).astype('float32')
1
train_images.shape
(60000, 28, 28, 1)
1
train_images.dtype
dtype('float32')
1
# Scale pixel values from [0, 255] to [-1, 1], the range of the generator's tanh output.
train_images = (train_images - 127.5) / 127.5
1
train_labels.shape
(60000,)
1
2
BATCH_SIZE=256
BUFFER_SIZE=60000
1
datasets=tf.data.Dataset.from_tensor_slices((train_images,train_labels))
1
datasets
<TensorSliceDataset shapes: ((28, 28, 1), ()), types: (tf.float32, tf.uint8)>
1
datasets=datasets.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
1
datasets
<BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.uint8)>

搭建生成器和判别器网络

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
class Generator_model(tf.keras.Model):
    """Generator that turns a noise vector into a single-channel image.

    A dense layer produces 7*7*256 features, which are reshaped to a
    7x7x256 map and passed through three transposed convolutions with
    strides 1, 2 and 2. The final layer has one output channel and a
    ``tanh`` activation.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: dense projection followed by batch norm and LeakyReLU.
        self.dense = tf.keras.layers.Dense(7 * 7 * 256, use_bias=False)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.leakyrelu1 = tf.keras.layers.LeakyReLU()

        self.reshape = tf.keras.layers.Reshape((7, 7, 256))

        # Stage 2: stride-1 transposed convolution (spatial size unchanged).
        self.convT1 = tf.keras.layers.Conv2DTranspose(
            128, (5, 5), strides=(1, 1), padding='same', use_bias=False)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.leakyrelu2 = tf.keras.layers.LeakyReLU()

        # Stage 3: stride-2 transposed convolution (spatial size doubled).
        self.convT2 = tf.keras.layers.Conv2DTranspose(
            64, (5, 5), strides=(2, 2), padding='same', use_bias=False)
        self.bn3 = tf.keras.layers.BatchNormalization()
        self.leakyrelu3 = tf.keras.layers.LeakyReLU()

        # Output: stride-2 transposed convolution with tanh activation.
        self.convT3 = tf.keras.layers.Conv2DTranspose(
            1, (5, 5), strides=(2, 2), padding='same', use_bias=False,
            activation='tanh')

    def call(self, inputs, training=True):
        """Generate images from ``inputs``.

        Args:
            inputs: Batch of noise vectors.
            training: Forwarded to the batch-normalization layers.

        Returns:
            The output of the final tanh transposed convolution.
        """
        hidden = self.dense(inputs)
        hidden = self.leakyrelu1(self.bn1(hidden, training=training))

        hidden = self.reshape(hidden)

        hidden = self.convT1(hidden)
        hidden = self.leakyrelu2(self.bn2(hidden, training=training))

        hidden = self.convT2(hidden)
        hidden = self.leakyrelu3(self.bn3(hidden, training=training))

        return self.convT3(hidden)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
class Discriminator_model(tf.keras.Model):
    """Two-headed SGAN discriminator/classifier.

    Two strided convolution blocks (conv -> LeakyReLU -> dropout) extract
    features that are flattened and fed to two heads:

    * ``dense_valid``: one sigmoid unit scoring real vs. generated.
    * ``dense_label``: ``num_classes + 1`` softmax units (the extra class
      is used for generated images).

    Args:
        num_classes: Number of real classes.
    """

    def __init__(self, num_classes):
        # Initialise the Keras base class before assigning any attribute.
        super().__init__()
        self.num_classes = num_classes

        self.conv1 = tf.keras.layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same')
        self.leakyrelu1 = tf.keras.layers.LeakyReLU()
        self.dropout1 = tf.keras.layers.Dropout(0.3)

        self.conv2 = tf.keras.layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same')
        self.leakyrelu2 = tf.keras.layers.LeakyReLU()
        self.dropout2 = tf.keras.layers.Dropout(0.3)

        self.flatten = tf.keras.layers.Flatten()

        # Real-vs-generated head (binary).
        self.dense_valid = tf.keras.layers.Dense(1, activation='sigmoid')

        # Classifier head over the real classes plus one extra class.
        self.dense_label = tf.keras.layers.Dense(self.num_classes + 1)
        self.softmax = tf.keras.layers.Softmax()

    def call(self, inputs, training=True):
        """Score a batch of images.

        Args:
            inputs: Batch of images.
            training: Forwarded to the dropout layers.

        Returns:
            A ``(valid, label)`` tuple. ``valid`` is the sigmoid output of
            the real/fake head (a probability, not a logit); ``label`` is
            the softmax distribution over ``num_classes + 1`` classes.
        """
        x = self.conv1(inputs)
        x = self.leakyrelu1(x)
        x = self.dropout1(x, training=training)

        # The second block must consume the first block's output. Feeding
        # `inputs` here instead would disconnect conv1 from both heads, so
        # its kernel and bias would receive no gradient.
        x = self.conv2(x)
        x = self.leakyrelu2(x)
        x = self.dropout2(x, training=training)

        features = self.flatten(x)  # shared feature vector

        valid = self.dense_valid(features)  # real or generated

        label = self.dense_label(features)
        label = self.softmax(label)  # class distribution

        return valid, label

定义损失函数

1
2
# The discriminator's real/fake head ends in a sigmoid, so this loss receives
# probabilities rather than logits.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)
# Takes one-hot targets; with the default settings predictions are
# interpreted as probabilities (the classifier head ends in a softmax).
categorical_cross_entropy = tf.keras.losses.CategoricalCrossentropy()
1
2
3
4
5
def discriminator_loss(real_out, fake_out, real_pred_labels, fake_pred_labels,
                       to_categorical_real_labels, to_categorical_fake_labels):
    """Return the discriminator loss: real/fake loss plus classification loss.

    Args:
        real_out: Real/fake head output for the real batch.
        fake_out: Real/fake head output for the generated batch.
        real_pred_labels: Classifier head output for the real batch.
        fake_pred_labels: Classifier head output for the generated batch.
        to_categorical_real_labels: One-hot targets for the real batch.
        to_categorical_fake_labels: One-hot targets for the generated batch.

    Returns:
        Sum of the losses on the real batch and on the generated batch.
    """
    # Keras loss objects are called as loss(y_true, y_pred): the one-hot
    # targets go first and the predicted distribution second.
    real_loss = (
        cross_entropy(tf.ones_like(real_out), real_out)
        + categorical_cross_entropy(to_categorical_real_labels, real_pred_labels)
    )
    fake_loss = (
        cross_entropy(tf.zeros_like(fake_out), fake_out)
        + categorical_cross_entropy(to_categorical_fake_labels, fake_pred_labels)
    )
    return real_loss + fake_loss
1
2
3
def generator_loss(fake_out):
    """Return the generator loss for a batch of generated images.

    The discriminator's real/fake output on generated images is compared
    against an all-ones target, i.e. the generator is rewarded when its
    images are scored as real.
    """
    wanted = tf.ones_like(fake_out)
    return cross_entropy(wanted, fake_out)

定义优化器

1
2
# Separate Adam optimizers so generator and discriminator keep independent state.
generator_opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
discriminator_opt = tf.keras.optimizers.Adam(learning_rate=1e-4)

设置超参数,实例化生成器和判别器

1
2
3
4
5
6
7
# Training-run settings.
EPOCHS = 100
noise_dim = 100  # length of each noise vector fed to the generator
num_examples_to_generate = 16
# Fixed noise batch used when previewing generated images.
seed = tf.random.normal(shape=[num_examples_to_generate, noise_dim])

generator = Generator_model()
# 10 real classes; the model itself adds the extra class for generated images.
discriminator = Discriminator_model(num_classes=10)

定义每个batch的训练过程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
def train_step(images_one_batch, one_batch_labels):
    """Run one generator update and one discriminator update on a batch.

    Args:
        images_one_batch: Batch of real images.
        one_batch_labels: Integer class labels for the real images.
    """
    batch_size = images_one_batch.shape[0]
    noise = tf.random.normal([batch_size, noise_dim])

    # One-hot targets over 11 classes: 0-9 for real images, 10 for generated.
    to_categorical_real_labels = to_categorical(one_batch_labels, num_classes=10 + 1)
    fake_class_ids = np.full((batch_size, 1), 10)
    to_categorical_fake_labels = to_categorical(fake_class_ids, num_classes=10 + 1)

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        # Real/fake score and class prediction for the real batch.
        real_out, real_pred_labels = discriminator(images_one_batch, training=True)

        # Real/fake score and class prediction for the generated batch.
        gen_image = generator(noise, training=True)
        fake_out, fake_pred_labels = discriminator(gen_image, training=True)

        gen_loss = generator_loss(fake_out)
        disc_loss = discriminator_loss(
            real_out, fake_out,
            real_pred_labels, fake_pred_labels,
            to_categorical_real_labels, to_categorical_fake_labels,
        )

    # Gradients of each loss with respect to its own model's variables.
    gen_vars = generator.trainable_variables
    disc_vars = discriminator.trainable_variables
    gradient_gen = gen_tape.gradient(gen_loss, gen_vars)
    gradient_disc = disc_tape.gradient(disc_loss, disc_vars)

    # Apply the updates.
    generator_opt.apply_gradients(zip(gradient_gen, gen_vars))
    discriminator_opt.apply_gradients(zip(gradient_disc, disc_vars))

定义生成图片的展示函数

1
2
3
4
5
6
7
8
9
# Feed test_noise through gen_model to produce generated images and plot them.
def generate_plot_image(gen_model, test_noise):
    """Plot the images that ``gen_model`` produces for ``test_noise``.

    Images are drawn on the smallest square grid that holds all of them
    (4x4 for 16 images) and rescaled from [-1, 1] to [0, 1] for display.

    Args:
        gen_model: Callable model; invoked with ``training=False``.
        test_noise: Batch of noise vectors, one per image to draw.
    """
    import math

    # Inference only: the generator is not being trained here.
    pre_images = gen_model(test_noise, training=False)
    count = pre_images.shape[0]
    # Smallest square grid with at least `count` cells, so batches larger
    # than 16 do not overflow a fixed 4x4 layout.
    grid = math.ceil(math.sqrt(count))

    plt.figure(figsize=(4, 4))
    for i in range(count):
        plt.subplot(grid, grid, i + 1)
        plt.imshow((pre_images[i, :, :, 0] + 1) / 2, cmap='gray')
        plt.axis('off')
    plt.show()

定义训练函数

1
2
3
4
5
def train(dataset, epochs):
    """Train for ``epochs`` passes over ``dataset``.

    Args:
        dataset: Iterable yielding ``(images, labels)`` batches.
        epochs: Number of passes over ``dataset``.
    """
    for _ in range(epochs):
        for batch in dataset:
            images, labels = batch
            train_step(images, labels)
        # NOTE(review): the source lost its indentation; the preview is
        # assumed to run once per epoch rather than once per batch — confirm.
        generate_plot_image(generator, seed)

开始训练

1
train(datasets,EPOCHS)

训练结果

一直报warning

1
WARNING:tensorflow:Gradients do not exist for variables ['discriminator_model/conv2d/kernel:0', 'discriminator_model/conv2d/bias:0'] when minimizing the loss.

之前使用继承自Model类的方式实现的DCGAN的代码时,如果判别器discriminator结构中的Dropout不设置training=True,也会报这个warning

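问题是现在的SGAN已经给Dropout设置了training=True,却仍然会报这个warning。注意warning中列出的正是第一层卷积(conv2d)的kernel和bias,说明这一层的输出没有参与损失的计算,应检查判别器call()中第二层卷积的输入是否为上一层的输出x。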

网上说只对某次迭代中梯度不为None的参数做梯度更新可以避免这个warning,试了下确实是这样,只需将discriminator_opt.apply_gradients(zip(gradient_disc,discriminator.trainable_variables))改为

1
2
3
4
5
# Workaround: update only the variables that actually received a gradient,
# skipping every (grad, var) pair whose gradient is None.
grads_and_vars = zip(gradient_disc, discriminator.trainable_variables)
discriminator_opt.apply_gradients(
    [pair for pair in grads_and_vars if pair[0] is not None]
)

但是生成的图片是惨不忍睹的

不理想原因瞎猜

这个问题有待后续再了解了,先这样。