yshr10ic’s Blog

備忘録

TensorFlowを使用したCNNの実装

TensorFlow Deep Learning

前回の記事では、TensorFlowを使用して、MNISTを解くニューラルネットワーク（Neural Network：NN）を実装しました。

yshr10ic.hatenablog.com

今回は、畳み込みニューラルネットワーク（Convolutional Neural Network：CNN）を用いて、MNISTを解いていきたいと思います。

CNNの具体的な説明に関しては、こちらの記事が非常に分かりやすかったです。

TensorFlowでの実装

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST dataset from the 'MNIST_data' directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# input data/output data
# X holds flattened images (784 = 28 * 28 values per example);
# y holds the matching one-hot labels over the 10 digit classes.
X = tf.placeholder(dtype=tf.float32, shape=[None, 784])
y = tf.placeholder(dtype=tf.float32, shape=[None, 10])

# input image
# Layout: [batch size, height, width, number of channels].
# The -1 lets TensorFlow infer the batch size from the fed data.
img = tf.reshape(X, shape=[-1, 28, 28, 1])

# convolution layer 1: 5x5 filters mapping 1 input channel to 32 feature maps
with tf.name_scope('conv1'):
    # Filter shape: [filter height, filter width, input channels, number of filters]
    conv1_kernel = tf.Variable(tf.truncated_normal(shape=[5, 5, 1, 32], stddev=0.1))
    # strides: how far the window moves along the
    # [batch, height, width, channel] dimensions
    conv1_maps = tf.nn.conv2d(img, conv1_kernel, strides=[1, 1, 1, 1], padding='SAME')
    conv1_bias = tf.Variable(tf.constant(0.1, shape=[32]))
    conv1_pre_activation = tf.add(conv1_maps, conv1_bias)
    conv1_o = tf.nn.relu(conv1_pre_activation)

# pool layer 1: 2x2 max pooling with stride 2
with tf.name_scope('pool1'):
    # ksize: size of the pooling window along the
    # [batch, height, width, channel] dimensions
    pool1_o = tf.nn.max_pool(
        conv1_o,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
    )

# convolution layer 2: 5x5 filters mapping 32 input channels to 64 feature maps
with tf.name_scope('conv2'):
    # Filter shape: [filter height, filter width, input channels, number of filters]
    conv2_kernel = tf.Variable(tf.truncated_normal(shape=[5, 5, 32, 64], stddev=0.1))
    conv2_maps = tf.nn.conv2d(pool1_o, conv2_kernel, strides=[1, 1, 1, 1], padding='SAME')
    conv2_bias = tf.Variable(tf.constant(0.1, shape=[64]))
    conv2_pre_activation = tf.add(conv2_maps, conv2_bias)
    conv2_o = tf.nn.relu(conv2_pre_activation)

# pool layer 2: 2x2 max pooling with stride 2
with tf.name_scope('pool2'):
    # ksize: size of the pooling window along the
    # [batch, height, width, channel] dimensions
    pool2_o = tf.nn.max_pool(
        conv2_o,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
    )

# flatten layer
with tf.name_scope('flatten'):
    # Collapse every 7 x 7 x 64 pooled feature volume into one vector per example.
    flat_size = 7 * 7 * 64
    flatten_o = tf.reshape(pool2_o, shape=[-1, flat_size])

# fully connected layer: flat_size inputs -> 1024 hidden units with ReLU
with tf.name_scope('fully_connected'):
    fc_weights = tf.Variable(tf.truncated_normal(shape=[flat_size, 1024], stddev=0.1))
    fc_bias = tf.Variable(tf.constant(0.1, shape=[1024]))
    fc_pre_activation = tf.add(tf.matmul(flatten_o, fc_weights), fc_bias)
    fully_connected_o = tf.nn.relu(fc_pre_activation)

# output layer: 1024 hidden units -> 10 class scores
with tf.name_scope('output'):
    output_w = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1))
    output_b = tf.Variable(tf.constant(0.1, shape=[10]))
    # Keep the unnormalized scores (logits) so the loss can be computed
    # directly from them instead of from the softmax output.
    output_logits = tf.matmul(fully_connected_o, output_w) + output_b
    # Class probabilities; the accuracy op takes the argmax of these.
    output_o = tf.nn.softmax(output_logits)

# loss function(cross entropy)
with tf.name_scope('loss'):
    # The fused op computes softmax and cross entropy together in a
    # numerically stable way, so no epsilon has to be added inside the log
    # (the previous `tf.log(output_o + 1e-5)` biased the loss and gradients).
    # NOTE: newer TensorFlow 1.x releases emit a deprecation warning here and
    # recommend softmax_cross_entropy_with_logits_v2; the result is the same
    # for this graph because the labels come from a placeholder.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=y, logits=output_logits)
    loss = tf.reduce_mean(per_example_loss)

# training: plain gradient descent on the cross-entropy loss
with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    train_step = optimizer.minimize(loss)

# evaluation: fraction of examples whose predicted class matches the label
with tf.name_scope('accuracy'):
    predicted_class = tf.argmax(output_o, 1)
    true_class = tf.argmax(y, 1)
    is_correct = tf.equal(predicted_class, true_class)
    # Cast the boolean matches to 0.0 / 1.0 so that their mean is the accuracy.
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# init
init = tf.global_variables_initializer()
saver = tf.train.Saver()

# Make sure the checkpoint directory exists before training starts, so a
# missing directory cannot make the final save fail after all the training
# work has been done. MakeDirs succeeds if the directory already exists.
tf.gfile.MakeDirs('models')

with tf.Session() as sess:
    sess.run(init)

    # test data
    test_images = mnist.test.images
    test_labels = mnist.test.labels
    # The evaluation feed never changes, so build it once outside the loop.
    test_feed = {X: test_images, y: test_labels}

    for i in range(1000):
        # One gradient-descent step on a mini-batch of 50 training examples.
        train_images, train_labels = mnist.train.next_batch(50)
        sess.run(train_step, feed_dict={X: train_images, y: train_labels})

        # Report accuracy on the whole test set every 100 steps.
        if (i + 1) % 100 == 0:
            accuracy_val = sess.run(accuracy, feed_dict=test_feed)
            print('Step %4d: accuracy = %.2f' % (i + 1, accuracy_val))

    # Save only the variable values; the meta graph is intentionally skipped.
    saver.save(sess, 'models/mnist_cnn_model', write_meta_graph=False)

前回のNNの実装では、accuracyは0.90前後でしたが、今回のCNNでは0.96前後まで上がっています！ただし、層を増やしている分、計算量も増えてしまっています。

今後に向けて

TensorFlowを用いてCNNを実装しました。今後やっていきたいことは↓です。

パディングやストライドを変えたときの精度の変化を確認する
kerasを用いてCNNを実装する