夜间模式暗黑模式
字体
阴影
滤镜
圆角
主题色
TensorFlow实践-MNIST与各种优化方式

MNIST数据集

MNIST是著名的手写数字识别数据集, 包含60000张训练图片和10000张测试图片, 每张图片是一个0-9的手写数字, 大小为28×28像素

下载方式

TensorFlow提供了一个类来处理MNIST数据, 可以自动下载和转化格式:

from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data set from the given directory, asking for one-hot labels.
mnist = input_data.read_data_sets(
    "C:/MyFile/MNIST_data",
    one_hot=True,
)

在国内网络也可以用其他工具下载:

训练神经网络

从简单的神经网络开始, 一点点添加优化

无隐藏层

最简单的神经网络, 只有输入和输出层, 使用了交叉熵, relu激活函数和Softmax, 优化函数是GradientDescentOptimizer

最终准确率在0.92

# 0.92
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("C:/MyFile/MNIST_data", one_hot=True)

import tensorflow as tf

# Inputs: a batch of flattened images (784 values each) and one-hot labels
# over 10 classes. The batch dimension is left open.
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

# Weights and bias of the single input -> output layer
w = tf.Variable(tf.random_normal([784, 10], stddev=0.1))
b = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[10]))

# NOTE(review): the ReLU output is what gets passed to the loss as logits
# below, so negative scores are clamped to zero before the softmax -- confirm
# this is intended.
pre_activation = tf.matmul(x, w) + b
y = tf.nn.relu(pre_activation)

# The sparse cross-entropy takes integer class ids, so the one-hot labels are
# converted back with argmax.
label_ids = tf.argmax(y_, 1)
per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=y, labels=label_ids
)
loss = tf.reduce_mean(per_example_loss)

optimizer = tf.train.GradientDescentOptimizer(0.3)
train_step = optimizer.minimize(loss)

# Accuracy: fraction of examples whose highest-scoring class matches the label
predicted_ids = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_ids, tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

import numpy as np
import matplotlib.pyplot as plt

ix = []  # step numbers at which accuracy was measured
iy = []  # test-set accuracy measured at each of those steps

batch_size = 100
train_steps = 10000
# How often (in training steps) to evaluate on the test set. Kept separate
# from batch_size so that changing the batch size does not silently change
# the evaluation cadence.
eval_interval = 100

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for step in range(1, train_steps + 1):
        xs, ys = mnist.train.next_batch(batch_size)
        sess.run(train_step, feed_dict={x: xs, y_: ys})
        if step % eval_interval != 0:
            continue
        print("%d steps: " % step)
        acc = sess.run(accuracy, feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels
        })
        print(acc)
        ix.append(step)
        iy.append(acc)

# Plot test accuracy against training step.
plt.figure(figsize=(8, 4))
plt.plot(ix, iy, "b--", linewidth=1)
# Save before show(): show() blocks until the window is closed and the figure
# may be released afterwards, so a savefig() call placed after it can write an
# empty image.
plt.savefig("C:/MyFile/mnist1.png")
plt.show()

# Dump the curve as text: the number of points, then every step number,
# then every accuracy value, one per line.
with open('C:/MyFile/mnist/1.txt', 'w') as f:
    f.write("%d\n" % len(ix))
    f.writelines("%d\n" % step_no for step_no in ix)
    f.writelines("%f\n" % acc_value for acc_value in iy)


准确率随训练轮数变化折线图:

多层感知机(一个隐藏层)

添加了一个500节点的隐藏层, 准确率最终稳定在0.98, 进步很大

# 0.978

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("C:/MyFile/MNIST_data", one_hot=True)

import tensorflow as tf

# Layer widths: input, output, and the single hidden layer
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500

# Number of training examples fed per step
BATCH_SIZE = 100


def inference(input_tensor, w1, b1, w2, b2):
    """Forward pass of a network with one ReLU hidden layer.

    The hidden layer is relu(input_tensor @ w1 + b1); the returned tensor is
    hidden @ w2 + b2 with no activation applied to it.
    """
    hidden_pre = tf.matmul(input_tensor, w1) + b1
    hidden = tf.nn.relu(hidden_pre)
    logits = tf.matmul(hidden, w2) + b2
    return logits


# Training inputs: a batch of flattened images and their one-hot labels
x = tf.placeholder(tf.float32, [None, INPUT_NODE])
y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE])

# Hidden-layer weights and bias
w1 = tf.Variable(tf.random_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
b1 = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[LAYER1_NODE]))

# Output-layer weights and bias
w2 = tf.Variable(tf.random_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
b2 = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[OUTPUT_NODE]))

y = inference(x, w1, b1, w2, b2)

# The sparse cross-entropy takes integer class ids, so the one-hot labels are
# converted back with argmax.
label_ids = tf.argmax(y_, 1)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=label_ids)
)

optimizer = tf.train.GradientDescentOptimizer(0.3)
train_step = optimizer.minimize(loss)

# Accuracy: fraction of examples whose highest-scoring class matches the label
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


import numpy as np
import matplotlib.pyplot as plt

ix = []  # step numbers at which accuracy was measured
iy = []  # test-set accuracy measured at each of those steps

TRAIN_STEPS = 10000
# How often (in training steps) to evaluate on the test set. Kept separate
# from BATCH_SIZE so that changing the batch size does not silently change
# the evaluation cadence.
EVAL_INTERVAL = 100

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for step in range(1, TRAIN_STEPS + 1):
        xs, ys = mnist.train.next_batch(BATCH_SIZE)
        sess.run(train_step, feed_dict={x: xs, y_: ys})
        if step % EVAL_INTERVAL != 0:
            continue
        print("%d steps: " % step)
        acc = sess.run(accuracy, feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels
        })
        print(acc)
        ix.append(step)
        iy.append(acc)

# Plot test accuracy against training step.
plt.figure(figsize=(8, 4))
plt.plot(ix, iy, "b--", linewidth=1)
# Save before show(): show() blocks until the window is closed and the figure
# may be released afterwards, so a savefig() call placed after it can write an
# empty image.
plt.savefig("C:/MyFile/mnist2.png")
plt.show()

# Dump the curve as text: the number of points, then every step number,
# then every accuracy value, one per line.
with open('C:/MyFile/mnist/2.txt', 'w') as f:
    f.write("%d\n" % len(ix))
    f.writelines("%d\n" % step_no for step_no in ix)
    f.writelines("%f\n" % acc_value for acc_value in iy)

准确率随训练轮数变化折线图:

使用了指数衰减的学习率

准确率0.9815

# 0.978

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("C:/MyFile/MNIST_data", one_hot=True)

import tensorflow as tf

# Layer widths: input, output, and the single hidden layer
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500

# Number of training examples fed per step
BATCH_SIZE = 100


def inference(input_tensor, w1, b1, w2, b2):
    """Forward pass of a network with one ReLU hidden layer.

    The hidden layer is relu(input_tensor @ w1 + b1); the returned tensor is
    hidden @ w2 + b2 with no activation applied to it.
    """
    hidden_pre = tf.matmul(input_tensor, w1) + b1
    hidden = tf.nn.relu(hidden_pre)
    logits = tf.matmul(hidden, w2) + b2
    return logits


# Training inputs: a batch of flattened images and their one-hot labels
x = tf.placeholder(tf.float32, [None, INPUT_NODE])
y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE])

# Hidden-layer weights and bias
w1 = tf.Variable(tf.random_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
b1 = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[LAYER1_NODE]))

# Output-layer weights and bias
w2 = tf.Variable(tf.random_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
b2 = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[OUTPUT_NODE]))

y = inference(x, w1, b1, w2, b2)

# The sparse cross-entropy takes integer class ids, so the one-hot labels are
# converted back with argmax.
label_ids = tf.argmax(y_, 1)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=label_ids)
)

# Exponentially decaying learning rate. global_step is excluded from training
# and is advanced by minimize() below.
global_step = tf.Variable(tf.constant(0), trainable=False)
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99

# Number of batches in one pass over the training set
batches_per_epoch = mnist.train.num_examples / BATCH_SIZE
learning_rate = tf.train.exponential_decay(
    learning_rate=LEARNING_RATE_BASE,
    global_step=global_step,
    decay_steps=batches_per_epoch,
    decay_rate=LEARNING_RATE_DECAY,
    staircase=True
)

optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss, global_step=global_step)

# Accuracy: fraction of examples whose highest-scoring class matches the label
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


import numpy as np
import matplotlib.pyplot as plt

ix = []  # step numbers at which accuracy was measured
iy = []  # test-set accuracy measured at each of those steps

TRAIN_STEPS = 10000
# How often (in training steps) to evaluate on the test set. Kept separate
# from BATCH_SIZE so that changing the batch size does not silently change
# the evaluation cadence.
EVAL_INTERVAL = 100

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for step in range(1, TRAIN_STEPS + 1):
        xs, ys = mnist.train.next_batch(BATCH_SIZE)
        sess.run(train_step, feed_dict={x: xs, y_: ys})
        if step % EVAL_INTERVAL != 0:
            continue
        print("stp: %d" % (sess.run(global_step)))
        # Report the 1-based step count, the same value that is recorded in ix
        # (previously this printed the 0-based loop index, one less).
        print("%d steps: " % step)
        acc = sess.run(accuracy, feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels
        })
        print(acc)
        ix.append(step)
        iy.append(acc)

# Plot test accuracy against training step.
plt.figure(figsize=(8, 4))
plt.plot(ix, iy, "b--", linewidth=1)
# Save before show(): show() blocks until the window is closed and the figure
# may be released afterwards, so a savefig() call placed after it can write an
# empty image.
plt.savefig("C:/MyFile/mnist3.png")
plt.show()

# Dump the curve as text: the number of points, then every step number,
# then every accuracy value, one per line.
with open('C:/MyFile/mnist/3.txt', 'w') as f:
    f.write("%d\n" % len(ix))
    f.writelines("%d\n" % step_no for step_no in ix)
    f.writelines("%f\n" % acc_value for acc_value in iy)

使用了L2正则化

准确率0.9835

# 0.9831
# 0.001 0.976

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("C:/MyFile/MNIST_data", one_hot=True)

import tensorflow as tf

# Layer widths: input, output, and the single hidden layer
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500

# Number of training examples fed per step
BATCH_SIZE = 100


def inference(input_tensor, w1, b1, w2, b2):
    """Forward pass of a network with one ReLU hidden layer.

    The hidden layer is relu(input_tensor @ w1 + b1); the returned tensor is
    hidden @ w2 + b2 with no activation applied to it.
    """
    hidden_pre = tf.matmul(input_tensor, w1) + b1
    hidden = tf.nn.relu(hidden_pre)
    logits = tf.matmul(hidden, w2) + b2
    return logits


# Training inputs: a batch of flattened images and their one-hot labels
x = tf.placeholder(tf.float32, [None, INPUT_NODE])
y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE])

# Hidden-layer weights and bias
w1 = tf.Variable(tf.random_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
b1 = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[LAYER1_NODE]))

# Output-layer weights and bias
w2 = tf.Variable(tf.random_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
b2 = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[OUTPUT_NODE]))

y = inference(x, w1, b1, w2, b2)

# L2 regularization, applied to the two weight matrices only (not the biases)
regularizer = tf.contrib.layers.l2_regularizer(0.0001)

# The sparse cross-entropy takes integer class ids, so the one-hot labels are
# converted back with argmax.
label_ids = tf.argmax(y_, 1)
cross_entropy = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=label_ids)
)
loss = cross_entropy + regularizer(w1) + regularizer(w2)

# Exponentially decaying learning rate. global_step is excluded from training
# and is advanced by minimize() below.
global_step = tf.Variable(tf.constant(0), trainable=False)
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99

# Number of batches in one pass over the training set
batches_per_epoch = mnist.train.num_examples / BATCH_SIZE
learning_rate = tf.train.exponential_decay(
    learning_rate=LEARNING_RATE_BASE,
    global_step=global_step,
    decay_steps=batches_per_epoch,
    decay_rate=LEARNING_RATE_DECAY,
    staircase=True
)

optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss, global_step=global_step)

# Accuracy: fraction of examples whose highest-scoring class matches the label
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


import numpy as np
import matplotlib.pyplot as plt

ix = []  # step numbers at which accuracy was measured
iy = []  # test-set accuracy measured at each of those steps

TRAIN_STEPS = 10000
# How often (in training steps) to evaluate on the test set. Kept separate
# from BATCH_SIZE so that changing the batch size does not silently change
# the evaluation cadence.
EVAL_INTERVAL = 100

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for step in range(1, TRAIN_STEPS + 1):
        xs, ys = mnist.train.next_batch(BATCH_SIZE)
        sess.run(train_step, feed_dict={x: xs, y_: ys})
        if step % EVAL_INTERVAL != 0:
            continue
        print("stp: %d" % (sess.run(global_step)))
        # Report the 1-based step count, the same value that is recorded in ix
        # (previously this printed the 0-based loop index, one less).
        print("%d steps: " % step)
        acc = sess.run(accuracy, feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels
        })
        print(acc)
        ix.append(step)
        iy.append(acc)

# Plot test accuracy against training step.
plt.figure(figsize=(8, 4))
plt.plot(ix, iy, "b--", linewidth=1)
# Save before show(): show() blocks until the window is closed and the figure
# may be released afterwards, so a savefig() call placed after it can write an
# empty image.
plt.savefig("C:/MyFile/mnist4.png")
plt.show()

# Dump the curve as text: the number of points, then every step number,
# then every accuracy value, one per line.
with open('C:/MyFile/mnist/4.txt', 'w') as f:
    f.write("%d\n" % len(ix))
    f.writelines("%d\n" % step_no for step_no in ix)
    f.writelines("%f\n" % acc_value for acc_value in iy)

使用了滑动平均

准确率0.9837

# 0.9837

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("C:/MyFile/MNIST_data", one_hot=True)

import tensorflow as tf

# Layer widths: input, output, and the single hidden layer
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500

# Number of training examples fed per step
BATCH_SIZE = 100


def inference(input_tensor, avg_class, w1, b1, w2, b2):
    """Forward pass of the one-hidden-layer network, optionally with averaged weights.

    Training still needs the raw parameters, while evaluation should use
    their moving averages, so the caller chooses through ``avg_class``.

    Args:
        input_tensor: input batch, multiplied by ``w1``.
        avg_class: ``None`` to use the parameters as given; otherwise an
            object whose ``average(var)`` returns the moving average of
            ``var``, which is then used in place of each parameter.
        w1, b1: hidden-layer weights and bias.
        w2, b2: output-layer weights and bias.

    Returns:
        The output-layer tensor ``relu(input @ w1 + b1) @ w2 + b2``; no
        activation is applied to it.
    """
    # Identity test rather than `== None`: the question is whether an
    # averaging object was supplied at all, not how it compares to None.
    if avg_class is not None:
        w1, b1, w2, b2 = [avg_class.average(var) for var in (w1, b1, w2, b2)]
    layer1 = tf.nn.relu(tf.matmul(input_tensor, w1) + b1)
    return tf.matmul(layer1, w2) + b2


# Training inputs: a batch of flattened images and their one-hot labels
x = tf.placeholder(tf.float32, [None, INPUT_NODE])
y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE])

# Hidden-layer weights and bias
w1 = tf.Variable(tf.random_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
b1 = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[LAYER1_NODE]))

# Output-layer weights and bias
w2 = tf.Variable(tf.random_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
b2 = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[OUTPUT_NODE]))

# Forward pass on the raw parameters; this is the output the loss is built on
y = inference(x, None, w1, b1, w2, b2)

# Exponentially decaying learning rate. global_step is excluded from training
# and is advanced by minimize() below.
global_step = tf.Variable(tf.constant(0), trainable=False)

LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99

# Number of batches in one pass over the training set
batches_per_epoch = mnist.train.num_examples / BATCH_SIZE
learning_rate = tf.train.exponential_decay(
    learning_rate=LEARNING_RATE_BASE,
    global_step=global_step,
    decay_steps=batches_per_epoch,
    decay_rate=LEARNING_RATE_DECAY,
    staircase=True
)

# Moving averages of every trainable variable (global_step is not trainable,
# so it is not averaged)
MOVING_AVERAGE_DECAY = 0.99
variable_averages = tf.train.ExponentialMovingAverage(
    MOVING_AVERAGE_DECAY, global_step
)
variable_averages_op = variable_averages.apply(tf.trainable_variables())

# Forward pass on the averaged parameters; used for evaluation only
average_y = inference(x, variable_averages, w1, b1, w2, b2)

# L2 regularization, applied to the two weight matrices only (not the biases)
regularizer = tf.contrib.layers.l2_regularizer(0.0001)

# The sparse cross-entropy takes integer class ids, so the one-hot labels are
# converted back with argmax.
label_ids = tf.argmax(y_, 1)
cross_entropy = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=label_ids)
)
loss = cross_entropy + regularizer(w1) + regularizer(w2)

optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss, global_step=global_step)

# The moving averages must be refreshed after every training step, so both
# ops are bundled into a single op to run.
train_op = tf.group(train_step, variable_averages_op)

# Accuracy is measured on the averaged-parameter output, not on y
correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


import numpy as np
import matplotlib.pyplot as plt

ix = []  # step numbers at which accuracy was measured
iy = []  # test-set accuracy measured at each of those steps

TRAIN_STEPS = 10000
# How often (in training steps) to evaluate on the test set. Kept separate
# from BATCH_SIZE so that changing the batch size does not silently change
# the evaluation cadence.
EVAL_INTERVAL = 100

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for step in range(1, TRAIN_STEPS + 1):
        xs, ys = mnist.train.next_batch(BATCH_SIZE)
        # train_op runs the gradient step together with the moving-average update
        sess.run(train_op, feed_dict={x: xs, y_: ys})
        if step % EVAL_INTERVAL != 0:
            continue
        print("steps: %d" % (sess.run(global_step)))
        acc = sess.run(accuracy, feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels
        })
        print(acc)
        ix.append(step)
        iy.append(acc)

# Plot test accuracy against training step.
plt.figure(figsize=(8, 4))
plt.plot(ix, iy, "b--", linewidth=1)
# Save before show(): show() blocks until the window is closed and the figure
# may be released afterwards, so a savefig() call placed after it can write an
# empty image.
plt.savefig("C:/MyFile/mnist5.png")
plt.show()

# Dump the curve as text: the number of points, then every step number,
# then every accuracy value, one per line.
with open('C:/MyFile/mnist/5.txt', 'w') as f:
    f.write("%d\n" % len(ix))
    f.writelines("%d\n" % step_no for step_no in ix)
    f.writelines("%f\n" % acc_value for acc_value in iy)

总结

添加一个隐藏层能大幅度提高准确率, 而其他优化方法也能有一定的提升作用(对准确率和速度), 但不是那么明显

补充一个定理:

只要隐层节点数量足够多, 单隐层神经网络即可以任意精度逼近有界闭集(紧集)上的任意连续函数 (Universal approximation theorem, 通用近似定理)

暂无评论

发送评论


				
上一篇
下一篇