基于CNN的手写数字的识别TensorflowCNN图像分类

基于CNN的手写数字的识别【Tensorflow】【CNN】

前言

建议大家先阅读我的前两篇文章，这样更容易理解本文的知识点，链接在下方：
深度学习-图像分类（CNN）-CSDN博客
基于CNN的LeNet网络CIFAR-10数据集的模型训练（多分类数据集）【图片分类和预测】【Pytorch】-CSDN博客

模型网络的定义

定义一个卷积层
定义一个全连接层
定义一个全连接层(激活函数为SoftMax，令其最后的输出为10个，对应数字0-9各自的概率，选择概率最大的作为最终的预测)

from tensorflow.python import keras
from keras.layers import Dense,Flatten,Conv2D
from keras import Model
#主要是用Tensorflow的Keras模块实现
#补充：Keras中输入张量的维度顺序是（batch，height，width，channel）
class MyModel(Model):
    """Small CNN classifier for single-channel 28x28 digit images.

    Pipeline: one 3x3 convolution (32 filters, ReLU) -> flatten ->
    dense layer (128 units, ReLU) -> dense layer (10 units, softmax).
    """

    def __init__(self):
        super().__init__()
        # Convolution layer: 32 kernels of size 3x3 with ReLU activation.
        self.conv1 = Conv2D(filters=32, kernel_size=3, activation='relu')
        # Collapses the convolution feature maps into one vector per sample
        # so that they can be fed to the fully connected layers.
        self.flatten = Flatten()
        # Hidden fully connected layer with 128 outputs and ReLU activation.
        self.d1 = Dense(units=128, activation='relu')
        # Output layer: softmax over 10 values, one per digit 0-9; the
        # largest one is taken as the prediction.
        self.d2 = Dense(units=10, activation='softmax')

    def call(self, x, **kwargs):
        """Run the forward pass and return the output of the softmax layer.

        Extra keyword arguments are accepted but not used. The shape notes
        below assume an input of [batch, 28, 28, 1] (one channel because the
        images are grayscale).
        """
        feature_maps = self.conv1(x)        # [batch, 26, 26, 32]
        flat = self.flatten(feature_maps)   # [batch, 21632]  (26 * 26 * 32)
        hidden = self.d1(flat)              # [batch, 128]
        return self.d2(hidden)              # [batch, 10]

模型网络的训练

数据集的下载与校验（mnist）

from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from model import MyModel
from tensorflow.python import keras
from keras.losses import CategoricalCrossentropy
import numpy as np
import matplotlib.pyplot as plt


def main():
    """Load MNIST and show the first three test images with their labels.

    Serves as a sanity check that images and labels line up before training:
    the labels are printed to the console and the images are displayed side
    by side in one matplotlib window.
    """
    # load_data() fetches the dataset on first use; x holds the images and
    # y the integer labels. The training split is not needed for this check,
    # so it is discarded instead of being normalised into a large unused
    # float64 array.
    _, (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Scale only the previewed images from 0-255 down to 0-1.
    preview_images = x_test[:3] / 255.0
    preview_labels = y_test[:3]
    print(preview_labels)

    # np.hstack joins the three images horizontally so that they can be
    # shown together in a single plot.
    plt.imshow(np.hstack(preview_images), cmap="gray")
    plt.show()


if __name__ == '__main__':
    main()

显示：
发现测试集的前三张图片为7，2，1，控制台输出的标签同样也为7，2，1，说明加载的图片和标签正确，可以进行模型的训练。

模型的训练

注意

定义训练损失误差，预测的准确率

    # Running metrics for the training set: the mean of the loss values fed
    # in, and the accuracy of predictions against integer class labels.
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    # The same pair of metrics for the test set (test_loss and test_accuracy).
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

1. 误差损失函数的定义：使用的标签是对应的整数而不是对应的one-hot编码，同时其计算公式以及方法可以参考

tf.keras.losses.SparseCategoricalCrossentropy函数-CSDN博客

```python
# Cross-entropy loss used to compute the loss value; the "Sparse" variant
# takes integer class labels rather than one-hot vectors.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
```

定义训练步骤

    # One optimisation step on a single batch: forward pass, loss, gradients,
    # weight update, then metric bookkeeping.
    @tf.function  # arguments: a batch of images and the matching labels
    def train_step(images, labels):
        # Record the forward pass so the tape can differentiate the loss.
        with tf.GradientTape() as grad_tape:
            batch_predictions = model(images)
            # loss_object is the loss function defined beforehand.
            batch_loss = loss_object(labels, batch_predictions)

        # Gradient of the loss with respect to every trainable variable,
        # paired with its variable and handed to the optimizer.
        weights = model.trainable_variables
        grads = grad_tape.gradient(batch_loss, weights)
        optimizer.apply_gradients(zip(grads, weights))

        # Accumulate this batch into the running training metrics.
        train_loss(batch_loss)
        train_accuracy(labels, batch_predictions)

定义测试步骤

    # Evaluation on a single batch: forward pass and loss only, the weights
    # are not updated.
    @tf.function
    def test_step(images, labels):
        # The model returns one softmax vector per image; the index of its
        # largest entry is the predicted class.
        batch_predictions = model(images)
        batch_loss = loss_object(labels, batch_predictions)

        # Accumulate this batch into the running test metrics.
        test_loss(batch_loss)
        test_accuracy(labels, batch_predictions)

train.py

from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from model import MyModel
from tensorflow.python import keras
from keras.losses import CategoricalCrossentropy
import numpy as np
import matplotlib.pyplot as plt


def main(epochs=5, batch_size=32, weights_path='mnist'):
    """Train MyModel on MNIST and save the resulting weights.

    Every epoch runs one pass over the shuffled training set followed by one
    pass over the test set and prints the accumulated loss and accuracy of
    both. After the last epoch the weights are written to ``weights_path``.

    Args:
        epochs: Number of passes over the training set.
        batch_size: Number of images per batch for training and testing.
        weights_path: Destination passed to ``model.save_weights``.
    """
    # MNIST has 60000 training and 10000 test images; x holds the images and
    # y the integer labels. The data is downloaded on first use.
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    def prepare(images):
        # Scale pixels from 0-255 to 0-1, cast to float32 once up front so
        # that the model does not have to cast every float64 batch itself,
        # and append the channel axis the model expects:
        # (batch, height, width) -> (batch, height, width, channel).
        return (images / 255.0).astype("float32")[..., tf.newaxis]

    x_train, x_test = prepare(x_train), prepare(x_test)

    # Iterable datasets. Training batches are drawn through a shuffle buffer
    # of 10000 images; the test set is only evaluated, so its order is kept.
    train_ds = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(10000).batch(batch_size)
    test_ds = tf.data.Dataset.from_tensor_slices(
        (x_test, y_test)).batch(batch_size)

    model = MyModel()

    # The labels are integers, hence the "Sparse" cross-entropy; one-hot
    # labels would use CategoricalCrossentropy instead.
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam()

    # Running loss / accuracy for the training set and for the test set.
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_step(images, labels):
        # Forward pass under the tape so the loss can be differentiated.
        with tf.GradientTape() as tape:
            predictions = model(images)
            loss = loss_object(labels, predictions)
        # Gradients of the loss w.r.t. the trainable variables, paired with
        # those variables and applied by the Adam optimizer.
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)

    @tf.function
    def test_step(images, labels):
        # Forward pass and loss only; no weights are updated.
        predictions = model(images)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    def reset_metric(metric):
        # Metric.reset_states() was renamed to reset_state() in later Keras
        # releases; call whichever one this installation provides.
        reset = getattr(metric, 'reset_state', None)
        if reset is None:
            reset = metric.reset_states
        reset()

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

    for epoch in range(epochs):
        # Clear the values accumulated during the previous epoch.
        for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
            reset_metric(metric)

        for images, labels in train_ds:
            train_step(images, labels)

        for test_images, test_labels in test_ds:
            test_step(test_images, test_labels)

        print(template.format(epoch + 1,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))

    model.save_weights(weights_path)
    print("Finish training")


if __name__ == '__main__':
    main()

模型的结果

在这里插入图片描述

可以观察到，经过5轮迭代之后，模型的预测准确率达到98.20%，准确率较高。

模型网络的预测

预测代码的设计

import tensorflow as tf
from tensorflow.python import keras
from keras.models import load_model
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from model import MyModel

def main(image_path='7.jpg', weights_path='mnist'):
    """Predict the digit shown in an image file with the trained MyModel.

    The image is resized to 28x28, converted to grayscale and inverted
    (dark digit on light background -> light digit on dark background, as in
    MNIST) before it is passed to the model. The predicted class is printed
    and the original, grayscale and inverted images are displayed.

    Args:
        image_path: Image file to classify.
        weights_path: Weights previously written by the training script.
    """
    # Build the network and restore the weights produced by training.
    model = MyModel()
    model.load_weights(weights_path)

    # The context manager closes the file handle; resize() returns a new
    # image, so the 28x28 copy stays usable after the file is closed.
    with Image.open(image_path) as source:
        img = source.resize((28, 28))

    # Convert to a single-channel grayscale array.
    gray = np.array(img.convert('L'))

    # Invert and scale to 0-1 so the input looks like the MNIST training
    # images.
    gray_inv = (255 - gray) / 255.0
    plt.imshow(gray, cmap='gray')
    plt.show()

    # Shape (batch, height, width, channel) with a batch of one; float32
    # matches the dtype the model computes in.
    image = gray_inv.reshape((1, 28, 28, 1)).astype('float32')
    prediction = model.predict(image)
    # Axis 0 is the batch, so the largest softmax output is taken on axis 1.
    prediction_id = np.argmax(prediction, axis=1)
    print(prediction_id)

    # Show the resized input, the grayscale version and the inverted image
    # that was actually fed to the model, titled with the prediction.
    panels = (
        ("do not process", img),
        ('process - gray', gray),
        (f'predict:{prediction_id}', gray_inv),
    )
    f, ax = plt.subplots(1, 3, figsize=(5, 5))
    for axis, (title, picture) in zip(ax, panels):
        axis.set_title(title)
        axis.axis('off')
        axis.imshow(picture, cmap='gray')

    plt.show()
    print("predict success!")


if __name__ == "__main__":
    main()

运行结果：
待预测的值为7，传入的未经处理的数据也是7，预测结果也是7，预测正确
注意：若是对彩色图片进行预测，结果可能会出现偏差——我在自己校验的时候发现处理彩色图片时会出现一些问题，因为原数据集是黑白手写数字数据集，并不是彩色数据集。

模型框架的设计

在这里插入图片描述

结束语：

本篇blog参考b站up主霹雳吧啦Wz
同时感谢每一位读者，有不完善的地方欢迎批评指正
一起继续努力…