当前位置：首页 > article >正文

MNIST 数据集与 TFOD API

article 2026/2/1 20:35:20

此处给出我在进行毕业设计过程中写的三份脚本，作为demo 展示模型的预处理，输出信息提取和TFOD API的应用。

script1

加载本地的MNIST模型，对本地的手写数字进行推理

# test the validation of the saved file and the camera
import cv2
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt# 1. 加载训练好的模型
model = tf.keras.models.load_model("/home/ncut/models/myMNIST_Model.keras")'''
# 2. 从摄像头捕获一张图片
cap = cv2.VideoCapture(0)  # 0 表示默认摄像头
if not cap.isOpened():print("无法打开摄像头")exit()ret, frame = cap.read()
cap.release()
if not ret:print("无法捕获摄像头图像")exit()
'''frame = cv2.imread("~/code_garden/testcase_folder/five.png")
# 3. 预处理图像
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # 转换为灰度图
img_resized = cv2.resize(gray, (28, 28))  # 调整大小到 28x28
img_normalized = 1.0 - (img_resized.astype("float32") / 255.0)
img_input = img_normalized.reshape(1, 28, 28, 1)  # 展平以匹配模型输入格式plt.imshow(img_input[0, :, :, 0], cmap='gray')
plt.title("image after preprocess")
plt.axis("off")
plt.show()# 4. 进行推理
predictions = model.predict(img_input)
print("预测概率:", predictions)
predicted_class = np.argmax(predictions)
print("预测类别:", predicted_class)
confidence = np.max(predictions)
print("confidence value:", confidence)# 5. 可视化结果
plt.figure(figsize=(10, 4))# 左侧显示原始摄像头拍摄的彩色图像
plt.subplot(1, 3, 1)
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # OpenCV 读取的 BGR 需转换为 RGB
plt.title("The original color photo")
plt.axis("off")# 中间显示处理后的灰度图
plt.subplot(1, 3, 2)
plt.imshow(img_normalized, cmap="gray")
plt.title("gray map(28x28)")
plt.axis("off")# 右侧显示推理结果
plt.subplot(1, 3, 3)
plt.title("inference result")
plt.text(0.1, 0.6, f"predicition class: {predicted_class}", fontsize=14)
plt.text(0.1, 0.4, f"confidence: {confidence:.4f}", fontsize=14)
#plt.text(0.1, 0.2, f"Loss: {loss_value:.4f}", fontsize=14)
plt.axis("off")plt.tight_layout()
plt.show()

Scripts2

实际使用的脚本，用类进行封装，同样是对本地图片的读取。这里使用了 @staticmethod 修饰器，类似 C++ 中的 static method，独立于类的实例而存在。在主函数调用时，睡眠三秒，方便使用者为接下来的摄像头数据读取做准备。

import argparse
import cv2
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
import timeclass MNISTProcessor:def __init__(self, model_path="/home/ncut/models/myMNIST_Model.keras"):"""初始化模型"""self.model = tf.keras.models.load_model(model_path)def process_image(self, input_path="/home/ncut/Pictures/five.png", output_path=None):"""完整处理流程入口:param input_path: 输入图像路径:param output_path: 输出图像路径 (可选):return: 处理后的结果图像"""# 1. 加载图像image = self._load_image(input_path)if image is None:raise FileNotFoundError(f"图像文件 {input_path} 不存在或无法读取")# 2. 执行推理predicted_class, confidence, processed_image = self.predict(image)if output_path is None:output_path = f"/dev/shm/mnist_result_{int(time.time())}.png"# 3. 绘制,保存结果self.draw_results_save(image.copy(), predicted_class, confidence, processed_image, output_path)return True;def predict(self, image):"""执行推理"""gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)img_resized = cv2.resize(gray, (28, 28), interpolation=cv2.INTER_CUBIC)    # resize the width and heightimg_normalized = 1.0 - (img_resized.astype("float32") / 255.0)img_input = img_normalized.reshape(1, 28, 28, 1)    # reshape can change the dimensionpredictions = self.model.predict(img_input)predicted_class = np.argmax(predictions)    # pick out the largest element of the arrayconfidence = np.max(predictions)return predicted_class, confidence, img_normalizeddef draw_results_save(self, image, predicted_class, confidence, img_normalized, output_path):"""在图像上绘制检测结果"""plt.figure(figsize=(10, 4))# 左侧显示原始摄像头拍摄的彩色图像plt.subplot(1, 3, 1)plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # OpenCV 读取的 BGR 需转换为 RGBplt.title("The original color photo")plt.axis("off")# 中间显示处理后的灰度图plt.subplot(1, 3, 2)plt.imshow(img_normalized, cmap="gray")plt.title("gray map(28x28)")plt.axis("off")# 右侧显示推理结果plt.subplot(1, 3, 3)plt.title("inference result")plt.text(0.1, 0.6, f"predicition class: {predicted_class}", fontsize=14)plt.text(0.1, 0.4, f"confidence: {confidence:.4f}", fontsize=14)#plt.text(0.1, 0.2, f"Loss: {loss_value:.4f}", fontsize=14)plt.axis("off")plt.tight_layout()#plt.show()plt.savefig(output_path, bbox_inches="tight", pad_inches=0)plt.close()@staticmethoddef _load_image(path):"""加载图像"""if not os.path.exists(path):return Nonereturn cv2.imread(path)@staticmethoddef _save_cvimage(image, path):"""保存图像到指定路径"""cv2.imwrite(path, image)def main():# 命令行参数解析parser = argparse.ArgumentParser(description='MNIST 处理器')parser.add_argument('--input', required=True, help='输入图像路径')parser.add_argument('--output', help='输出图像路径 (可选)')args = parser.parse_args()print("parse succeed")# 创建处理器实例processor = MNISTProcessor()try:# 执行处理流程result_bool = processor.process_image(args.input, args.output)# 可选：显示结果（调试时使用）if os.environ.get('DEBUG_SHOW'):plt.imshow(cv2.cvtColor(cv2.imread(args.output), cv2.COLOR_BGR2RGB))plt.axis('off')plt.show()except Exception as e:print(f"处理失败: {str(e)}")exit(1)if __name__ == "__main__":print("sleep for 3 seconds")time.sleep(3)main()

Script3

能在虚拟机资源受限环境下实现的模型训练和推理，简单的训练模型demo。

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf# ==============================================
# 1. 加载本地 MNIST 数据集
# ==============================================
def load_local_mnist(path='./datasets/mnist.npz'):with np.load(path, allow_pickle=True) as f:x_train = f['x_train']y_train = f['y_train']x_test = f['x_test']y_test = f['y_test']return (x_train, y_train), (x_test, y_test)# 从本地路径加载数据
local_mnist_path = '/home/ncut/.keras/datasets/mnist.npz'  # 修改为你的实际路径
(x_train, y_train), (x_test, y_test) = load_local_mnist(local_mnist_path)# ==============================================
# 2. 数据预处理
# ==============================================
# 归一化像素值到 [0,1] 并展平图像（可选）
x_train = x_train.reshape(-1, 28*28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28*28).astype('float32') / 255.0# ==============================================
# 3. 可视化前 25 张训练集图片
# ==============================================
def plot_mnist_samples(images, labels, num_samples=25):plt.figure(figsize=(10, 10))for i in range(num_samples):plt.subplot(5, 5, i+1)plt.imshow(images[i].reshape(28, 28), cmap='gray')  # 若已展平，需 reshape 回 28x28plt.title(f"Label: {labels[i]}")plt.axis('off')plt.tight_layout()plt.show()# 调用可视化函数（使用原始未展平的图像数据）
_, (x_train_original, _) = load_local_mnist(local_mnist_path)  # 重新加载未展平的数据用于可视化
plot_mnist_samples(x_train_original, y_train)# ==============================================
# 4. 训练模型（基于展平数据）
# ==============================================
model = tf.keras.Sequential([tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),tf.keras.layers.Dropout(0.2),tf.keras.layers.Dense(10, activation='softmax')
])model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy']
)history = model.fit(x_train, y_train,epochs=5,validation_split=0.2,verbose=1
)# ==============================================
# 5. 评估测试集
# ==============================================
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'\nTest accuracy: {test_acc:.4f}')

Script4

训练模型，此处使用了数据增强和缓存，是自己在 Google colab上训练时的脚本。

import tensorflow as tf
import numpy as np
from sklearn.utils import class_weight# 1. 加载 MNIST 数据
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()# 2. 基本预处理：归一化 & 扩展通道维度
x_train = x_train.astype('float32') / 255.0
x_test  = x_test.astype('float32') / 255.0
x_train = np.expand_dims(x_train, axis=-1)  # (28,28) -> (28,28,1)
x_test  = np.expand_dims(x_test, axis=-1)# 3. 定义数据增强层
data_augmentation = tf.keras.Sequential([tf.keras.layers.RandomRotation(0.1),tf.keras.layers.RandomZoom(0.1),tf.keras.layers.RandomTranslation(0.1, 0.1),tf.keras.layers.Lambda(lambda x: tf.image.random_brightness(x, max_delta=0.1)),  # 加入随机亮度调整tf.keras.layers.RandomContrast(0.1)  # 加入随机对比度调整
])# 4. 定义数据增强函数
def augment(image, label):# image 的 shape 是 (28,28,1)，直接传入数据增强层即可image = data_augmentation(image, training=True)return image, label# 5. 构建训练和验证数据集
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024)
train_dataset = train_dataset.map(augment, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(64)
train_dataset = train_dataset.cache()  # 缓存数据，避免重复处理
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(64)
val_dataset = val_dataset.cache()
val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE)# 6. 计算 class_weight
# 这里使用原始的 y_train 数据计算类别权重
cw = class_weight.compute_class_weight(class_weight='balanced',classes=np.unique(y_train),y=y_train
)
class_weights = dict(enumerate(cw))
print("Class weights:", class_weights)# 7. 构建 CNN 模型
model = tf.keras.Sequential([tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),tf.keras.layers.MaxPooling2D((2, 2)),tf.keras.layers.Dropout(0.3),tf.keras.layers.Flatten(),tf.keras.layers.Dense(128, activation='relu'),tf.keras.layers.Dropout(0.3),tf.keras.layers.Dense(10, activation='softmax')
])# 8. 编译模型
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy']
)# 9. 训练模型，同时传入 class_weight
history = model.fit(train_dataset,epochs=15,validation_data=val_dataset,class_weight=class_weights
)# 保存和加载模型
model.save("myMNIST_Model.keras")
reconstructed_model = tf.keras.models.load_model("myMNIST_Model.keras")# 10. 评估模型
test_loss1, test_acc1 = model.evaluate(x_test, y_test)
test_loss2, test_acc2 = reconstructed_model.evaluate(x_test, y_test)
print("Test loss1:", test_loss1)
print(f"Test accuracy1: {test_acc1:.4f}")
print("Test loss2:", test_loss2)
print(f"Test accuracy2: {test_acc2:.3f}%")

additon

上述的四份脚本作为四份独立的脚本存储于我的虚拟机中，但它们的共通之处：模型的加载、数据的预处理、对模型检测结果的信息提取没有在此处体现。放任四份脚本中的共同之处不管而着急地对各个脚本进行处理，是一种放任思维的、不负责任的表现。

这份博客会在后面进行修改，提取出公共部分，对独立的部分进行描述说明。

MNIST 数据集与 TFOD API

script1

Scripts2

Script3

Script4

additon

相关文章：

MNIST 数据集与 TFOD API

SpringSecurity6.0 通过JWTtoken进行认证授权

【Java】Maven

第十五届蓝桥杯PythonC组题解

MATLAB中plot函数的详细参数表

R语言赋能气象水文科研：从多维数据处理到学术级可视化

虚拟试衣间-云尚衣橱小程序-衣橱管理实现

BGP路由协议之属性2

纯个人整理，蓝桥杯使用的算法模板day2（0-1背包问题），手打个人理解注释，超全面，且均已验证成功（附带详细手写“模拟流程图”，全网首个

算法与数据结构线性表之栈和队列

python应用之使用pdfplumber 解析pdf文件内容

laravel update报In PackageManifest.php line 122:Undefined index: name 错误的解决办法

Vue中使用antd-table组件实现数据选择、禁用、已选择禁用-demo

C语言--统计输入字符串中的单词个数

Kubernetes 集群搭建（三）：使用dashboard用户界面（需要访问外网获取yaml）

Debian 12 服务器搭建Beego环境

游戏引擎学习第208天

【在校课堂笔记】Python 第 7 节课总结

评价区动态加载是怎么实现的？

【＜二＞丹方改良：Spring 时代的 JavaWeb】之 Spring Boot 中的监控：使用 Actuator 实现健康检查

蓝桥杯—数字接龙(dfs+减枝)

Docker与VNC的使用

C++——清明

Unity ViewportConstraint

Gin、Echo 和 Beego三个 Go 语言 Web 框架的核心区别及各自的优缺点分析，结合其设计目标、功能特性与适用场景

ffmpeg视频转码相关

手搓多模态-06 数据预处理

HCIP【路由过滤技术（详解）】

【Kafka基础】topics命令行操作大全：高级命令解析（2）

【AI插件开发】Notepad++ AI插件开发实践（代码篇）：从Dock窗口集成到功能菜单实现