参考了大佬的文章《用Python识别验证码_python 自动输入验证码》(CSDN博客)。
修修补补的结果,直接贴源码吧
数字图片识别的源码(需要建立在模型训练的基础上):
# Recognise the characters of one captcha image with a k-nearest-neighbours
# model trained on the hand-labelled 30x30 character images in "label".
import cv2
import time
import os
import sys
import numpy as np
import shutil

# Create the scratch "char" folder that receives the segmented characters.
os.makedirs('char', exist_ok=True)

im = cv2.imread(r'D:\imgcode104.jfif')
if im is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None.
    raise FileNotFoundError(r'cannot read image D:\imgcode104.jfif')

# Grayscale conversion.
im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
# Inverse binarisation: dark glyphs become white on a black background.
ret, im_inv = cv2.threshold(im_gray, 127, 255, cv2.THRESH_BINARY_INV)
# 3x3 Gaussian blur to suppress isolated noise pixels.
kernel = 1 / 16 * np.array([[1, 2, 1], [2, 4, 2], [1, 2, 1]])
im_blur = cv2.filter2D(im_inv, -1, kernel)
# Binarise again after blurring.
ret, im_res = cv2.threshold(im_blur, 127, 255, cv2.THRESH_BINARY)

# Extract the outer contours. Taking the last two return values keeps this
# working on OpenCV 3 (three return values) as well as OpenCV 4 (two).
contours, hierarchy = cv2.findContours(
    im_res, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# One axis-aligned bounding box (four corner points) per contour.
result = []
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    # np.intp is the dtype the removed alias np.int0 used to refer to.
    box = np.array([[x, y], [x + w, y], [x + w, y + h], [x, y + h]],
                   dtype=np.intp)
    result.append(box)
print(result)

# Cut every box out of the binarised image and store it as its own file.
char_images = []
for index, box in enumerate(result):
    # Outline the character on the colour image (the script neither shows
    # nor saves that image afterwards).
    cv2.drawContours(im, [box], 0, (0, 0, 255), 2)
    roi = im_res[box[0][1]:box[3][1], box[0][0]:box[1][0]]
    # Normalise every character to 30x30 so it flattens to 900 features.
    roistd = cv2.resize(roi, (30, 30))
    timestamp = int(time.time() * 1e6)
    # The index suffix keeps names unique even when the clock does not
    # advance between two iterations.
    filename = "{}_{}.jpg".format(timestamp, index)
    filepath = os.path.join("char", filename)
    cv2.imwrite(filepath, roistd)
    # Remember where the character came from so the output can be ordered.
    char_images.append({"box": box, "filepath": filepath})

# Load the labelled samples from "label". The label is the text after the
# last underscore of the file name (before the first dot).
sample_rows = []
labels = []
for filename in os.listdir("label"):
    filepath = os.path.join("label", filename)
    label_im = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    if label_im is None:
        # Not a readable image (e.g. a stray non-image file): skip it.
        print("skipping unreadable file {}".format(filepath), file=sys.stderr)
        continue
    labels.append(filename.split(".")[0].split("_")[-1])
    sample_rows.append(label_im.reshape((1, 900)).astype(np.float32))

if not sample_rows:
    raise RuntimeError("no labelled samples found in the 'label' folder")

# Stack once instead of growing the array with np.append on every iteration.
samples = np.vstack(sample_rows).astype(np.float32)

# Map each distinct label to a numeric id (sorted for a stable mapping).
unique_labels = sorted(set(labels))
unique_ids = list(range(len(unique_labels)))
label_id_map = dict(zip(unique_labels, unique_ids))
id_label_map = dict(zip(unique_ids, unique_labels))
label_ids = [label_id_map[label] for label in labels]
label_ids = np.array(label_ids).reshape((-1, 1)).astype(np.float32)

model = cv2.ml.KNearest_create()
model.train(samples, cv2.ml.ROW_SAMPLE, label_ids)

# Order the characters left to right by the x coordinate of their box.
char_images.sort(key=lambda x: x["box"][0][0])

recognised = []
for char_image_info in char_images:
    char_image = cv2.imread(char_image_info["filepath"], cv2.IMREAD_GRAYSCALE)
    sample = char_image.reshape((1, 900)).astype(np.float32)
    ret, results, neighbours, distances = model.findNearest(sample, k=3)
    label_id = int(results[0, 0])
    recognised.append(id_label_map[label_id])
result_str = "".join(recognised)

# Everything is done: remove the scratch "char" folder.
shutil.rmtree('char')
print(result_str)
然后是最费力的一集(模型训练的前提):
简单来说就是多下载一些验证码图片到本地,然后运行下面这个demo:它会把每张图片切割成单个字符并逐个显示,人工按下对应的按键完成标注(按Enter跳过当前字符,按Esc退出)。
我下载了100张,识别准确率大概有90%。按道理标注的样本越多识别越准确,下载多少张大家自行决定。
# Interactive labelling tool: cuts captcha images into single characters and
# asks the user to type the character shown, producing the training set.
import cv2 as cv
import time
import os
import glob
import sys
import numpy as np


def recognize_text(image):
    """Segment *image* into characters and label the unlabelled ones by hand.

    Every external contour of the binarised image is cropped, resized to
    30x30 and written to the "char" folder. Afterwards each file in "char"
    that has no counterpart in "label" is displayed; the key pressed by the
    user becomes its label and the image is saved as
    "label/<name>_<char>.jpg". Enter skips the current character, Esc
    terminates the program.

    Args:
        image: BGR image as returned by ``cv.imread``. The bounding boxes
            are drawn onto it in place.
    """
    # cv.imwrite fails silently when the target folder is missing.
    os.makedirs("char", exist_ok=True)
    os.makedirs("label", exist_ok=True)

    # Edge-preserving filter to remove noise.
    blur = cv.pyrMeanShiftFiltering(image, sp=8, sr=60)
    # Grayscale conversion.
    gray = cv.cvtColor(blur, cv.COLOR_BGR2GRAY)
    # Inverse binarisation.
    ret, binary = cv.threshold(gray, 127, 255, cv.THRESH_BINARY_INV)
    # Taking the last two return values works on both OpenCV 3 and 4.
    contours, hierarchy = cv.findContours(
        binary, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)[-2:]

    result = []
    for contour in contours:
        x, y, w, h = cv.boundingRect(contour)
        # np.intp is the dtype the removed alias np.int0 used to refer to.
        box = np.array([[x, y], [x + w, y], [x + w, y + h], [x, y + h]],
                       dtype=np.intp)
        result.append(box)
    print(result)

    for index, box in enumerate(result):
        cv.drawContours(image, [box], 0, (0, 0, 255), 2)
        roi = binary[box[0][1]:box[3][1], box[0][0]:box[1][0]]
        # Normalise every character image to 30x30.
        roistd = cv.resize(roi, (30, 30))
        # Timestamp-based name; the index suffix keeps it unique even when
        # the clock does not advance between two iterations.
        timestamp = int(time.time() * 1e6)
        filename = "{}_{}.jpg".format(timestamp, index)
        filepath = os.path.join("char", filename)
        cv.imwrite(filepath, roistd)

    for filename in os.listdir("char"):
        filename_ts = filename.split(".")[0]
        patt = "label/{}_*".format(filename_ts)
        if glob.glob(patt):
            # Already labelled in this or an earlier run.
            print("{} done".format(patt))
            continue
        filepath = os.path.join("char", filename)
        im = cv.imread(filepath)
        if im is None:
            # Not a readable image: nothing to show or label.
            continue
        cv.imshow("image", im)
        key = cv.waitKey(0)
        if key < 0:
            # No key was delivered (e.g. the window was closed).
            continue
        # Some builds set high bits in the key code; keep the low byte only.
        key &= 0xFF
        if key == 27:  # Esc: quit
            sys.exit()
        if key == 13:  # Enter: skip this character
            continue
        char = chr(key)
        if not char.isalnum():
            # Characters such as "." or "_" would break the
            # "<name>_<char>.jpg" naming that the training code parses.
            print("ignored key {!r}".format(char))
            continue
        outfile = "{}_{}.jpg".format(filename_ts, char)
        outpath = os.path.join("label", outfile)
        cv.imwrite(outpath, im)


# Process the downloaded images numbered 1 to 100.
for i in range(1, 101):
    path = r'D:\imgcode' + str(i) + '.jfif'
    src = cv.imread(path)
    if src is None:
        # cv.imread returns None instead of raising for a missing file.
        print("cannot read {}, skipped".format(path))
        continue
    recognize_text(src)
最后是模型训练(源码和参考的一致):
上面的识别源码中已经整合进去了
# Train a k-nearest-neighbours classifier on the labelled 30x30 character
# images stored in the "label" folder.
import cv2 as cv
import os
import sys
import numpy as np

# Each file contributes one 900-element row; its label is the text after the
# last underscore of the file name (before the first dot).
sample_rows = []
labels = []
for filename in os.listdir("label"):
    filepath = os.path.join("label", filename)
    im = cv.imread(filepath, cv.IMREAD_GRAYSCALE)
    if im is None:
        # cv.imread returns None for files it cannot decode: skip them.
        print("skipping unreadable file {}".format(filepath), file=sys.stderr)
        continue
    labels.append(filename.split(".")[0].split("_")[-1])
    sample_rows.append(im.reshape((1, 900)).astype(np.float32))

if not sample_rows:
    raise RuntimeError("no labelled samples found in the 'label' folder")

# Stack once instead of growing the array with np.append on every iteration.
samples = np.vstack(sample_rows).astype(np.float32)

# Map each distinct label to a numeric id (sorted for a stable mapping) and
# keep the reverse map for turning predictions back into characters.
unique_labels = sorted(set(labels))
unique_ids = list(range(len(unique_labels)))
label_id_map = dict(zip(unique_labels, unique_ids))
id_label_map = dict(zip(unique_ids, unique_labels))
label_ids = [label_id_map[label] for label in labels]
label_ids = np.array(label_ids).reshape((-1, 1)).astype(np.float32)

model = cv.ml.KNearest_create()
model.train(samples, cv.ml.ROW_SAMPLE, label_ids)
最后提一下,记得提前创建label和char这两个文件夹,可能对新手来说很容易忽略吧