1. Keras环境安装
## Keras 的安装步骤请参考原文链接(原文此处为"点击打开链接"超链接,链接地址未保留)
2. 文本图片素材-文字切割并保存切割图片
# -*- coding: UTF-8 -*-
import cv2
import numpy as np
import matplotlib.pyplot as plt
def median_split_ranges(peek_ranges):
    """Split unusually wide ranges into several roughly median-width pieces.

    The width of every range is computed as ``end - start + 1`` and compared
    with the median width of all ranges.  A range whose width rounds to
    ``k > 1`` multiples of the median is cut into ``k`` consecutive
    sub-ranges of equal (fractional) width; every other range is passed
    through unchanged.

    Args:
        peek_ranges: sized sequence of ``(start, end)`` integer pairs.

    Returns:
        A new list of ``(start, end)`` tuples in the same order as the
        input.  An empty input yields an empty list.
    """
    if len(peek_ranges) == 0:
        # np.median of an empty array emits a RuntimeWarning and returns nan.
        return []

    widths = np.asarray([r[1] - r[0] + 1 for r in peek_ranges])
    median_w = np.median(widths)

    new_peek_ranges = []
    for peek_range, width in zip(peek_ranges, widths):
        num_char = int(round(width / median_w, 0))
        if num_char > 1:
            char_w = float(width / num_char)
            origin = peek_range[0]
            # Dedicated index name so the outer loop variable is not shadowed.
            for k in range(num_char):
                start_point = origin + int(k * char_w)
                end_point = origin + int((k + 1) * char_w)
                new_peek_ranges.append((start_point, end_point))
        else:
            new_peek_ranges.append(peek_range)
    return new_peek_ranges
def extract_peek_ranges_from_array(array_vals, minimun_val=10, minimun_range=2):
    """Find runs of consecutive values that are above a threshold.

    A run starts at the first index whose value is strictly greater than
    ``minimun_val`` and ends at the first following index whose value is
    not (the end index is exclusive).  Runs shorter than ``minimun_range``
    are discarded.

    Compared with the earlier implementation:
    * a value exactly equal to ``minimun_val`` no longer raises
      ``ValueError``; it is treated as "not above the threshold";
    * a run that is still open when the data ends is emitted with the
      array length as its end instead of being dropped.

    Args:
        array_vals: iterable of numeric values (e.g. a row/column sum).
        minimun_val: threshold a value must exceed to belong to a run.
        minimun_range: minimum ``end - start`` length of a reported run.

    Returns:
        List of ``(start, end)`` index tuples in ascending order.
    """
    peek_ranges = []
    start_i = None
    length = 0
    for i, val in enumerate(array_vals):
        length = i + 1
        if val > minimun_val:
            if start_i is None:
                start_i = i
        elif start_i is not None:
            # First non-peak value after a run: close the run here.
            if i - start_i >= minimun_range:
                peek_ranges.append((start_i, i))
            start_i = None
    # Close a run that reaches the very end of the data.
    if start_i is not None and length - start_i >= minimun_range:
        peek_ranges.append((start_i, length))
    return peek_ranges
def get_font_face_peek_ranges(path_test_image):
    """Locate text lines and per-line character columns in an image.

    The image is read with OpenCV, upscaled by a factor of two in both
    dimensions, converted to grayscale and binarised with an inverted
    Gaussian adaptive threshold.  Row sums of the binary image are
    segmented into line ranges; for every line, column sums are segmented
    into character ranges, which are then refined by
    ``median_split_ranges``.

    Args:
        path_test_image: path of the image file handed to ``cv2.imread``.

    Returns:
        A tuple ``(peek_ranges, vertical_peek_ranges2d, image_color)``:
        the row ranges (one per detected line), a list holding the column
        ranges of each line in the same order, and the upscaled colour
        image.  All ranges are in coordinates of the upscaled image.

    Raises:
        IOError: if OpenCV cannot read the image.
    """
    image_color = cv2.imread(path_test_image)
    if image_color is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("cannot read image: {!r}".format(path_test_image))

    # cv2.resize expects (width, height), i.e. (shape[1], shape[0]).
    new_shape = (image_color.shape[1] * 2, image_color.shape[0] * 2)
    image_color = cv2.resize(image_color, new_shape)
    image = cv2.cvtColor(image_color, cv2.COLOR_BGR2GRAY)
    adaptive_threshold = cv2.adaptiveThreshold(
        image,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2)

    horizontal_sum = np.sum(adaptive_threshold, axis=1)
    # Debug aid kept from the original: draws the row profile on the current
    # pyplot figure; nothing is displayed here because plt.show() is not called.
    plt.plot(horizontal_sum, range(horizontal_sum.shape[0]))
    plt.gca().invert_yaxis()
    peek_ranges = extract_peek_ranges_from_array(horizontal_sum)

    # One pass per text line (the original computed this twice and threw
    # the first result away).
    vertical_peek_ranges2d = []
    for peek_range in peek_ranges:
        start_y = peek_range[0]
        end_y = peek_range[1]
        line_img = adaptive_threshold[start_y:end_y, :]
        vertical_sum = np.sum(line_img, axis=0)
        vertical_peek_ranges = extract_peek_ranges_from_array(
            vertical_sum,
            minimun_val=40,
            minimun_range=1)
        vertical_peek_ranges = median_split_ranges(vertical_peek_ranges)
        vertical_peek_ranges2d.append(vertical_peek_ranges)
    return peek_ranges, vertical_peek_ranges2d, image_color
if __name__ == "__main__":
    # Demo: segment tmp/font.png and show every detected character box.
    # Guarded so that importing this module for its helper functions does
    # not open a window and block on cv2.waitKey.
    color = (0, 0, 255)  # red in OpenCV's BGR channel order
    path_test_image = "tmp/font.png"
    peek_ranges, vertical_peek_ranges2d, image_color = \
        get_font_face_peek_ranges(path_test_image)
    for peek_range, vertical_ranges in zip(peek_ranges, vertical_peek_ranges2d):
        top, bottom = peek_range[0], peek_range[1]
        for vertical_range in vertical_ranges:
            left, right = vertical_range[0], vertical_range[1]
            # The original also sliced an unused crop here with "- 2"
            # offsets that could go negative; it has been dropped.
            cv2.rectangle(image_color, (left, top), (right, bottom), color)
    cv2.imshow('image', image_color)
    cv2.waitKey(0)
3. 训练
FILE_PATH = "model.h5"  # where the trained model is stored and later read from
IMAGE_SIZE = 128
PATH = "fonts"

# NOTE(review): read_file, train_test_split, np_utils, Sequential and the
# layer classes are not imported anywhere in this excerpt -- confirm they are
# imported/defined before this point.
imgs, labels, counter = read_file(PATH, IMAGE_SIZE)
X_train, X_test, y_train, y_test = train_test_split(
    imgs, labels, test_size=0.2, random_state=0)

# Reshape to (samples, 1, IMAGE_SIZE, IMAGE_SIZE), i.e. a channels-first
# layout with a single channel, and divide the pixel values by 255.
X_train = X_train.reshape(X_train.shape[0], 1, IMAGE_SIZE, IMAGE_SIZE) / 255.0
X_test = X_test.reshape(X_test.shape[0], 1, IMAGE_SIZE, IMAGE_SIZE) / 255.0
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# One-hot encode the integer labels; `counter` is used as the class count.
Y_train = np_utils.to_categorical(y_train, num_classes=counter)
Y_test = np_utils.to_categorical(y_test, num_classes=counter)

model = Sequential()
# `data_format='channels_first'` replaces the Keras 1 spelling
# `dim_ordering='th'` and is repeated on every conv/pool layer so the whole
# stack agrees with the (1, H, W) input regardless of the global Keras config.
model.add(
    Convolution2D(
        filters=32,
        kernel_size=(5, 5),
        padding='same',
        data_format='channels_first',
        input_shape=X_train.shape[1:]
    )
)
model.add(Activation('relu'))
model.add(
    MaxPooling2D(
        pool_size=(2, 2),
        strides=(2, 2),
        padding='same',
        data_format='channels_first'
    )
)
model.add(
    Convolution2D(
        filters=64,
        kernel_size=(5, 5),
        padding='same',
        data_format='channels_first'
    )
)
model.add(Activation('relu'))
model.add(
    MaxPooling2D(
        pool_size=(2, 2),
        strides=(2, 2),
        padding='same',
        data_format='channels_first'
    )
)
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(counter))
model.add(Activation('softmax'))
model.summary()

# The original call read `optimizer='adam', optimizer,` which is a syntax
# error (positional argument after a keyword argument).
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=32, batch_size=32)

loss, accuracy = model.evaluate(X_test, Y_test)
print('test loss;', loss)
print('test accuracy:', accuracy)
model.save(FILE_PATH)
4. 识别文字图片
a. 图片文字切割
b. 文字识别
# -*- coding: UTF-8 -*-
from keras.models import load_model
import cv2
import numpy as np
import utils
import os
def getLetter(model, img, name_list, IMAGE_SIZE):
    """Classify one character crop and return its label and probability.

    The crop is resized to ``IMAGE_SIZE x IMAGE_SIZE``, converted from BGR
    to grayscale, reshaped to ``(1, 1, IMAGE_SIZE, IMAGE_SIZE)``, cast to
    float32 and divided by 255 before being passed to the model.

    Args:
        model: object exposing ``predict_proba`` or ``predict``; the call
            must return an array indexable as ``result[0][class_index]``.
        img: BGR image array of the character (must be non-empty).
        name_list: sequence mapping a class index to its label.
        IMAGE_SIZE: edge length, in pixels, of the model input.

    Returns:
        ``(label, prob)`` for the highest-scoring class.  A tuple is always
        returned; the original had an unreachable branch returning a bare
        ``""`` that callers unpacking two values could not have handled.
    """
    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = img.reshape((1, 1, IMAGE_SIZE, IMAGE_SIZE))
    img = img.astype('float32') / 255.0

    # Prefer predict_proba when the model still has it; fall back to
    # predict for model objects that no longer provide predict_proba.
    predict = getattr(model, "predict_proba", None) or model.predict
    result = predict(img)

    max_index = int(np.argmax(result))
    return name_list[max_index], result[0][max_index]
FILE_PATH = "model.h5"
IMAGE_SIZE = 128
path_test_image = "tmp/font.png"

# NOTE(review): readName is not defined or imported in this excerpt --
# confirm it is available before this script runs.
name_list = readName()
model = load_model(FILE_PATH)
peek_ranges, vertical_peek_ranges2d, image_color = \
    utils.get_font_face_peek_ranges(path_test_image)

margin = 2  # extra pixels kept around every character box
letters = []
for peek_range, vertical_ranges in zip(peek_ranges, vertical_peek_ranges2d):
    # Clamp at 0: a negative slice start would wrap around to the far edge
    # of the image and yield an empty crop for glyphs near the border.
    top = max(0, peek_range[0] - margin)
    bottom = peek_range[1] + margin
    for vertical_range in vertical_ranges:
        left = max(0, vertical_range[0] - margin)
        right = vertical_range[1] + margin
        image = image_color[top:bottom, left:right]
        letter, _prob = getLetter(model, image, name_list, IMAGE_SIZE)
        letters.append(letter)

# Recognised characters in reading order: line by line, left to right.
result = "".join(letters)
print(result)
最后一段代码里的 readName() 函数是自定义的吗?可以发一下它的实现吗?(2 年前回复)
[点赞]
u010379996
zhuzihuaile回复:
# Collect the names of the immediate sub-directories of PATH_DIR, in the
# order os.listdir returns them.
# NOTE(review): presumably the position in name_list must match the class
# index used during training -- verify against read_file.
PATH_DIR = "fonts"
name_list = [
    entry
    for entry in os.listdir(PATH_DIR)
    if os.path.isdir(PATH_DIR + "/" + entry)
]
c. 测试结果(还需优化)