如何让一个网络同时分类一张图像的两个独立标签?
一般我们会构建一个输出网络,每一个label作为属性输出;或者构建两个分支网络,针对不同label输出。
1、数据集组成(fashion)
本人的数据集有12类,共计5547张图片。其中有6类是从网上下载获取的,剩余的6类是自己在网上爬虫分类整理得到的。
该数据集主要是有两类信息:颜色(黑色、红色、蓝色、白色)和服饰类型(牛仔裤、连衣裙、短袖、鞋子、包包),具体的数据集内容如下:
黑色连衣裙:black_dress(333张)
黑色牛仔裤:black_jeans(344张)
黑色短袖:black_shirt(436张)
黑色鞋子:black_shoe(534张)
蓝色连衣裙:blue_dress(386张)
蓝色牛仔裤:blue_jeans(356张)
蓝色短袖:blue_shirt(369张)
红色连衣裙:red_dress(384张)
红色短袖:red_shirt(332张)
红色鞋子:red_shoe(486张)
白色包包:white_bag(747张)
白色鞋子:white_shoe(840张)
2、构建网络(单输出)
2.1、采用类似vgg的网络结构(SimpleNet)
-
class SimpleNet(object):
    """VGG-like single-output CNN assembled with the Keras Sequential API.

    Layout: one stem block (CONV => RELU => BN => 3x3 POOL => Dropout),
    three double-convolution blocks with 64, 128 and 256 filters, global
    average pooling, and a dense classifier head.
    """

    def __init__(self, input_shape, classes, finalAct="softmax"):
        """Store the configuration used later by ``build_model``.

        Args:
            input_shape: Value forwarded as ``input_shape`` to the first
                ``Conv2D`` layer.
            classes: Number of units of the final ``Dense`` layer.
            finalAct: Name of the activation applied after the final
                ``Dense`` layer.
        """
        self.input_shape = input_shape
        self.classes = classes
        self.finalAct = finalAct
        # BatchNormalization has to normalize over the channel axis, whose
        # position depends on the backend image data format.
        self.chanDim = 1 if K.image_data_format() == "channels_first" else -1

    def build_model(self):
        """Build the network, print its summary and return the model."""
        model = Sequential()

        # Stem: CONV => RELU => BN => POOL => Dropout
        model.add(Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                         padding="same", input_shape=self.input_shape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=self.chanDim))
        model.add(MaxPooling2D(pool_size=(3, 3)))
        model.add(Dropout(0.25))

        # Three identical stages: (CONV => RELU => BN) * 2 => POOL => Dropout
        for filters in (64, 128, 256):
            for _ in range(2):
                model.add(Conv2D(filters, (3, 3), padding="same"))
                model.add(Activation("relu"))
                model.add(BatchNormalization(axis=self.chanDim))
            model.add(MaxPooling2D(pool_size=(2, 2)))
            model.add(Dropout(0.25))

        # Global average pooling takes the place of a fully connected layer.
        head = (
            GlobalAveragePooling2D(),
            Activation("relu"),
            BatchNormalization(),
            Dropout(0.5),
            # Classifier: one unit per class followed by the final activation.
            Dense(self.classes),
            Activation(self.finalAct),
        )
        for layer in head:
            model.add(layer)

        model.summary()
        return model
说明:该种结构仅能识别上述12类,若是出现了某类其他类型和颜色搭配,如红色包包,则会识别错误。
在单标签的多分类问题中,输出层最常用的是softmax。而在多标签分类中,各个标签之间相互独立,每个标签都可以看作一个独立的二分类问题,因此输出层常用的激活函数是sigmoid函数。
在多标签分类中,大多使用binary_crossentropy损失而不是通常在多类分类中使用的categorical_crossentropy损失函数。
2.2、采用多分支的网络结构(FashionNet)
该网络结构包含两个分支:一个用于识别服饰类型,一个用于识别颜色。类型识别主要依靠形状,对应的分支结构可以复杂一些,并且该分支会先对传入的图片做灰度化处理;颜色识别比较简单,因此对应的分支结构也比较简单。
该结构的好处是可以识别出数据集中没有出现过的类型与颜色组合,比如蓝色鞋子、红色包包等,而前一个网络结构则无法识别。
-
class FashionNet(object):
    """Two-branch CNN that predicts a category and a color from one input.

    Both branches read the same input tensor. The category branch first
    converts the input to grayscale and ends in a layer named
    ``category_output``; the color branch is shallower and ends in a layer
    named ``color_output``.
    """

    def __init__(self, input_shape, category_classes, color_classes, finalAct="softmax"):
        """Store the configuration used later by the build methods.

        Args:
            input_shape: Shape handed to the Keras ``Input`` layer.
            category_classes: Number of units of the category classifier.
            color_classes: Number of units of the color classifier.
            finalAct: Name of the activation applied to both output layers.
        """
        self.input_shape = input_shape
        self.category_classes = category_classes
        self.color_classes = color_classes
        self.finalAct = finalAct
        # BatchNormalization has to normalize over the channel axis, whose
        # position depends on the backend image data format.
        self.chanDim = 1 if K.image_data_format() == "channels_first" else -1

    def _conv_relu_bn(self, x, filters):
        """Apply a 3x3 same-padded CONV => RELU => BN stack to ``x``."""
        x = Conv2D(filters=filters, kernel_size=(3, 3), strides=(1, 1), padding="same")(x)
        x = Activation("relu")(x)
        return BatchNormalization(axis=self.chanDim)(x)

    def _double_conv_stage(self, x, filters):
        """Apply (CONV => RELU => BN) * 2 => 2x2 POOL => Dropout(0.25)."""
        for _ in range(2):
            x = self._conv_relu_bn(x, filters)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        return Dropout(0.25)(x)

    def build_category_branch(self, inputs):
        """Build the category branch on ``inputs`` and return its output tensor."""
        # Convert the 3-channel (RGB) input to a single gray channel.
        x = Lambda(lambda c: tf.image.rgb_to_grayscale(c))(inputs)

        # Stem: CONV => RELU => BN => POOL (no dropout in this first stage)
        x = self._conv_relu_bn(x, 32)
        x = MaxPooling2D(pool_size=(3, 3))(x)

        # Three double-convolution stages of growing width.
        for filters in (64, 128, 256):
            x = self._double_conv_stage(x, filters)

        # Global average pooling takes the place of a fully connected layer.
        x = GlobalAveragePooling2D()(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)

        # Classifier head; the layer name is the key used for losses/targets.
        x = Dense(self.category_classes)(x)
        return Activation(self.finalAct, name='category_output')(x)

    def build_color_branch(self, inputs):
        """Build the color branch on ``inputs`` and return its output tensor."""
        # Stem: CONV => RELU => BN => POOL
        x = self._conv_relu_bn(inputs, 16)
        x = MaxPooling2D(pool_size=(3, 3))(x)

        # Two double-convolution stages.
        for filters in (32, 64):
            x = self._double_conv_stage(x, filters)

        # Fully connected head.
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation('relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(self.color_classes)(x)
        return Activation(self.finalAct, name='color_output')(x)

    def build_model(self):
        """Assemble both branches into one model, print its summary, return it."""
        inputs = Input(shape=self.input_shape)
        # Build order is category first, then color, matching the output order.
        category_branch = self.build_category_branch(inputs)
        color_branch = self.build_color_branch(inputs)

        model = Model(inputs=inputs, outputs=[category_branch, color_branch])
        model.summary()
        return model
3、模型训练
针对两种不同的方式,训练代码中的函数做了如下区分:
-
#! -*- coding:utf-8
-
-
# import the necessary packages
-
from keras.preprocessing.image import ImageDataGenerator
-
from keras.optimizers import Adam
-
from keras.preprocessing.image import img_to_array
-
from sklearn.preprocessing import MultiLabelBinarizer,LabelBinarizer
-
from sklearn.model_selection import train_test_split
-
from cnn import SimpleNet
-
#from cnn import SmallerInceptionNet
-
from cnn import FashionNet
-
import matplotlib.pyplot as plt
-
from imutils import paths
-
import numpy as np
-
import argparse
-
import random
-
import pickle
-
import cv2
-
import os
-
from PIL import Image
-
-
# grab the image paths and randomly shuffle them
-
def load_data(data_dir, img_size):
    """Load all images below ``data_dir`` with their multi-label tags.

    The tags of an image are the underscore-separated parts of the name of
    the directory that contains it. Images are read unchanged, normalized to
    3-channel BGR, resized to ``img_size`` and scaled by 1/255.

    Args:
        data_dir: Root directory searched for images.
        img_size: Size tuple handed to ``cv2.resize``.

    Returns:
        ``None`` when ``data_dir`` does not exist, otherwise a tuple
        ``(datas, labels)`` of arrays.
    """
    print("[INFO] loading images...")
    if not os.path.exists(data_dir):
        return None

    image_files = sorted(list(paths.list_images(data_dir)))
    # Fixed seed keeps the shuffled order reproducible between runs.
    random.seed(42)
    random.shuffle(image_files)

    datas, labels = [], []
    for image_file in image_files:
        pixels = cv2.imread(image_file, cv2.IMREAD_UNCHANGED)
        if pixels is None:
            # Unreadable file: report its path and move on.
            print(image_file)
            continue

        rank = len(pixels.shape)
        if rank == 2:
            # 8-bit single-plane image: let PIL expand it to 24-bit RGB,
            # then switch to OpenCV's BGR channel order.
            with Image.open(image_file) as pil_image:
                rgb_image = pil_image.convert('RGB')
            pixels = cv2.cvtColor(np.asarray(rgb_image), cv2.COLOR_RGB2BGR)
        elif rank == 3:
            channels = pixels.shape[2]
            if channels == 4:
                pixels = cv2.cvtColor(pixels, cv2.COLOR_BGRA2BGR)
            elif channels == 1:
                pixels = cv2.cvtColor(pixels, cv2.COLOR_GRAY2BGR)

        pixels = cv2.resize(pixels, img_size)
        datas.append(img_to_array(pixels))
        labels.append(image_file.split(os.path.sep)[-2].split("_"))

    # Scale the raw pixel intensities to the range [0, 1].
    scaled = np.array(datas, dtype="float") / 255.0
    return scaled, np.array(labels)
-
-
def load_data_multilabels(data_dir, img_size):
    """Load all images below ``data_dir`` with separate color/category labels.

    The directory that contains an image must be named ``<color>_<category>``
    (exactly one underscore); any other name makes the unpacking raise
    ``ValueError``. Images are resized to ``img_size``, converted from BGR
    to RGB and scaled by 1/255.

    Args:
        data_dir: Root directory searched for images.
        img_size: Size tuple handed to ``cv2.resize``.

    Returns:
        ``None`` when ``data_dir`` does not exist, otherwise a tuple
        ``(datas, category_labels, color_labels)`` of arrays.
    """
    print("[INFO] loading images...")
    if not os.path.exists(data_dir):
        return None

    imagePaths = sorted(list(paths.list_images(data_dir)))
    # Fixed seed keeps the shuffled order reproducible between runs.
    random.seed(42)
    random.shuffle(imagePaths)

    datas = []
    category_labels = []
    color_labels = []
    for imagePath in imagePaths:
        # With its default flag cv2.imread always produces a 3-channel BGR
        # image, so no alpha or grayscale handling is needed here (the former
        # 4-channel check could never be true).
        image = cv2.imread(imagePath)
        if image is None:
            # Unreadable file: report its path and move on.
            print(imagePath)
            continue

        image = cv2.resize(image, img_size)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        datas.append(img_to_array(image))

        (color_label, category_label) = imagePath.split(os.path.sep)[-2].split("_")
        category_labels.append(category_label)
        color_labels.append(color_label)

    # Scale the raw pixel intensities to the range [0, 1].
    datas = np.array(datas, dtype="float") / 255.0
    category_labels = np.array(category_labels)
    color_labels = np.array(color_labels)
    return datas, category_labels, color_labels
-
-
# binarize the labels using scikit-learn's special multi-label
-
def binarize_multilabels_and_save(labels, path):
    """Fit a ``MultiLabelBinarizer`` on ``labels`` and pickle it to ``path``.

    Prints the first six encoded rows, the encoded shape and the numbered
    class names.

    Args:
        labels: Collection of label collections, one per sample.
        path: File that receives the pickled binarizer.

    Returns:
        Tuple ``(encoded_labels, number_of_classes)``.
    """
    binarizer = MultiLabelBinarizer()
    encoded = binarizer.fit_transform(labels)

    print(encoded[:6])
    print('labels shape:', encoded.shape)
    for number, name in enumerate(binarizer.classes_, 1):
        print("{}. {}".format(number, name))

    # Persist the fitted binarizer so predictions can be decoded later.
    with open(path, "wb") as handle:
        pickle.dump(binarizer, handle)

    return encoded, len(binarizer.classes_)
-
-
def binarize_labels_and_save(category_labels, color_labels, category_path, color_path):
    """Fit one ``LabelBinarizer`` per label set and pickle both.

    Prints the numbered class names of each binarizer.

    Args:
        category_labels: Category label of every sample.
        color_labels: Color label of every sample.
        category_path: File that receives the pickled category binarizer.
        color_path: File that receives the pickled color binarizer.

    Returns:
        Tuple ``(encoded_categories, encoded_colors, number_of_categories,
        number_of_colors)``.
    """
    category_lb = LabelBinarizer()
    color_lb = LabelBinarizer()
    encoded_categories = category_lb.fit_transform(category_labels)
    encoded_colors = color_lb.fit_transform(color_labels)

    # Show every possible class label of both label sets.
    for number, name in enumerate(category_lb.classes_, 1):
        print("category {}. {}".format(number, name))
    for number, name in enumerate(color_lb.classes_, 1):
        print("color {}. {}".format(number, name))

    # Persist both fitted binarizers so predictions can be decoded later.
    for target, binarizer in ((category_path, category_lb), (color_path, color_lb)):
        with open(target, "wb") as handle:
            pickle.dump(binarizer, handle)

    return (encoded_categories, encoded_colors,
            len(category_lb.classes_), len(color_lb.classes_))
-
-
# model_type='SimpleNet' 'SmallerInceptionNet'
-
def train_model(datas, labels, classes, finalAct='sigmoid', model_type='SimpleNet'):
    """Train a single-output multi-label model and save it as an HDF5 file.

    The data is split 80/20 into training and validation sets, the model is
    compiled with binary cross-entropy and Adam, trained, and written to
    ``trained_mode/<model_type>.h5``.

    Args:
        datas: Image array used as model input.
        labels: Binarized label array matching ``datas``.
        classes: Number of output units.
        finalAct: Activation of the output layer.
        model_type: ``'SimpleNet'`` selects ``SimpleNet``; any other value
            selects ``SmallerInceptionNet``.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.

    Raises:
        ImportError: If ``SmallerInceptionNet`` is requested but the ``cnn``
            module does not provide it.
    """
    EPOCHS = 20
    INIT_LR = 1e-3
    BATCH_SIZE = 32
    INPUT_SHAPE = (96, 96, 3)
    MODEL_DIR = 'trained_mode'

    (trainX, testX, trainY, testY) = train_test_split(datas, labels, test_size=0.2, random_state=42)

    if model_type == 'SimpleNet':
        simpleNet = SimpleNet(INPUT_SHAPE, classes, finalAct)
        model = simpleNet.build_model()
    else:
        # The module-level import of SmallerInceptionNet is commented out,
        # so import it here; a missing class then fails with a clear
        # ImportError instead of a NameError.
        from cnn import SmallerInceptionNet
        smallerInceptionNet = SmallerInceptionNet()
        model = smallerInceptionNet.build_model(INPUT_SHAPE, classes, finalAct)

    # Make sure the output directory exists before the long training run, so
    # the final save cannot fail on a missing directory.
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)

    opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
    # Binary cross-entropy treats every output unit as its own yes/no label.
    model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

    history = model.fit(trainX, trainY, batch_size=BATCH_SIZE,
                        epochs=EPOCHS, verbose=1,
                        validation_data=(testX, testY))

    model.save('trained_mode/' + '{}.h5'.format(model_type))
    return history
-
-
def train_fashionnet_model(datas, category_labels, color_labels, category_classes, color_classes, finalAct='softmax'):
    """Train the two-branch ``FashionNet`` model and save it as an HDF5 file.

    The data is split 80/20 into training and validation sets. Both outputs
    use categorical cross-entropy with equal weight. The trained model is
    written to ``trained_mode/FashionNet.h5`` and the loss curves are plotted
    with ``plot_fashionnet_loss_acc``.

    Args:
        datas: Image array used as model input.
        category_labels: Binarized category labels matching ``datas``.
        color_labels: Binarized color labels matching ``datas``.
        category_classes: Number of units of the category output.
        color_classes: Number of units of the color output.
        finalAct: Activation of both output layers. The default used to be
            the misspelled ``'softmaxt'``, which is not an activation name.
    """
    EPOCHS = 30
    INIT_LR = 1e-3
    BATCH_SIZE = 32
    INPUT_SHAPE = (96, 96, 3)
    MODEL_DIR = 'trained_mode'

    (trainX, testX, trainCategoryY, testCategoryY, trainColorY, testColorY) = train_test_split(
        datas, category_labels, color_labels, test_size=0.2, random_state=42)

    fashionNet = FashionNet(INPUT_SHAPE, category_classes=category_classes,
                            color_classes=color_classes, finalAct=finalAct)
    model = fashionNet.build_model()

    # Keys are the names of the two output layers of FashionNet.
    losses = {'category_output': 'categorical_crossentropy', 'color_output': 'categorical_crossentropy'}
    loss_weights = {'category_output': 1.0, 'color_output': 1.0}

    # Make sure the output directory exists before the long training run, so
    # the final save cannot fail on a missing directory.
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)

    opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
    model.compile(optimizer=opt, loss=losses, loss_weights=loss_weights, metrics=["accuracy"])

    history = model.fit(trainX, {'category_output': trainCategoryY, 'color_output': trainColorY},
                        batch_size=BATCH_SIZE, epochs=EPOCHS,
                        verbose=1,
                        validation_data=(testX, {'category_output': testCategoryY, 'color_output': testColorY}))

    model.save('trained_mode/' + '{}.h5'.format('FashionNet'))

    plot_fashionnet_loss_acc(history, EPOCHS)
-
-
def plot_loss_acc(history, EPOCHS):
    """Plot training/validation loss and accuracy to ``plot_loss_acc.png``.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``).
        EPOCHS: Number of epochs; defines the x axis.
    """
    logs = history.history
    # Keras reports the accuracy metric as "acc" in older releases and as
    # "accuracy" in newer ones; use whichever key is present.
    acc_key = "acc" if "acc" in logs else "accuracy"
    epochs_axis = np.arange(0, EPOCHS)

    plt.style.use("ggplot")
    plt.figure()
    plt.plot(epochs_axis, logs["loss"], label="train_loss")
    plt.plot(epochs_axis, logs["val_loss"], label="val_loss")
    plt.plot(epochs_axis, logs[acc_key], label="train_acc")
    plt.plot(epochs_axis, logs["val_" + acc_key], label="val_acc")
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="upper left")
    plt.savefig('plot_loss_acc.png')
    # Release the figure, as plot_fashionnet_loss_acc does.
    plt.close()
-
-
def plot_fashionnet_loss_acc(history, EPOCHS):
    """Plot the three FashionNet loss curves to ``plot_fashionnet_losses.png``.

    One subplot each is drawn for the total loss, the category loss and the
    color loss, showing the training and validation series.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``).
        EPOCHS: Number of epochs; defines the x axis.
    """
    loss_names = ['loss', 'category_output_loss', 'color_output_loss']
    plt.style.use("ggplot")
    (fig, axes) = plt.subplots(3, 1, figsize=(13, 13))

    for axis, name in zip(axes, loss_names):
        if name != 'loss':
            title = 'Loss for {}'.format(name)
        else:
            title = 'Total loss'
        val_name = "val_" + name

        axis.set_title(title)
        axis.set_xlabel('Epoch #')
        axis.set_ylabel('Loss')
        axis.plot(np.arange(0, EPOCHS), history.history[name], label=name)
        axis.plot(np.arange(0, EPOCHS), history.history[val_name], label=val_name)
        axis.legend()

    plt.savefig('plot_fashionnet_losses.png')
    plt.close()
    # NOTE: an equivalent per-output accuracy plot ('category_output_acc' and
    # 'color_output_acc', saved as 'plot_fashionnet_accs.png') used to be kept
    # here as disabled code and is intentionally not drawn.
-
-
def main():
    """Load the dataset, binarize the labels and train the FashionNet model.

    Raises:
        SystemExit: If the dataset directory does not exist.
    """
    data_dir = './dataset'
    img_size = (96, 96)
    label_dir = './labels'
    if not os.path.exists(label_dir):
        os.mkdir(label_dir)

    # Alternative single-output pipeline (SimpleNet), currently disabled:
    #   datas, labels = load_data(data_dir, img_size)
    #   labels, classes = binarize_multilabels_and_save(
    #       labels, os.path.join(label_dir, 'multi-label.pickle'))
    #   train_model(datas, labels, classes, finalAct='sigmoid', model_type='SimpleNet')

    loaded = load_data_multilabels(data_dir, img_size)
    if loaded is None:
        # The loader returns None when the directory is missing; stop with a
        # clear message instead of failing on tuple unpacking.
        raise SystemExit("[ERROR] dataset directory not found: {}".format(data_dir))
    datas, category_labels, color_labels = loaded

    category_path = os.path.join(label_dir, 'category.pickle')
    color_path = os.path.join(label_dir, 'color.pickle')
    category_labels, color_labels, category_classes, color_classes = binarize_labels_and_save(
        category_labels, color_labels, category_path, color_path)
    train_fashionnet_model(datas, category_labels, color_labels,
                           category_classes, color_classes, finalAct='softmax')


if __name__ == '__main__':
    main()
4、测试部分代码
-
# import the necessary packages
-
from keras.preprocessing.image import img_to_array
-
from keras.models import load_model
-
import numpy as np
-
import argparse
-
import imutils
-
import pickle
-
import cv2
-
import os
-
import tensorflow as tf
-
-
# load the image
-
# model_type = None, FashionNnet
-
def load_image(img_path, model_type=None):
    """Read an image and prepare it as a one-sample model input batch.

    Args:
        img_path: Path of the image file.
        model_type: ``'FashionNnet'`` (sic, the spelling used throughout this
            script) converts the image from BGR to RGB; any other value
            keeps the BGR channel order.

    Returns:
        Tuple ``(image, output)``: ``image`` is the 96x96 input scaled by
        1/255 with a leading batch axis, ``output`` is a copy of the
        original image resized to a width of 400 for display.

    Raises:
        IOError: If the file cannot be read as an image.
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals failure by returning None; fail with the path
        # instead of an AttributeError further down.
        raise IOError("could not read image: {}".format(img_path))

    output = imutils.resize(image, width=400)
    if model_type == 'FashionNnet':
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Pre-process the image for classification.
    image = cv2.resize(image, (96, 96))
    image = image.astype("float") / 255.0
    image = img_to_array(image)
    image = np.expand_dims(image, axis=0)
    return image, output
-
-
def load_trained_model(img, model_path, labelbin_path):
    """Classify ``img`` with a saved single-output model.

    Args:
        img: Input batch; only the prediction for the first sample is used.
        model_path: Path of the saved Keras model.
        labelbin_path: Path of the pickled label binarizer.

    Returns:
        Tuple ``(label_1, proba_1, label_2, proba_2)`` holding the two
        highest-scoring labels and their scores, best first.
    """
    # NOTE: pickle can execute arbitrary code while loading; only use label
    # files produced by the training script.
    with open(labelbin_path, "rb") as f:
        label_lb = pickle.load(f)

    model = load_model(model_path)
    proba = model.predict(img)[0]

    # Indices of the two largest scores, in descending order.
    idxs = np.argsort(proba)[::-1][:2]
    label_1 = label_lb.classes_[idxs[0]]
    label_2 = label_lb.classes_[idxs[1]]
    proba_1 = proba[idxs[0]]
    proba_2 = proba[idxs[1]]

    result = (label_1, proba_1, label_2, proba_2)
    return result
-
-
-
-
# load the trained convolutional neural network
-
def load_trained_fashionnet_model(img, model_path, categorybin_path, colorbin_path):
    """Classify ``img`` with a saved two-output FashionNet model.

    Args:
        img: Input batch; only the prediction for the first sample is used.
        model_path: Path of the saved Keras model.
        categorybin_path: Path of the pickled category binarizer.
        colorbin_path: Path of the pickled color binarizer.

    Returns:
        Tuple ``(category_label, category_proba, color_label, color_proba)``
        holding the best label of each output and its score.
    """
    # NOTE: pickle can execute arbitrary code while loading; only use label
    # files produced by the training script.
    with open(categorybin_path, "rb") as f:
        category_lb = pickle.load(f)
    with open(colorbin_path, "rb") as f:
        color_lb = pickle.load(f)

    # The model contains a Lambda layer that refers to ``tf``, so the module
    # is supplied through custom_objects when loading.
    model = load_model(model_path, custom_objects={'tf': tf})
    # Outputs come back in the order the model declares them: category, color.
    (category_proba, color_proba) = model.predict(img)

    category_idx = category_proba[0].argmax()
    color_idx = color_proba[0].argmax()
    category_label = category_lb.classes_[category_idx]
    color_label = color_lb.classes_[color_idx]

    category_proba = category_proba[0][category_idx]
    color_proba = color_proba[0][color_idx]
    result = (category_label, category_proba, color_label, color_proba)
    return result
-
-
def show_result(img, result):
    """Draw the two predicted labels on ``img`` and show it for two seconds.

    Args:
        img: Image to annotate and display; it is drawn on in place.
        result: Tuple ``(label_1, proba_1, label_2, proba_2)``.
    """
    (label_1, proba_1, label_2, proba_2) = result
    text1 = "{}: {:.2f}%".format(label_1, proba_1*100)
    text2 = "{}: {:.2f}%".format(label_2, proba_2*100)

    # NOTE(review): the drawing calls were truncated in the source text; the
    # font face and the window title below are reconstructed -- confirm them.
    cv2.putText(img, text1, (10, 25), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 255, 0), 2)
    cv2.putText(img, text2, (10, 55), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 255, 0), 2)

    # Show the output image.
    cv2.imshow("Output", img)
    cv2.waitKey(2000)
    cv2.destroyAllWindows()
-
-
def show_fashionnet_result(img, result):
    """Draw the predicted category and color on ``img`` and show it for two seconds.

    Args:
        img: Image to annotate and display; it is drawn on in place.
        result: Tuple ``(category_label, category_proba, color_label,
            color_proba)``.
    """
    (category_label, category_proba, color_label, color_proba) = result
    category_text = "category: {}: {:.2f}%".format(category_label, category_proba*100)
    color_text = "color: {}: {:.2f}%".format(color_label, color_proba*100)

    # NOTE(review): the drawing calls were truncated in the source text; the
    # font face and the window title below are reconstructed -- confirm them.
    cv2.putText(img, category_text, (10, 25), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 255, 0), 2)
    cv2.putText(img, color_text, (10, 55), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 255, 0), 2)

    # Show the output image.
    cv2.imshow("Output", img)
    cv2.waitKey(2000)
    cv2.destroyAllWindows()
-
-
-
if __name__=='__main__':
    # Classify and display every file found in the examples directory.
    test_dir = './examples'
    # Switch between the two pipelines by choosing one of these values.
    #model_type = 'FashionNnet'
    model_type = None
    for img in os.listdir(test_dir):
        img_path = os.path.join(test_dir, img)
        if model_type is None:
            # Single-output model: shows the two highest-scoring labels.
            image, output = load_image(img_path)
            model_path = 'trained_mode/SimpleNet.h5'
            labelbin_path = './labels/multi-label.pickle'
            result = load_trained_model(image, model_path, labelbin_path)
            show_result(output, result)
        elif model_type == 'FashionNnet':
            # Two-output model: shows the best category and the best color.
            image, output = load_image(img_path, model_type)
            model_path = 'trained_mode/FashionNet.h5'
            categorybin_path = './labels/category.pickle'
            colorbin_path = './labels/color.pickle'

            result = load_trained_fashionnet_model(image, model_path, categorybin_path, colorbin_path)
            show_fashionnet_result(output, result)
-
5、数据和详细完整代码
代码地址:https://github.com/zhangwei147258/fashion_mutil_label_classifier_keras
数据地址:https://pan.baidu.com/s/11LoY2H5shADwiQwPuhB6ng 提取码:pg7d