Wider Face 数据集
Wider Face 数据集最早于2015年公开首个版本。该数据集挑选出了 32203 张图片并进行了人脸标注,总共标注了 393703 个人脸数据。其中,158989 个标注人脸位于训练集,39496 个位于验证集。每一个子集都包含3个级别的检测难度:Easy,Medium,Hard。
数据集对于每张人脸都附带有更加详细的信息,包括blur(模糊程度)、expression(表情)、illumination(光照)、occlusion(遮挡)、pose(姿态)等信息。
在数据集中,根据事件场景的类型分为了61个类。接着根据每个类别按照40% / 10% / 50%的比例划分到训练集,验证集以及测试集中。如下图:
数据集结构
数据集包括训练集(Training Images),验证集(Validation Images)以及标注文件(Face annotations)。建议按照下图目录结构摆放。
标注文件解析
在标注文件中分.mat
和.txt
两个版本,随便用哪一个都可以。这里,我们以分析txt格式为例。
首先看下readme.txt
文件里的说明:
在说明文件中,给出了详细的标签格式说明:
- 第一行File name为图片的路径名称
- 第二行Number of bounding box为该图片中标注人脸的个数
- 接下来的Number of bounding box行信息为每个人脸的详细信息x1, y1, w, h, blur, expression, illumination, invalid, occlusion, pose
我们进一步看下每个人脸的详细信息x1, y1, w, h, blur, expression, illumination, invalid, occlusion, pose:
- 其中x1, y1, w, h代表人脸边界框的左上角x、y坐标,以及宽、高信息,注意这里是绝对坐标。
- blur代表人脸的模糊程度,0代表清晰,1代表有点模糊,2代表很模糊。
- expression代表表情,0代表正常的表情,1代表夸张的表情。
- illumination代表光照条件,0代表正常光照,1代表极端的光照条件。
- invalid这个参数官方没有给出明确解释。我通过绘制一些invalid的人脸图片发现,基本都是很小、很难分辨的人脸(不仔细看,看不出来的那种)。个人建议在使用时忽略invalid为1的人脸。
- occlusion代表人脸的遮挡程度,0代表没有遮挡,1代表部分遮挡(1%-30%),2代表严重遮挡(30%以上)。
- pose代表人脸的姿态,0代表典型姿态,1代表非典型姿态。论文中给出的解释:Face is annotated as atypical under two conditions: either the roll or pitch degree is larger than 30-degree; or the yaw is larger than 90-degree. 不好理解的可以看下面图示标注的Atypical pose。
解析标签文件 Python 代码
在调用 parse_wider_txt
时,传入 data_root
指向 wider_face
的路径,split
表示要解析训练集还是验证集的标签文件(传入 train
或 val
)。
import os
from tqdm import tqdm
import cv2
from create_xml import create_pascal_voc_xml
def create_xml(labels: list, img_root: str, img_path: str, save_root: str) -> bool:
    """Convert one image's WIDER Face annotation entries into a Pascal VOC XML file.

    :param labels: per-face annotation fields, each item is
                   [x1, y1, w, h, blur, expression, illumination, invalid,
                    occlusion, pose] — all strings as split from the txt file
    :param img_root: root directory that holds the images
    :param img_path: image path relative to img_root, e.g. '0--Parade/xxx.jpg'
    :param save_root: directory where the generated XML is written
    :return: True if an XML with at least one valid face was created, else False
    """
    source_dict = {'database': 'The WIDERFACE2017 Database',
                   'annotation': 'WIDERFACE 2017',
                   'image': 'WIDERFACE'}

    # Read the image only to obtain (height, width, depth) for the XML <size> node.
    img_full_path = os.path.join(img_root, img_path)
    im_shape = (0, 0, 0)
    if os.path.exists(img_full_path):
        im = cv2.imread(img_full_path)
        # cv2.imread returns None (no exception) for unreadable/corrupt files.
        if im is not None:
            im_shape = im.shape
        else:
            print(f"Warning: {img_path} does not exist, can't read image shape.")
    else:
        print(f"Warning: {img_path} does not exist, can't read image shape.")

    ob_list = []
    for ob in labels:
        # ob[7] is the 'invalid' flag: mostly tiny, unrecognizable faces — skip them.
        if ob[7] == '1':
            # invalid face image, skip
            continue
        # Guard against degenerate boxes present in the raw annotations.
        if int(ob[2]) <= 0 or int(ob[3]) <= 0:
            print(f"Warning: find bbox w or h <= 0, in {img_path}, skip.")
            continue
        ob_dict = {'name': 'face',
                   # any occlusion (ob[8] > 0) is mapped onto the VOC 'truncated' flag
                   'truncated': '0' if ob[8] == '0' else '1',
                   # heavy blur or heavy occlusion is mapped onto VOC 'difficult'
                   'difficult': '1' if ob[4] == '2' or ob[8] == '2' else '0',
                   # (x1, y1, w, h) -> (xmin, ymin, xmax, ymax) in absolute pixels
                   'xmin': ob[0], 'ymin': ob[1],
                   'xmax': str(int(ob[0]) + int(ob[2])),
                   'ymax': str(int(ob[1]) + int(ob[3])),
                   'blur': ob[4], 'expression': ob[5],
                   'illumination': ob[6], 'invalid': ob[7],
                   'occlusion': ob[8], 'pose': ob[9]}
        ob_list.append(ob_dict)

    if len(ob_list) == 0:
        print(f"in {img_path}, no object, skip.")
        return False

    create_pascal_voc_xml(filename=img_path,
                          years="WIDERFACE2017",
                          source_dict=source_dict,
                          objects_list=ob_list,
                          im_shape=im_shape,
                          save_root=save_root)
    return True
def parse_wider_txt(data_root: str, split: str, save_root: str):
    """Parse a WIDER Face ``wider_face_{split}_bbx_gt.txt`` annotation file,
    emit one Pascal VOC XML per image (via create_xml) into save_root, and
    write a ``{split}.txt`` index (in the current directory) listing the file
    stems of all images that produced an XML.

    refer to: torchvision.dataset.widerface.py

    :param data_root: wider_face dataset root (contains wider_face_split/ and WIDER_{split}/)
    :param split: which subset to parse, 'train' or 'val'
    :param save_root: directory where the generated XML files are written
    :return: None
    """
    assert split in ['train', 'val'], f"split must be in ['train', 'val'], got {split}"

    os.makedirs(save_root, exist_ok=True)

    txt_path = os.path.join(data_root, 'wider_face_split', f'wider_face_{split}_bbx_gt.txt')
    img_root = os.path.join(data_root, f'WIDER_{split}', 'images')
    with open(txt_path, "r") as f:
        lines = f.readlines()

    # The txt file is a flat state machine, repeated per image:
    #   <file name>
    #   <number of bounding boxes>
    #   <one annotation line per box>
    # NOTE(review): images with 0 boxes still carry one dummy all-zero line in
    # the official val file; the `box_counter >= num_boxes` check below also
    # consumes that dummy line — keep this behavior.
    file_name_line, num_boxes_line, box_annotation_line = True, False, False
    num_boxes, box_counter, idx = 0, 0, 0
    labels = []
    xml_list = []
    progress_bar = tqdm(lines)
    for line in progress_bar:
        line = line.rstrip()
        if file_name_line:
            img_path = line
            file_name_line = False
            num_boxes_line = True
        elif num_boxes_line:
            num_boxes = int(line)
            num_boxes_line = False
            box_annotation_line = True
        elif box_annotation_line:
            box_counter += 1
            labels.append(line.split(" "))
            if box_counter >= num_boxes:
                # All boxes for the current image consumed; reset the state machine.
                box_annotation_line = False
                file_name_line = True

                if num_boxes == 0:
                    print(f"in {img_path}, no object, skip.")
                else:
                    if create_xml(labels, img_root, img_path, save_root):
                        # only record xml files that actually contain objects
                        xml_list.append(img_path.split("/")[-1].split(".")[0])

                box_counter = 0
                labels.clear()
                idx += 1
                progress_bar.set_description(f"{idx} images")
        else:
            raise RuntimeError(f"Error parsing annotation file {txt_path}")

    with open(split + '.txt', 'w') as w:
        w.write("\n".join(xml_list))
if __name__ == '__main__':
    # Run as a script: parse the validation split. Guarded so that importing
    # this module does not kick off a full dataset conversion.
    parse_wider_txt("/data/wider_face/",
                    "val",
                    "./annotation/")
如果想把标注文件转化为XML格式,代码如下:
import copy
import os
from xml.dom import minidom as dom
class XMLGenerator(object):
    """Thin wrapper around xml.dom.minidom for building and saving one XML document."""

    def __init__(self, xml_name: str):
        self.doc = dom.Document()
        # path the document will be written to by save_xml()
        self.xml_name = xml_name

    def create_append_node(self, node_name, root_node=None):
        """Create a new element node and append it to root_node (or to the document
        root when root_node is None). Returns the new node."""
        new_node = self.doc.createElement(node_name)
        if root_node is not None:
            root_node.appendChild(new_node)
        else:
            self.doc.appendChild(new_node)
        return new_node

    def create_text_node(self, node_name, node_value, root_node):
        """Create ``<node_name>node_value</node_name>`` and append it under root_node."""
        new_node = self.doc.createElement(node_name)
        node_data = self.doc.createTextNode(node_value)
        new_node.appendChild(node_data)
        root_node.appendChild(new_node)

    def create_object_node(self, info_dict: dict = None, root_node=None):
        """Append a VOC-style <object> node (with nested <bndbox>) under root_node.

        Coordinate keys (xmin/ymin/xmax/ymax) go into <bndbox>; every remaining
        key becomes a direct child of <object>. The caller's dict is NOT mutated
        (the original implementation pop()ed from it, forcing callers to deepcopy).
        """
        if (info_dict is None) or (root_node is None):
            return
        info = dict(info_dict)  # work on a shallow copy; values are strings
        object_node = self.create_append_node('object', root_node)
        box_node = self.create_append_node('bndbox', object_node)
        self.create_text_node("xmin", info.pop("xmin"), box_node)
        self.create_text_node("ymin", info.pop("ymin"), box_node)
        self.create_text_node("xmax", info.pop("xmax"), box_node)
        self.create_text_node("ymax", info.pop("ymax"), box_node)
        for k, v in info.items():
            self.create_text_node(k, v, object_node)

    def save_xml(self):
        """Write the document to self.xml_name, tab-indented, one node per line."""
        # context manager + explicit encoding: the original leaked the handle
        # if writexml raised, and relied on the platform default encoding
        with open(self.xml_name, "w", encoding="utf-8") as f:
            self.doc.writexml(f, addindent="\t", newl="\n")
def create_pascal_voc_xml(filename: str = None,
                          years: str = 'VOC2012',
                          source_dict: dict = None,
                          objects_list: list = None,
                          im_shape: tuple = None,
                          save_root: str = os.getcwd(),
                          cover: bool = False):
    """Write one Pascal VOC annotation XML.

    :param filename: image path as it appears in the annotation file, e.g.
                     '0--Parade/0_Parade_marchingband_1_849.jpg'
    :param years: dataset/year tag, stored in the <folder> node
    :param source_dict: values for the <source> node ('database'/'annotation'/'image')
    :param objects_list: list of per-object dicts (see XMLGenerator.create_object_node)
    :param im_shape: (height, width, depth) of the image
    :param save_root: directory the XML file is written to
    :param cover: when False, an existing XML with the same name is kept
    :return: None (silently returns if any required argument is missing/empty)
    """
    if not (filename and source_dict and objects_list and im_shape):
        return

    # 0--Parade/0_Parade_marchingband_1_849.jpg -> 0_Parade_marchingband_1_849.xml
    # Annotation paths always use '/', so split on it explicitly: the original
    # split on os.sep, which is '\\' on Windows and left the directory in the name.
    base_name = filename.split("/")[-1]
    xml_name = os.path.splitext(base_name)[0] + '.xml'
    xml_full_path = os.path.join(save_root, xml_name)
    if os.path.exists(xml_full_path) and (cover is False):
        print(f"{xml_full_path} already exist, skip.")
        return

    xml_generator = XMLGenerator(xml_full_path)

    # xml root node
    node_root = xml_generator.create_append_node('annotation')
    xml_generator.create_text_node(node_name='folder', node_value=years, root_node=node_root)
    xml_generator.create_text_node(node_name='filename', node_value=filename, root_node=node_root)

    # source
    node_source = xml_generator.create_append_node('source', root_node=node_root)
    xml_generator.create_text_node(node_name='database', node_value=source_dict['database'], root_node=node_source)
    xml_generator.create_text_node(node_name='annotation', node_value=source_dict['annotation'], root_node=node_source)
    xml_generator.create_text_node(node_name='image', node_value=source_dict['image'], root_node=node_source)

    # size
    node_size = xml_generator.create_append_node('size', root_node=node_root)
    xml_generator.create_text_node(node_name='height', node_value=str(im_shape[0]), root_node=node_size)
    xml_generator.create_text_node(node_name='width', node_value=str(im_shape[1]), root_node=node_size)
    xml_generator.create_text_node(node_name='depth', node_value=str(im_shape[2]), root_node=node_size)

    # segmented
    xml_generator.create_text_node(node_name='segmented', node_value='0', root_node=node_root)

    # one <object> node per annotated object
    for ob in objects_list:
        xml_generator.create_object_node(info_dict=ob, root_node=node_root)

    # XML write
    xml_generator.save_xml()
def create_xml_test():
    """Smoke test: write a VOC XML for a dummy image with two identical objects."""
    sample_object = {'name': 'person', 'pose': 'Unspecified', 'truncated': '0',
                     'difficult': '0', 'xmin': '174', 'ymin': '101',
                     'xmax': '349', 'ymax': '351'}
    # second entry is a deep copy so the two objects are independent dicts
    objects = [sample_object, copy.deepcopy(sample_object)]
    create_pascal_voc_xml(filename='test.jpg',
                          years='VOC2012',
                          source_dict={'database': 'The VOC2007 Database',
                                       'annotation': 'PASCAL VOC2007',
                                       'image': 'flickr'},
                          objects_list=objects,
                          im_shape=('500', '700', '3'))
转换为XML后样本如下:
Wider Face数据集下载地址: