
Making your own VOC dataset: the difference between the VOC and COCO dataset formats


All of the object detection projects so far have used the VOC dataset format. To compare against other algorithms, the models also need to be evaluated on a COCO-format dataset. This article is based on https://blog.csdn.net/c2250645962/article/details/105408547

This post converts annotations from the VOC dataset format into the COCO dataset format.

1. Directory structure of the VOC and COCO datasets

datasets
└── coco
    ├── annotations
    ├── train2017
    ├── val2017
    └── test2017

The annotations directory holds the COCO-style annotation files, typically named instances_train2017.json, instances_val2017.json, and instances_test2017.json.

First, create the folders following the COCO dataset directory structure.

The train2017, test2017, and val2017 folders store the images used for training, testing, and validation, while the annotations folder stores the corresponding annotation information, one instance-annotation JSON file per split. A short folder-creation sketch follows below.
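As a reference, the folder layout above can be created with a few lines of Python; the root path datasets/coco used here is only an assumed example and can be changed to wherever you keep your data.

import os

# Assumed root path of the dataset; change it to your own location
coco_root = 'datasets/coco'

# Create the annotations folder and the three image folders of the COCO layout
for sub_dir in ['annotations', 'train2017', 'val2017', 'test2017']:
    os.makedirs(os.path.join(coco_root, sub_dir), exist_ok=True)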

2. The meaning of each field in the converted JSON file is explained in:

https://blog.csdn.net/c2250645962/article/details/105367693
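As a rough guide, the instances-style JSON produced by the conversion script below has three main lists; the sketch uses made-up values purely for illustration.

# Illustrative sketch of the COCO instances JSON structure (values are made up)
example = {
    'images': [
        # one entry per image: file name, size, and a unique id
        {'file_name': '000001.jpg', 'height': 375, 'width': 500, 'id': 1}
    ],
    'annotations': [
        # one entry per bounding box; bbox is [xmin, ymin, width, height],
        # image_id and category_id reference the 'images' and 'categories' lists
        {'area': 1500, 'iscrowd': 0, 'image_id': 1,
         'bbox': [48, 240, 50, 30], 'category_id': 1,
         'id': 1, 'ignore': 0, 'segmentation': []}
    ],
    'categories': [
        # one entry per class; ids start from 1
        {'supercategory': 'none', 'id': 1, 'name': 'hel'}
    ]
}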

3. The conversion code is shown below. Save it as a Python script and place it alongside the image annotations (the Annotations folder). It splits the data into training, validation, and test sets with a 0.8 / 0.1 / 0.1 ratio and generates the COCO JSON files from the VOC XML files.

# coding: utf-8
# xml.etree.ElementTree is part of the Python standard library, no extra install is needed
import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET

START_BOUNDING_BOX_ID = 1


def get(root, name):
    # Return all child elements with the given tag name
    return root.findall(name)


def get_and_check(root, name, length):
    # Return child elements with the given tag name and check how many were found
    vars = root.findall(name)
    if len(vars) == 0:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length > 0 and len(vars) != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, len(vars)))
    if length == 1:
        vars = vars[0]
    return vars


def convert(xml_list, json_file):
    # Convert a list of VOC XML annotation files into a single COCO-style JSON file
    json_dict = {'license': ['none'], 'images': [], 'annotations': [], 'categories': []}
    categories = pre_define_categories.copy()
    bnd_id = START_BOUNDING_BOX_ID
    all_categories = {}
    for index, line in enumerate(xml_list):
        # print('Processing %s' % (line))
        xml_f = line
        tree = ET.parse(xml_f)
        root = tree.getroot()

        # The image is assumed to have the same name as the XML file, with a .jpg extension
        filename = os.path.basename(xml_f)[:-4] + '.jpg'
        image_id = index + 1
        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        image = {'file_name': filename, 'height': height, 'width': width, 'id': image_id}
        json_dict['images'].append(image)
        # Currently segmentation is not supported
        # segmented = get_and_check(root, 'segmented', 1).text
        # assert segmented == '0'
        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            if category in all_categories:
                all_categories[category] += 1
            else:
                all_categories[category] = 1
            if category not in categories:
                if only_care_pre_define_categories:
                    continue
                new_id = len(categories) + 1
                print("[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically".format(category, pre_define_categories, new_id))
                categories[category] = new_id
            category_id = categories[category]
            bndbox = get_and_check(obj, 'bndbox', 1)
            xmin = int(float(get_and_check(bndbox, 'xmin', 1).text))
            ymin = int(float(get_and_check(bndbox, 'ymin', 1).text))
            xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
            ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
            assert(xmax > xmin), "xmax <= xmin, {}".format(line)
            assert(ymax > ymin), "ymax <= ymin, {}".format(line)
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            # COCO bbox format is [xmin, ymin, width, height]
            ann = {'area': o_width * o_height, 'iscrowd': 0, 'image_id': image_id,
                   'bbox': [xmin, ymin, o_width, o_height], 'category_id': category_id,
                   'id': bnd_id, 'ignore': 0, 'segmentation': []}
            json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1

    for cate, cid in categories.items():
        cat = {'supercategory': 'none', 'id': cid, 'name': cate}
        json_dict['categories'].append(cat)
    json_fp = open(json_file, 'w')
    json_str = json.dumps(json_dict)
    json_fp.write(json_str)
    json_fp.close()
    print("------------create {} done--------------".format(json_file))
    print("find {} categories: {} -->>> your pre_define_categories {}: {}".format(len(all_categories), all_categories.keys(), len(pre_define_categories), pre_define_categories.keys()))
    print("category: id --> {}".format(categories))
    print(categories.keys())
    print(categories.values())


if __name__ == '__main__':
    # Folder containing the VOC XML annotation files
    xml_dir = './Annotations'
    # JSON file for the training data
    save_json_train = './train.json'
    # JSON file for the validation data
    save_json_val = './val.json'
    # JSON file for the test data
    save_json_test = './test.json'
    # Class names. Here there are three classes; list your own names as needed, e.g. ['dog', 'person', 'cat']
    classes = ['hel', 'sub', 'ill']
    pre_define_categories = {}
    for i, cls in enumerate(classes):
        pre_define_categories[cls] = i + 1
    only_care_pre_define_categories = True
    # Split ratios for the training and validation sets (the rest goes to the test set)
    train_ratio = 0.8
    val_ratio = 0.1
    print('xml_dir is {}'.format(xml_dir))
    xml_list = glob.glob(xml_dir + "/*.xml")
    xml_list = np.sort(xml_list)
    # print('xml_list is {}'.format(xml_list))
    np.random.seed(100)
    np.random.shuffle(xml_list)
    train_num = int(len(xml_list) * train_ratio)
    val_num = int(len(xml_list) * val_ratio)
    print('Number of training samples: {}'.format(train_num))
    print('Number of validation samples: {}'.format(val_num))
    print('Number of test samples: {}'.format(len(xml_list) - train_num - val_num))
    xml_list_val = xml_list[:val_num]
    xml_list_train = xml_list[val_num:train_num + val_num]
    xml_list_test = xml_list[train_num + val_num:]
    # Convert the XML files of the training split to COCO format
    convert(xml_list_train, save_json_train)
    # Convert the XML files of the validation split to COCO format
    convert(xml_list_val, save_json_val)
    # Convert the XML files of the test split to COCO format
    convert(xml_list_test, save_json_test)
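To sanity-check the generated files, one option is to load them with the COCO API; this assumes pycocotools is installed, and train.json is the file produced above.

from pycocotools.coco import COCO

# Load the generated training annotations and print a quick summary
coco = COCO('./train.json')
cat_ids = coco.getCatIds()
print('categories:', [c['name'] for c in coco.loadCats(cat_ids)])
print('number of images:', len(coco.getImgIds()))
print('number of annotations:', len(coco.getAnnIds()))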
