[Feature] Add tool for converting labelme data to coco format #2041
lllrj wants to merge 1 commit into open-mmlab:dev-1.x from
Conversation
Codecov Report: Patch and project coverage have no change.

Additional details and impacted files

@@            Coverage Diff             @@
##           dev-1.x    #2041    +/-   ##
==========================================
  Coverage    82.15%   82.16%
==========================================
  Files          224      224
  Lines        13362    13362
  Branches      2265     2265
==========================================
+ Hits         10978    10979       +1
+ Misses        1873     1872       -1
  Partials       511      511

Flags with carried forward coverage won't be shown. See 1 file with indirect coverage changes. View full report in Codecov by Sentry.
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# import sys

Please remove unnecessary code.

Sorry, I thought some of the commented code might be useful, so I kept it.
            category['id'] = id
            category['name'] = name
            # data for the n keypoints
            category['keypoint'] = [

This should be another input instead of being hard-coded.
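
For illustration only, a minimal sketch of how the category entry could take the keypoint names as an input instead of hard-coding them (build_categories and its arguments are hypothetical helpers, not code from this PR); note that the standard COCO key is the plural 'keypoints':

def build_categories(classname_to_id, keypoint_names):
    # Build the COCO "categories" entries from a user-supplied keypoint-name
    # list instead of a hard-coded one.
    categories = []
    for name, cat_id in classname_to_id.items():
        categories.append({
            'supercategory': name,
            'id': cat_id,
            'name': name,
            'keypoints': list(keypoint_names),  # COCO uses the plural key
            'skeleton': [],
        })
    return categories

# e.g. build_categories({'hand': 1}, ['wrist', 'thumb1', 'thumb2', 'thumb3'])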
            annotation['image_id'] = self.img_id
            annotation['category_id'] = int(self.classname_to_id[label])
            annotation['iscrowd'] = 0
            annotation['area'] = 1.0

The area should be calculated, not set to 1.0.

Oh, I forgot to modify this code when I noticed the issue. Thank you!
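
A minimal sketch of how the area could be derived from the box returned by _get_box (bbox_area is a hypothetical helper, not code from this PR):

def bbox_area(bbox):
    # _get_box returns [x_min, y_min, width, height], so the area of the
    # axis-aligned box is simply width * height.
    x_min, y_min, width, height = bbox
    return float(width * height)

# e.g. annotation['area'] = bbox_area(annotation['bbox'])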
                ...,
            ],
        """
        labels = ['wrist', 'thumb1', 'thumb2', ...]

Should not be hard-coded.
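
As one possible alternative to the hard-coded list, the keypoint names could be read from an MMPose dataset config; this sketch mirrors the parse_dataset_config helper of the re-formatted script further below (the config path is only an example):

from mmengine import Config

def load_keypoint_names(dataset_cfg='configs/_base_/datasets/coco.py'):
    # Read the keypoint names from the dataset_info section of an MMPose
    # dataset config instead of hard-coding them in the converter.
    dataset_info = Config.fromfile(dataset_cfg).dataset_info
    names = [''] * len(dataset_info['keypoint_info'])
    for info in dataset_info['keypoint_info'].values():
        names[info['id']] = info['name']
    return names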
            - class_name: name of the root class
        """

        self.classname_to_id = {args.class_name: 1}

Is it possible to have multiple classes?
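
Supporting multiple classes would only require mapping each class name to its own id, which is what the re-formatted script further below does; a minimal sketch (build_classname_to_id is a hypothetical helper):

def build_classname_to_id(category_names):
    # Map every class name to a 1-based category id so that more than one
    # class can be converted.
    return {name: idx + 1 for idx, name in enumerate(category_names)}

# e.g. build_classname_to_id(['person', 'airplane']) == {'person': 1, 'airplane': 2}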
LareinaM left a comment:

Re-formatted the code and added a corresponding document; the revised script follows the original file below.
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import json
import os
import shutil

import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# import sys


class Labelme2coco_keypoints():

    def __init__(self, args):
        """Constructor for converting a Labelme keypoint dataset into the COCO
        format.

        Args
            args: the command-line arguments
            - class_name: name of the root class
        """

        self.classname_to_id = {args.class_name: 1}
        self.images = []
        self.annotations = []
        self.categories = []
        self.ann_id = 0
        self.img_id = 0

    def save_coco_json(self, instance, save_path):
        json.dump(
            instance,
            open(save_path, 'w', encoding='utf-8'),
            ensure_ascii=False,
            indent=1)

    def read_jsonfile(self, path):
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _get_box(self, points):
        min_x = min_y = np.inf
        max_x = max_y = 0
        for x, y in points:
            min_x = min(min_x, x)
            min_y = min(min_y, y)
            max_x = max(max_x, x)
            max_y = max(max_y, y)
        return [min_x, min_y, max_x - min_x, max_y - min_y]

    def _get_keypoints(self, points, keypoints, num_keypoints, label):
        """Parse the raw Labelme data and generate the COCO keypoint entries.

        Example:
            "keypoints": [
                67.06149888292556,  # x value
                122.5043507571318,  # y value
                1,  # visibility of the 2D keypoint: v = 0 not labeled,
                    # v = 1 labeled but not visible,
                    # v = 2 labeled and visible
                82.42582269256718,
                109.95672933232304,
                1,
                ...,
            ],
        """
        labels = ['wrist', 'thumb1', 'thumb2', ...]
        flag = label.split('_')[-1]
        x = label.split('_')[0]
        visible = 0
        if flag == 'occluded':
            visible = 1
        else:
            visible = 2
        x = labels.index(x)
        keypoints[x * 3] = points[0]
        keypoints[x * 3 + 1] = points[1]
        keypoints[x * 3 + 2] = visible
        num_keypoints += 1

        return num_keypoints

    def _image(self, obj, path):
        """Parse the Labelme obj object and generate the COCO image object.

        The generated object contains 4 fields: id, file_name, height, width.

        Example:
            {
                "file_name": "training/rgb/00031426.jpg",
                "height": 224,
                "width": 224,
                "id": 31426
            }
        """

        image = {}

        # Alternatively, the data could be obtained from imageData:
        # read the imageData field of the raw Labelme annotation and convert
        # it to an array with Labelme's utility function
        # img_x = utils.img_b64_to_arr(obj['imageData'])
        # image['height'], image['width'] = img_x.shape[:-1]  # image size

        # Read imageHeight and imageWidth directly here to avoid issues with
        # imageData in Labelme
        image['height'], image['width'] = obj['imageHeight'], obj[
            'imageWidth']  # image height and width
        # self.img_id = int(os.path.basename(path).split(".json")[0])
        self.img_id = self.img_id + 1
        image['id'] = self.img_id

        image['file_name'] = os.path.basename(path).replace('.json', '.jpg')

        return image

    def _annotation(self, bboxes_list, keypoints_list, json_path):
        """Generate the COCO annotations.

        Args:
            bboxes_list: rectangular bounding boxes
            keypoints_list: keypoints
            json_path: path to the json file
        """
        # check that each bbox has n keypoints; however, this script does not
        # require every bbox to contain n points
        # if len(keypoints_list) != args.join_num * len(bboxes_list):
        #     print(
        #         'you loss {} keypoint(s) with file {}'\
        #             .format(args.join_num * len(bboxes_list) -\
        #                 len(keypoints_list), json_path)
        #     )
        #     print('Please check !!!')
        #     sys.exit()

        i = 0
        # save the keypoints for each bbox separately
        for object in bboxes_list:
            annotation = {}
            # each keypoints array is initialized to [0, ...];
            # len = 36 corresponds to 12 points of (x, y, v)
            keypoints = [0 for i in range(36)]
            num_keypoints = 0

            label = object['label']
            bbox = object['points']
            annotation['id'] = self.ann_id
            annotation['image_id'] = self.img_id
            annotation['category_id'] = int(self.classname_to_id[label])
            annotation['iscrowd'] = 0
            annotation['area'] = 1.0
            annotation['segmentation'] = [
                np.asarray(bbox).flatten().tolist()
            ]  # the two corner points
            # top-left corner plus width and height of the rectangle
            annotation['bbox'] = self._get_box(bbox)

            # build the keypoint list
            for keypoint in keypoints_list:
                point = keypoint['points']
                label = keypoint['label']  # name of the point
                num_keypoints = self._get_keypoints(point[0], keypoints,
                                                    num_keypoints, label)
            annotation['keypoints'] = keypoints
            annotation['num_keypoints'] = num_keypoints

            i += 1
            self.ann_id += 1
            self.annotations.append(annotation)

    def _init_categories(self):
        """Initialize the COCO annotation categories.

        Example:
            "categories": [
                {
                    "supercategory": "hand",
                    "id": 1,
                    "name": "hand",
                    "keypoints": [
                        "wrist",
                        "thumb1",
                        "thumb2",
                        ...,
                    ],
                    "skeleton": [
                    ]
                }
            ]
        """

        for name, id in self.classname_to_id.items():
            category = {}

            category['supercategory'] = name
            category['id'] = id
            category['name'] = name
            # data for the n keypoints
            category['keypoint'] = [
                'wrist',
                'thumb1',
                'thumb2',
                ...,
            ]
            # category['keypoint'] = [str(i + 1) for i in range(args.join_num)]

            self.categories.append(category)

    def to_coco(self, json_path_list):
        """Convert the raw Labelme labels into the COCO dataset format; the
        output includes both labels and images.

        Args:
            json_path_list: directory of the original dataset
        """

        self._init_categories()
        # process every json file in the folder one by one
        for json_path in tqdm(json_path_list):
            obj = self.read_jsonfile(json_path)  # parse one annotation file
            self.images.append(self._image(obj, json_path))  # parse the image
            shapes = obj['shapes']  # read the Labelme shape annotations

            bboxes_list, keypoints_list = [], []
            for shape in shapes:
                if shape['shape_type'] == 'rectangle':  # bboxes
                    bboxes_list.append(shape)
                elif shape['shape_type'] == 'point':  # keypoints
                    keypoints_list.append(shape)
            # the input is the keypoints and bboxes of one file, i.e. the
            # information of one image
            self._annotation(bboxes_list, keypoints_list, json_path)

        keypoints = {}
        keypoints['info'] = {
            'description': 'Air Dataset',
            'version': 1.0,
            'year': 2022
        }
        keypoints['license'] = ['BUAA']
        keypoints['images'] = self.images
        keypoints['annotations'] = self.annotations
        keypoints['categories'] = self.categories
        return keypoints


def init_dir(base_path):
    """Initialize the folder structure of the COCO dataset;

    coco - annotations  # annotation files
         - train        # training set
         - val          # validation set
    Args:
        base_path: root path where the dataset is placed
    """
    if not os.path.exists(os.path.join(base_path, 'coco', 'annotations')):
        os.makedirs(os.path.join(base_path, 'coco', 'annotations'))
    if not os.path.exists(os.path.join(base_path, 'coco', 'train')):
        os.makedirs(os.path.join(base_path, 'coco', 'train'))
    if not os.path.exists(os.path.join(base_path, 'coco', 'val')):
        os.makedirs(os.path.join(base_path, 'coco', 'val'))


def convert(path, target):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--class_name', '--n', help='class name', type=str, default='airplane')
    parser.add_argument(
        '--input',
        '--i',
        help='json file path (labelme)',
        type=str,
        default=path)
    parser.add_argument(
        '--output',
        '--o',
        help='output file path (coco format)',
        type=str,
        default=path)
    parser.add_argument(
        '--join_num', '--j', help='number of join', type=int, default=12)
    parser.add_argument(
        '--ratio',
        '--r',
        help='train and test split ratio',
        type=float,
        default=0.25)
    args = parser.parse_args()

    labelme_path = args.input
    saved_coco_path = args.output

    init_dir(saved_coco_path)  # initialize the COCO folder structure

    json_list_path = glob.glob(labelme_path + '/*.json')
    train_path, val_path = train_test_split(
        json_list_path, test_size=args.ratio)
    print('{} for training'.format(len(train_path)),
          '\n{} for testing'.format(len(val_path)))
    print('Start transform please wait ...')

    l2c_train = Labelme2coco_keypoints(args)  # build the dataset converter

    # generate the training set
    train_keypoints = l2c_train.to_coco(train_path)
    l2c_train.save_coco_json(
        train_keypoints,
        os.path.join(saved_coco_path, 'coco', 'annotations',
                     'keypoints_train.json'))

    # generate the validation set
    l2c_val = Labelme2coco_keypoints(args)
    val_instance = l2c_val.to_coco(val_path)
    l2c_val.save_coco_json(
        val_instance,
        os.path.join(saved_coco_path, 'coco', 'annotations',
                     'keypoints_val.json'))

    # copy the original Labelme images into the train and val folders
    for file in train_path:
        shutil.copy(
            file.replace('json', 'jpg'),
            os.path.join(saved_coco_path, 'coco', 'train'))
    for file in val_path:
        shutil.copy(
            file.replace('json', 'jpg'),
            os.path.join(saved_coco_path, 'coco', 'val'))


if __name__ == '__main__':
    source = 'your labelme path'
    target = 'your coco path'
    convert(source, target)
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import collections
import datetime
import glob
import json
import os
import shutil
import uuid

import numpy as np
import PIL.Image
import PIL.ImageDraw
from mmengine import Config
from sklearn.model_selection import train_test_split
from tqdm import tqdm

try:
    import pycocotools.mask
except ImportError:
    import sys
    print('Please install pycocotools:\n\n    pip install pycocotools\n')
    sys.exit(1)


class Labelme2coco_keypoints():
    """Convert Labelme annotated keypoints into COCO format.

    Args:
        category_names (list): A list of category names in this dataset.
        keypoints_names (list): A list of keypoint names in this dataset.
        skeleton (list): A list of skeleton information in this dataset.
    """

    def __init__(self, category_names, keypoints_names, skeleton):
        self.classname_to_id = {}
        for id, name in enumerate(category_names):
            self.classname_to_id[name] = id + 1
        self.images = []
        self.annotations = []
        self.categories = []
        self.ann_id = 0
        self.img_id = 0
        self.keypoints_names = keypoints_names
        self.skeleton = skeleton

    def save_coco_json(self, instance, save_path):
        json.dump(
            instance,
            open(save_path, 'w', encoding='utf-8'),
            ensure_ascii=False,
            indent=1)

    def read_jsonfile(self, path):
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def shape_to_mask(self, img_shape, points, shape_type=None, point_size=5):
        mask = np.zeros(img_shape[:2], dtype=np.uint8)
        mask = PIL.Image.fromarray(mask)
        draw = PIL.ImageDraw.Draw(mask)
        xy = [tuple(point) for point in points]
        if shape_type == 'rectangle':
            assert len(
                xy) == 2, 'Shape of shape_type=rectangle must have 2 points'
            draw.rectangle(xy, outline=1, fill=1)
        elif shape_type == 'point':
            assert len(xy) == 1, 'Shape of shape_type=point must have 1 points'
            cx, cy = xy[0]
            r = point_size
            draw.ellipse([cx - r, cy - r, cx + r, cy + r], outline=1, fill=1)
        else:
            assert len(xy) > 2, 'Polygon must have points more than 2'
            draw.polygon(xy=xy, outline=1, fill=1)
        mask = np.array(mask, dtype=bool)
        return mask

    def _image(self, path, obj):
        """Parse the obj object of Labelme to generate the image object of
        COCO.

        Args:
            obj (JSON): The JSON object corresponding to ``path``.
            path (str): Path to the Labelme json file.

        Returns:
            Dict: A dictionary representing the image, the keys include
                ``'id'``, ``'file_name'``, ``'license'``, ``'height'`` and
                ``'width'``.
        """
        image = {}
        # Obtain the size from imageHeight and imageWidth directly here to
        # avoid problems with imageData in Labelme
        image['height'], image['width'] = obj['imageHeight'], obj[
            'imageWidth']  # get image's width and height
        self.img_id += 1
        image['id'] = self.img_id
        image['file_name'] = os.path.basename(path).replace('.json', '.jpg')
        image['license'] = 1
        return image

    def _annotation(self, shapes, img):
        """Generate COCO annotations.

        Args:
            shapes (list): The list of shapes in Labelme annotation.
            img (dict): The configuration of the image.
        """
        groupId_keypoints = collections.defaultdict(list)
        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in shapes:
            group_id = shape.get('group_id')
            if group_id is None:
                group_id = uuid.uuid1()

            if shape['shape_type'] == 'point':
                groupId_keypoints[group_id].append(shape)
            else:
                points = shape['points']
                label = shape['label']
                shape_type = shape.get('shape_type', 'polygon')
                mask = self.shape_to_mask([img['height'], img['width']],
                                          points, shape_type)
                instance = (label, group_id)
                if instance in masks:
                    masks[instance] = masks[instance] | mask
                else:
                    masks[instance] = mask

                if shape_type == 'rectangle':
                    (x1, y1), (x2, y2) = points
                    x1, x2 = sorted([x1, x2])
                    y1, y2 = sorted([y1, y2])
                    points = [x1, y1, x2, y1, x2, y2, x1, y2]
                points = np.asarray(points).flatten().tolist()
                segmentations[instance].append(points)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in self.classname_to_id:
                continue
            cls_id = self.classname_to_id[cls_name]

            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            keypoints = [0] * (3 * len(self.keypoints_names))
            keypoints_list = groupId_keypoints[group_id]
            for keypoint in keypoints_list:
                idx = self.keypoints_names.index(keypoint['label'])
                point = keypoint['points'][0]
                visible = 1 if 'occluded' in keypoint['flags'] and keypoint[
                    'flags']['occluded'] else 2
                keypoints[idx * 3] = point[0]
                keypoints[idx * 3 + 1] = point[1]
                keypoints[idx * 3 + 2] = visible

            self.annotations.append(
                dict(
                    id=len(self.annotations),
                    image_id=img['id'],
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                    num_keypoints=len(keypoints_list),
                    keypoints=keypoints,
                ))

    def _init_categories(self):
        """Initialize the COCO labeling category."""
        for name, id in self.classname_to_id.items():
            category = {}
            category['supercategory'] = name
            category['id'] = id
            category['name'] = name
            category['keypoints'] = self.keypoints_names
            category['skeleton'] = self.skeleton
            self.categories.append(category)

    def to_coco(self, json_path_list):
        """Convert Labelme raw labels into COCO dataset format. The generated
        results include labels and images.

        Args:
            json_path_list (list): Paths of original datasets.

        Returns:
            Dict: A dictionary in COCO annotation format.
        """
        self._init_categories()
        # The json files representing each image in the folder are processed
        for json_path in tqdm(json_path_list):
            obj = self.read_jsonfile(json_path)
            img = self._image(json_path, obj)
            self.images.append(img)
            self._annotation(obj['shapes'], img)

        now = datetime.datetime.today()
        coco_json = {}
        coco_json['info'] = dict(
            description='Converted COCO dataset',
            version=None,
            contributor=None,
            url=None,
            year=now.year,
            date_created=now.strftime('%Y/%m/%d'))
        coco_json['licenses'] = [dict(
            url=None,
            id=1,
            name=None,
        )]
        coco_json['images'] = self.images
        coco_json['annotations'] = self.annotations
        coco_json['categories'] = self.categories
        return coco_json


def init_dir(base_path):
    """Initializing the folder structure of the COCO dataset.

    Args:
        base_path (str): Root path for placing the COCO dataset.
    """
    if not os.path.exists(os.path.join(base_path, 'coco', 'annotations')):
        os.makedirs(os.path.join(base_path, 'coco', 'annotations'))
    if not os.path.exists(os.path.join(base_path, 'coco', 'train')):
        os.makedirs(os.path.join(base_path, 'coco', 'train'))
    if not os.path.exists(os.path.join(base_path, 'coco', 'val')):
        os.makedirs(os.path.join(base_path, 'coco', 'val'))


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input',
        '--i',
        help='input json file folder',
        type=str,
        required=True)
    parser.add_argument(
        '--output',
        '--o',
        help='output data folder (for COCO data)',
        type=str,
        required=True)
    parser.add_argument(
        '--categories',
        '--c',
        type=str,
        help='category names file',
        default=None)
    parser.add_argument(
        '--dataset_cfg',
        '--d',
        type=str,
        help='dataset config file',
        default='configs/_base_/datasets/coco.py')
    parser.add_argument(
        '--ratio',
        '--r',
        help='train and test split ratio',
        type=float,
        default=0.25)
    args = parser.parse_args()
    return args


def parse_dataset_config(dataset_cfg='configs/_base_/datasets/coco.py'):
    """Parse the dataset configuration file.

    Args:
        dataset_cfg (str, optional): Path to the dataset configuration
            file. A valid configuration file must include ``dataset_info``
            and the following keys:

            - keypoint_info (dict): the keypoint information
            - skeleton_info (dict): the skeleton information

            If not specified, will use default COCO dataset. Defaults to
            'configs/_base_/datasets/coco.py'.

    Returns:
        Tuple[list]: Return the keypoint names and skeleton info.
    """
    dataset_info = Config.fromfile(dataset_cfg).dataset_info
    keypoints_names = ['' for _ in range(len(dataset_info['keypoint_info']))]
    skeleton = []
    keypoint2id = {}
    for keypoint_info in dataset_info['keypoint_info'].values():
        name = keypoint_info['name']
        keypoint_id = keypoint_info['id']
        keypoints_names[keypoint_id] = name
        keypoint2id[name] = keypoint_id
    for skeleton_info in dataset_info['skeleton_info'].values():
        link = skeleton_info['link']
        skeleton.append([keypoint2id[link[0]], keypoint2id[link[1]]])
    return keypoints_names, skeleton


def convert(category_names, keypoints_names, skeleton, labelme_paths,
            coco_file_path):
    """Convert Labelme annotated files into COCO format and save it.

    Args:
        category_names (list): The list of category names of this dataset.
        keypoints_names (list): The list of keypoint names of this dataset.
        skeleton (list): The list of skeleton info of this dataset.
        labelme_paths (list): The list of path of Labelme files.
        coco_file_path (str): The path to save the COCO annotation file.
    """
    l2c = Labelme2coco_keypoints(category_names, keypoints_names, skeleton)
    # convert the given Labelme files and save the COCO annotation file
    coco = l2c.to_coco(labelme_paths)
    l2c.save_coco_json(coco, coco_file_path)


def main():
    args = parse_args()
    labelme_path = args.input
    saved_coco_path = args.output

    # Initialize the folder structure of the COCO dataset
    init_dir(saved_coco_path)

    json_list_path = glob.glob(labelme_path + '/*.json')
    train_path, val_path = train_test_split(
        json_list_path, test_size=args.ratio)
    print('{} for training'.format(len(train_path)),
          '\n{} for testing'.format(len(val_path)))
    print('Start transform please wait ...')

    keypoints_names, skeleton = parse_dataset_config(args.dataset_cfg)
    category_names = []
    if args.categories:
        for line in open(args.categories).readlines():
            category_names.append(line.strip())
    else:
        category_names = ['person']

    # generate the training set
    convert(
        category_names, keypoints_names, skeleton, train_path,
        os.path.join(saved_coco_path, 'coco', 'annotations',
                     'keypoints_train.json'))

    # generate the validation set
    convert(
        category_names, keypoints_names, skeleton, val_path,
        os.path.join(saved_coco_path, 'coco', 'annotations',
                     'keypoints_val.json'))

    # Copy the original images of Labelme into the training and validation sets
    for file in train_path:
        shutil.copy(
            file.replace('json', 'jpg'),
            os.path.join(saved_coco_path, 'coco', 'train'))
    for file in val_path:
        shutil.copy(
            file.replace('json', 'jpg'),
            os.path.join(saved_coco_path, 'coco', 'val'))


if __name__ == '__main__':
    main()
A document introducing the usage of the script is also added:
labelme2coco.md
Motivation
Provides a labelme2coco tool, since there was previously no way to assemble the initially scattered Labelme files into a single COCO-format annotation file.
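
As a usage illustration, a short sketch of how the converter could be driven programmatically, assuming the script is saved as labelme2coco.py (the module name and all paths below are placeholders):

import glob

# The module name "labelme2coco" is an assumption for this sketch.
from labelme2coco import convert, init_dir, parse_dataset_config

labelme_dir = 'data/labelme'   # folder containing the Labelme *.json files
output_dir = 'data'            # COCO folders are created under <output_dir>/coco

init_dir(output_dir)
keypoint_names, skeleton = parse_dataset_config('configs/_base_/datasets/coco.py')
json_files = glob.glob(labelme_dir + '/*.json')
convert(['person'], keypoint_names, skeleton, json_files,
        'data/coco/annotations/keypoints_train.json')

# Roughly equivalent to the script's own CLI:
#   python labelme2coco.py --input data/labelme --output data \
#       --dataset_cfg configs/_base_/datasets/coco.py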
Modification
BC-breaking (Optional)
Use cases (Optional)
Checklist
Before PR:
After PR: