学习系列二：常用目标检测的格式转换脚本文件txt,json等

阅读量：804 次

发布时间：2023-04-05

本文共 14134 字，大约阅读时间需要 47 分钟。

常用目标检测格式转换脚本

一、JSON格式转YOLO的TXT格式

本节主要介绍如何将JSON格式的目标检测标签文件转换为YOLO格式的标签TXT文件。

1.1 代码说明

以下是将JSON格式标签文件转换为YOLO格式TXT文件的Python脚本示例：

import json
import os
def convert(size, box):
    """Scale a pixel-space box to normalised ``(x_center, y_center, width, height)``.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(x_min, x_max, y_min, y_max)`` in pixels. Note that both x
            values come first, then both y values.

    Returns:
        A 4-tuple ``(x, y, w, h)`` where x and w are divided by the image
        width and y and h by the image height.
    """
    x_scale = 1.0 / size[0]
    y_scale = 1.0 / size[1]
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]

    centre_x = (x_lo + x_hi) / 2.0
    centre_y = (y_lo + y_hi) / 2.0
    span_x = x_hi - x_lo
    span_y = y_hi - y_lo

    return (
        centre_x * x_scale,
        centre_y * y_scale,
        span_x * x_scale,
        span_y * y_scale,
    )
def decode_json(json_path, output_dir, classes):
    """Convert one JSON annotation file into a YOLO ``.txt`` label file.

    The JSON is expected to provide ``imagePath``, ``imageWidth``,
    ``imageHeight`` and a ``shapes`` list whose entries carry ``shape_type``,
    ``label`` and ``points`` (this looks like the LabelMe layout -- confirm
    against the annotation tool actually used).

    Only shapes of type ``rectangle`` whose label appears in ``classes`` are
    written; everything else is skipped.

    Args:
        json_path: Path of the JSON annotation file to read.
        output_dir: Directory that receives ``<image stem>.txt``. It is
            created when it does not exist yet.
        classes: Sequence of class names; the index of a name is its class id.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    img_size = (data['imageWidth'], data['imageHeight'])

    # Annotation tools running on Windows may store the path with
    # backslashes; normalise so the stem is extracted on every platform.
    image_name = os.path.basename(data['imagePath'].replace('\\', '/'))
    base_name = os.path.splitext(image_name)[0]

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    txt_path = os.path.join(output_dir, base_name + '.txt')

    with open(txt_path, 'w', encoding='utf-8') as txt_file:
        for shape in data['shapes']:
            if shape.get('shape_type') != 'rectangle':
                continue
            label = shape['label']
            if label not in classes:
                continue
            cls_id = classes.index(label)

            (x1, y1), (x2, y2) = shape['points'][:2]
            # convert() takes (x_min, x_max, y_min, y_max). The two corners
            # may have been drawn in any order, so sort each axis first.
            box = (min(x1, x2), max(x1, x2), min(y1, y2), max(y1, y2))
            bb = convert(img_size, box)
            txt_file.write(
                f"{cls_id} {bb[0]:.6f} {bb[1]:.6f} {bb[2]:.6f} {bb[3]:.6f}\n"
            )

1.2 使用说明

类别文件：确保classes列表中包含所有目标检测的类别名称

JSON文件路径：指定需要转换的JSON文件路径

输出目录：指定生成的TXT文件存储路径

运行脚本：将上述代码保存为convert.py，然后运行：

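python convert.py --json_folder_path ./json --output_dir ./txt --classes loose un-loose

注意：上面的示例代码只定义了 convert 和 decode_json 两个函数，并未包含命令行参数解析。要使用上述命令，需要自行用 argparse 添加 --json_folder_path、--output_dir、--classes 参数并遍历文件夹中的JSON文件；也可以在Python中直接调用 decode_json(json_path, output_dir, classes)。另外，类别名应以空格分隔的形式传入，而不是写成未加引号的Python列表（['loose', 'un-loose'] 会被shell拆分成多个参数）。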

二、YOLOv8关键点标注的JSON格式转可训练的TXT格式

本节介绍如何将带有目标框和关键点标注的JSON文件（示例代码读取 shapes、group_id、shape_type、points 等字段）转换为YOLOv8关键点（姿态）模型可训练的TXT格式。

2.1 代码说明

以下是将YOLOv8关键点标注文件转换为可训练格式的Python脚本示例：

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
import json
import tqdm
# Object classes and keypoint names. The position of a name in its list is
# the class id / the slot the keypoint occupies in every output row.
class_list = ["fks"]
keypoint_list = ["P1", "P2", "P3", "P4"]


def json_to_yolo(img_data, json_data):
    """Build YOLO keypoint label rows from one annotated image.

    Rectangles and keypoints are tied together through ``group_id``: every
    rectangle whose label is in ``class_list`` becomes one row, and every
    shape whose label is in ``keypoint_list`` is attached to the rectangle
    sharing its ``group_id``.

    Each row is
    ``[class_id, cx, cy, w, h, kx1, ky1, v1, ..., kxN, kyN, vN]`` with all
    coordinates normalised by the image size and rounded to 6 decimals.
    Every row has exactly ``len(keypoint_list)`` keypoint triplets in
    ``keypoint_list`` order: an annotated keypoint gets visibility ``2``, a
    missing one is written as ``0, 0, 0`` so later keypoints keep their slot.

    Args:
        img_data: Image array; only ``img_data.shape[:2]`` (height, width)
            is used.
        json_data: Parsed annotation dict with a ``shapes`` list whose items
            provide ``label``, ``group_id``, ``points`` and ``shape_type``.

    Returns:
        A list of rows, one per kept rectangle (empty when there are none).
    """
    h, w = img_data.shape[:2]

    # group_id -> box record. Only the first rectangle of a group is kept,
    # and rectangles with a label outside class_list are ignored.
    rectangles = {}
    for shape in json_data["shapes"]:
        if shape["shape_type"] != "rectangle":
            continue
        if shape["label"] not in class_list:
            continue
        group_id = shape["group_id"]
        if group_id not in rectangles:
            points = shape["points"]
            rectangles[group_id] = {
                "label": shape["label"],
                "rect": list(points[0]) + list(points[1]),
                "keypoints": {},
            }

    # Attach keypoints by name. A keypoint whose group has no rectangle is
    # dropped rather than raising KeyError.
    for shape in json_data["shapes"]:
        label = shape["label"]
        if label not in keypoint_list:
            continue
        owner = rectangles.get(shape["group_id"])
        if owner is None:
            continue
        owner["keypoints"].setdefault(label, shape["points"][0])

    yolo_list = []
    for rectangle in rectangles.values():
        x1, y1, x2, y2 = rectangle["rect"]
        result_list = [
            class_list.index(rectangle["label"]),
            round((x1 + x2) / 2 / w, 6),
            round((y1 + y2) / 2 / h, 6),
            round(abs(x1 - x2) / w, 6),
            round(abs(y1 - y2) / h, 6),
        ]

        for name in keypoint_list:
            point = rectangle["keypoints"].get(name)
            if point is None:
                # Placeholder keeps the row length fixed.
                result_list.extend([0.0, 0.0, 0])
                continue
            x, y = point
            result_list.extend([round(x / w, 6), round(y / h, 6), 2])

        yolo_list.append(result_list)
    return yolo_list

2.2 使用说明

类别文件：确保class_list中包含所有目标检测的类别名称

JSON文件路径：指定需要转换的JSON文件路径

图片文件路径：获取所有需要处理的图片文件路径

运行脚本：将上述代码保存为json_to_yolo.py，然后运行：

python json_to_yolo.py --img_list ./images/train/*.jpg --json_folder ./annotations/train/

三、YOLO的TXT格式转COCO数据集标签的JSON格式

本节介绍如何将YOLO格式的标签文件转换为COCO数据集格式的JSON文件。

3.1 代码说明

以下是将YOLO标签文件转换为COCO数据集格式的Python脚本示例：

import os
import cv2
import json
from tqdm import tqdm
from sklearn.model_selection import train_test_split
class Yolo2Coco:
    """Convert a YOLO-format dataset into COCO-style annotation dicts.

    The dataset root is expected to contain ``images/``, ``labels/`` and
    ``classes.txt``. ``arg`` must expose the attributes ``root_dir``,
    ``random_split`` and ``split_by_file`` (e.g. an ``argparse.Namespace``).
    """

    PHASES = ('train', 'val', 'test')

    def __init__(self, arg):
        self.arg = arg

    def train_test_val_split_random(self, img_paths, ratio_train=0.8, ratio_test=0.1, ratio_val=0.1):
        """Randomly split ``img_paths`` into train / val / test lists.

        The split is reproducible (fixed ``random_state``). The part left
        over after the train split is divided so that the test share is
        ``ratio_test / (1 - ratio_train)``; with the default ratios this
        gives the same result as before.

        Returns:
            ``(train_img, val_img, test_img)``.
        """
        train_img, middle_img = train_test_split(img_paths, test_size=1 - ratio_train, random_state=233)
        # test_size controls the SECOND list returned, i.e. the test set,
        # so it has to be derived from ratio_test.
        ratio = ratio_test / (1 - ratio_train)
        val_img, test_img = train_test_split(middle_img, test_size=ratio, random_state=233)
        print(f"NUMS of train: {len(train_img)} : {len(val_img)} : {len(test_img)}")
        return train_img, val_img, test_img

    def train_test_val_split_by_files(self, img_paths, root_dir):
        """Read the split from ``train.txt``, ``val.txt`` and ``test.txt``.

        Each file lists one image per line. Lines are stripped of
        surrounding whitespace (including the trailing newline) and blank
        lines are dropped, so entries can be compared with file names.

        Args:
            img_paths: Unused; kept so existing callers keep working.
            root_dir: Directory holding the three list files.

        Returns:
            ``(train_img, val_img, test_img)``.
        """
        img_split = []
        for phase in self.PHASES:
            phase_path = os.path.join(root_dir, f'{phase}.txt')
            with open(phase_path, 'r') as f:
                img_split.append([line.strip() for line in f if line.strip()])
        return img_split[0], img_split[1], img_split[2]

    @staticmethod
    def _empty_dataset(classes):
        """Return a COCO skeleton with ``categories`` filled from ``classes``."""
        return {
            'categories': [
                {'id': i, 'name': cls, 'supercategory': 'mark'}
                for i, cls in enumerate(classes)
            ],
            'annotations': [],
            'images': [],
        }

    def yolo2coco(self):
        """Run the conversion and write the result as JSON.

        Images are taken from ``<root_dir>/images`` and labels from
        ``<root_dir>/labels/<image stem>.txt``. Depending on ``self.arg`` the
        images are split randomly (``random_split``), split by the list
        files (``split_by_file``), or kept in one dataset. ``random_split``
        wins when both flags are set.

        Every resulting dataset is written to
        ``<root_dir>/annotations/<name>.json`` where ``<name>`` is
        ``train`` / ``val`` / ``test`` or ``dataset`` for the unsplit case.

        Returns:
            Dict mapping the dataset name to its COCO dict.
        """
        root_path = self.arg.root_dir
        origin_labels_dir = os.path.join(root_path, 'labels')
        origin_images_dir = os.path.join(root_path, 'images')

        with open(os.path.join(root_path, 'classes.txt')) as f:
            classes = f.read().strip().split()

        # Sorted so the image ids and the seeded random split are reproducible.
        img_paths = sorted(os.listdir(origin_images_dir))

        if self.arg.random_split:
            parts = self.train_test_val_split_random(
                list(img_paths), ratio_train=0.8, ratio_test=0.1, ratio_val=0.1
            )
        elif self.arg.split_by_file:
            parts = self.train_test_val_split_by_files(
                list(img_paths), root_dir=root_path
            )
        else:
            parts = None

        # Map every image file name to the dataset it belongs to.
        if parts is None:
            datasets = {'dataset': self._empty_dataset(classes)}
            phase_of = dict.fromkeys(img_paths, 'dataset')
        else:
            datasets = {phase: self._empty_dataset(classes) for phase in self.PHASES}
            phase_of = {}
            for phase, names in zip(self.PHASES, parts):
                for name in names:
                    # List files may hold full paths; compare by file name.
                    # setdefault keeps the train > val > test precedence.
                    phase_of.setdefault(os.path.basename(name), phase)

        ann_id_cnt = 0
        for k, index in enumerate(tqdm(img_paths)):
            phase = phase_of.get(index)
            if phase is None:
                # Image is not listed in any split file.
                continue

            img = cv2.imread(os.path.join(origin_images_dir, index))
            if img is None:
                print(f'Skipping unreadable image: {index}')
                continue
            img_h, img_w = img.shape[:2]

            current_dataset = datasets[phase]
            current_dataset['images'].append({
                'file_name': index,
                'id': k,
                'width': img_w,
                'height': img_h
            })

            label_file = os.path.join(origin_labels_dir, os.path.splitext(index)[0] + '.txt')
            if not os.path.exists(label_file):
                continue
            with open(label_file, 'r') as fr:
                for line in fr:
                    fields = line.split()
                    if len(fields) < 5:
                        # Blank or truncated line.
                        continue
                    cls_id = int(fields[0])
                    x, y, w, h = (float(v) for v in fields[1:5])

                    # Normalised centre/size -> absolute corner coordinates.
                    x1 = (x - w / 2) * img_w
                    y1 = (y - h / 2) * img_h
                    x2 = (x + w / 2) * img_w
                    y2 = (y + h / 2) * img_h
                    box_w = max(0, x2 - x1)
                    box_h = max(0, y2 - y1)

                    current_dataset['annotations'].append({
                        'area': box_w * box_h,
                        'bbox': [x1, y1, box_w, box_h],
                        'category_id': cls_id,
                        'id': ann_id_cnt,
                        'image_id': k,
                        'iscrowd': 0,
                        'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]]
                    })
                    ann_id_cnt += 1

        save_dir = os.path.join(root_path, 'annotations')
        os.makedirs(save_dir, exist_ok=True)
        for name, dataset in datasets.items():
            with open(os.path.join(save_dir, f'{name}.json'), 'w', encoding='utf-8') as f:
                json.dump(dataset, f)
        return datasets

3.2 使用说明

类别文件：确保classes.txt文件包含所有目标检测的类别名称

图片文件路径：指定需要处理的图片文件路径

运行脚本：将上述代码保存为yolo2coco.py，然后运行：

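python yolo2coco.py --root_dir ./dataset_coco --random_split

说明：--random_split（随机划分）与 --split_by_file（按 root_dir 下的 train.txt、val.txt、test.txt 划分）二选一即可；代码中两者是 if/elif 关系，同时指定时只会执行随机划分。两个参数都不指定时，所有图片会放入同一个数据集。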

四、XML格式转YOLO数据集标签的TXT格式

本节介绍如何将XML格式的标签文件转换为YOLO数据集格式的标签TXT文件。

4.1 代码说明

以下是将XML格式标签文件转换为YOLO格式的Python脚本示例：

import xml.etree.ElementTree as ET
import os
import cv2
import numpy as np
# Class names to export; the list index is the YOLO class id.
classes = ["person"]


def convert_annotation(image_name,
                       xml_dir=r'C:\Users\Administrator\Desktop\person\labels\val-voc',
                       txt_dir=r'C:\Users\Administrator\Desktop\person\labels\val'):
    """Convert the XML annotation of one image into a YOLO ``.txt`` file.

    The XML file ``<xml_dir>/<stem>.xml`` is read and
    ``<txt_dir>/<stem>.txt`` is written, where ``<stem>`` is ``image_name``
    without its extension. Objects whose ``name`` is not in ``classes`` are
    skipped.

    Box normalisation is delegated to ``convert(size, box)``, which takes
    ``size=(width, height)`` and ``box=(xmin, xmax, ymin, ymax)``; that
    helper has to be in scope (it is defined with the JSON -> TXT script).

    Args:
        image_name: Image file name, e.g. ``"0001.jpg"``.
        xml_dir: Directory holding the XML annotations. Defaults to the
            folder this script was originally written for.
        txt_dir: Existing directory that receives the ``.txt`` file.
            Defaults to the folder this script was originally written for.
    """
    # splitext instead of [:-3] so extensions such as ".jpeg" work as well.
    stem = os.path.splitext(image_name)[0]
    xml_path = os.path.join(xml_dir, stem + '.xml')
    txt_path = os.path.join(txt_dir, stem + '.txt')

    # Let the XML parser open the file so the encoding declared in the
    # document is honoured.
    root = ET.parse(xml_path).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open(txt_path, 'w') as txt_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            txt_file.write(
                f"{cls_id} {bb[0]:.6f} {bb[1]:.6f} {bb[2]:.6f} {bb[3]:.6f}\n"
            )

4.2 使用说明

类别文件：确保classes列表中包含所有目标检测的类别名称

XML文件路径：指定需要转换的XML文件路径

输出目录：指定生成的TXT文件存储路径

运行脚本：将上述代码保存为convert_annotation.py，然后运行：

python convert_annotation.py --image_path ./images/train/*.jpg --output_dir ./labels/train/

五、YOLO目标检测训练的最好权重推理图片

本节介绍如何根据YOLO的目标检测训练的最好权重推理图片，并绘制目标检测框。

5.1 代码说明

以下是使用YOLOv5模型进行推理的Python脚本示例：

import torch
import cv2
import numpy as np
class YoloDetector:
    """Small wrapper around a YOLOv5 model loaded through ``torch.hub``."""

    def __init__(self, weight_path):
        """Load custom weights from a local YOLOv5 checkout.

        Args:
            weight_path: Path to the trained ``.pt`` weight file.
        """
        # 'yolov5' is the local repository directory (source='local').
        # 'custom' is the hub entry point that loads user-trained weights
        # given through ``path``.
        self.yolo_detector = torch.hub.load('yolov5', 'custom',
                                            path=weight_path,
                                            source='local')
        # Confidence threshold applied by the model.
        self.yolo_detector.conf = 0.5

    def detect(self, img):
        """Run the model on a BGR image and return the detections.

        Args:
            img: Image in OpenCV BGR channel order; it is converted to RGB
                here, so callers must not convert it beforehand.

        Returns:
            ``results.pandas().xyxy[0]`` -- per the YOLOv5 hub documentation
            a DataFrame with the columns xmin, ymin, xmax, ymax, confidence,
            class, name.
        """
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = self.yolo_detector(img)
        detections = results.pandas().xyxy[0]
        return detections

    def draw_bbox(self, img, bboxes):
        """Draw boxes with ``<name> <confidence>`` captions onto ``img``.

        Args:
            img: Image to draw on (modified in place).
            bboxes: Either the DataFrame returned by ``detect`` or any
                iterable of rows laid out as
                ``(left, top, right, bottom, confidence, class_id, name)``.

        Returns:
            The same ``img`` object.
        """
        # Iterating a DataFrame yields column names, not rows, so convert
        # it to an array of rows first.
        rows = bboxes.to_numpy() if hasattr(bboxes, 'to_numpy') else bboxes
        for bbox in rows:
            l, t, r, b = (int(v) for v in bbox[:4])
            conf, id_index, id_label = bbox[4:7]
            cv2.rectangle(img, (l, t), (r, b), (0, 255, 0), 1)
            cv2.putText(img, f'{id_label} {conf:.2f}',
                        (l, t - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 1)
        return img
# Load the model.
yolo_demo = YoloDetector('./weight/fks.pt')
# Read the image as a 3-channel BGR array. (np.fromfile + imdecode is
# presumably used instead of cv2.imread so that non-ASCII paths work.)
img_path = r'./test/V15R.jpg'
img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
if img is None:
    raise ValueError(f'Cannot decode image: {img_path}')
# detect() converts BGR -> RGB itself, so the BGR image is passed as is;
# converting here as well would feed the model swapped channels.
results = yolo_demo.detect(img)
# Draw on the BGR image and save it; cv2.imwrite expects BGR order.
yolo_demo.draw_bbox(img, results.to_numpy())
cv2.imwrite('./dete.jpg', img)

5.2 使用说明

权重文件路径：指定YOLO模型的权重文件路径

图片路径：指定需要推理的图片路径

运行脚本：将上述代码保存为yolo_demo.py，然后运行：

python yolo_demo.py

六、根据YOLO标签的TXT文件提取某一个特征类别的标签，并绘制原图

本节介绍如何根据YOLO标签的TXT文件提取某一个特征类别的标签，并绘制原图。

6.1 代码说明

以下是提取特定类别标签并绘制原图的Python脚本示例：

import cv2
import numpy as np
import os
import glob
image_folder = './train/img'
label_folder = './train/labels'
output_folder = './train/output'
# Class ids whose boxes are drawn; labels of every other class are ignored.
target_class_ids = {0}

# Create the output folder if needed.
os.makedirs(output_folder, exist_ok=True)

# Collect every .jpg in the image folder.
image_files = glob.glob(os.path.join(image_folder, '*.jpg'))
for image_path in image_files:
    base_name = os.path.splitext(os.path.basename(image_path))[0]
    label_path = os.path.join(label_folder, base_name + '.txt')

    if not os.path.exists(label_path):
        print(f'Label file for {image_path} not found, skipping.')
        continue

    # IMREAD_COLOR always yields a 3-channel BGR image, so grayscale files
    # no longer break the shape handling or the coloured rectangle.
    img = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        print(f'Could not decode {image_path}, skipping.')
        continue

    with open(label_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    height, width = img.shape[:2]

    for line in lines:
        parts = line.strip().split()
        if len(parts) != 5:
            continue
        class_id, x_center, y_center, w, h = map(float, parts)

        if class_id not in target_class_ids:
            continue

        # Work in float pixels and round once at the end; truncating the
        # centre and size first would shift the box by up to a pixel or two.
        box_w = w * width
        box_h = h * height
        centre_x = x_center * width
        centre_y = y_center * height

        x1 = int(round(centre_x - box_w / 2))
        y1 = int(round(centre_y - box_h / 2))
        x2 = int(round(centre_x + box_w / 2))
        y2 = int(round(centre_y + box_h / 2))

        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

    output_image_path = os.path.join(output_folder, base_name + '_labeled.jpg')
    # imencode + tofile mirrors the fromfile/imdecode read above.
    cv2.imencode('.jpg', img)[1].tofile(output_image_path)
    print(f'Processed {os.path.basename(image_path)}, saved labeled image to {output_image_path}')