当前位置：首页 > article > 正文

保姆级教程：用Python脚本将Animal Pose数据集一键转成YOLO格式（含关键点）

article 2026/5/6 10:17:12

从零实现Animal Pose数据集到YOLO格式的完整转换指南

当你第一次打开Animal Pose数据集时，面对那些密密麻麻的JSON标注文件，是否感到无从下手？别担心，这篇文章将带你一步步完成从COCO格式到YOLO格式的完整转换过程，包括处理那些令人头疼的图片格式混杂和关键点坐标越界问题。

1. 准备工作与环境配置

在开始转换之前，我们需要确保所有必要的工具和库已经就位。这个阶段看似简单，但却是整个流程顺利进行的基础。首先，创建一个干净的Python虚拟环境是个好习惯：

```bash
python -m venv animalpose_env
source animalpose_env/bin/activate  # Linux/Mac
# 或者
animalpose_env\Scripts\activate  # Windows
```

接下来安装必要的依赖库：

```bash
pip install pillow tqdm numpy
```

提示：建议使用Python 3.8或更高版本，以避免潜在的兼容性问题。

数据集下载后，你的文件夹结构应该如下：

```
animal-pose-dataset/
├── annotations/
│   ├── keypoints.json
├── images/
│   ├── 000001.jpg
│   ├── 000002.jpeg
│   └── ...
```

2. 理解数据集结构与YOLO格式要求

Animal Pose数据集包含5类动物（狗、猫、牛、马、羊）的20个关键点标注。每个关键点包含x、y坐标和可见性标志（0：不可见，1：可见，2：遮挡）。YOLO格式要求每个图像对应一个.txt文件，每行包含：

```
class_id x_center y_center width height x1 y1 v1 ... x20 y20 v20
```

其中所有坐标都是相对于图像宽高的归一化值（0-1之间）。

3. 核心转换代码实现

让我们逐步构建转换脚本。首先创建一个convert_animalpose_to_yolo.py文件：

```python
import os
import json
from tqdm import tqdm
import argparse
from PIL import Image


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--json_path", required=True, help="COCO格式的JSON标注文件路径")
    parser.add_argument("--img_dir", required=True, help="原始图片所在目录")
    parser.add_argument("--save_dir", required=True, help="YOLO格式标签保存目录")
    return parser.parse_args()


def convert_bbox(size, box):
    """将COCO格式的bbox转换为YOLO格式"""
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    x_center = (box[0] + box[2]) / 2.0
    y_center = (box[1] + box[3]) / 2.0
    width = box[2] - box[0]
    height = box[3] - box[1]
    return [
        round(x_center * dw, 6),
        round(y_center * dh, 6),
        round(width * dw, 6),
        round(height * dh, 6)
    ]


def convert_keypoints(size, keypoints):
    """处理关键点坐标，确保在0-1范围内"""
    normalized = []
    for i in range(0, len(keypoints), 3):
        x = min(1.0, max(0.0, keypoints[i] / size[0]))
        y = min(1.0, max(0.0, keypoints[i + 1] / size[1]))
        v = keypoints[i + 2]
        normalized.extend([round(x, 6), round(y, 6), int(v)])
    return normalized
```

4. 
处理实际工程中的坑点

在实际操作中，你会遇到几个常见问题：

- 图片格式混杂问题：数据集包含.jpg、.jpeg和.png格式图片
- 关键点坐标越界：部分标注点超出图像边界
- 路径配置问题：不同操作系统路径分隔符差异

4.1 统一图片格式解决方案

创建一个convert_images.bat文件（Windows）或convert_images.sh（Linux/Mac）：

```bat
@echo off
for %%i in (*.jpeg) do (
    ren "%%i" "%%~ni.jpg"
)
for %%i in (*.png) do (
    mogrify -format jpg "%%i"
    del "%%i"
)
```

注意：需要先安装ImageMagick来使用mogrify命令。

4.2 主转换逻辑实现

继续完善我们的Python脚本：

```python
def main():
    args = parse_args()

    # 创建保存目录
    os.makedirs(args.save_dir, exist_ok=True)

    # 加载COCO格式标注
    with open(args.json_path) as f:
        data = json.load(f)

    # 创建类别映射
    category_map = {cat["id"]: idx for idx, cat in enumerate(data["categories"])}

    # 保存类别文件
    with open(os.path.join(args.save_dir, "classes.txt"), "w") as f:
        f.write("\n".join([cat["name"] for cat in data["categories"]]))

    # 处理每张图片
    for img_info in tqdm(data["images"], desc="Processing images"):
        img_path = os.path.join(args.img_dir, img_info["file_name"])
        try:
            with Image.open(img_path) as img:
                width, height = img.size
        except FileNotFoundError:
            print(f"Warning: {img_path} not found, skipping")
            continue

        # 为每张图片创建对应的标签文件
        txt_path = os.path.join(args.save_dir, os.path.splitext(img_info["file_name"])[0] + ".txt")
        with open(txt_path, "w") as f_txt:
            # 找到该图片的所有标注
            annotations = [ann for ann in data["annotations"] if ann["image_id"] == img_info["id"]]
            for ann in annotations:
                # 转换边界框
                bbox = convert_bbox((width, height), ann["bbox"])
                # 转换关键点
                keypoints = convert_keypoints((width, height), ann["keypoints"])
                # 写入YOLO格式行
                line = [str(category_map[ann["category_id"]])] + \
                       [str(x) for x in bbox] + \
                       [str(x) for x in keypoints]
                f_txt.write(" ".join(line) + "\n")


if __name__ == "__main__":
    main()
```

5. 
验证转换结果

转换完成后，我们需要验证生成的文件是否正确。以下是一个简单的验证脚本：

```python
import os
import random
from PIL import Image, ImageDraw


def visualize_yolo_label(img_path, label_path, class_names):
    """可视化YOLO格式的标签"""
    img = Image.open(img_path)
    draw = ImageDraw.Draw(img)
    w, h = img.size

    with open(label_path) as f:
        for line in f:
            parts = line.strip().split()
            class_id = int(parts[0])
            bbox = list(map(float, parts[1:5]))
            keypoints = list(map(float, parts[5:]))

            # 绘制边界框
            x_center, y_center, width, height = bbox
            x1 = (x_center - width/2) * w
            y1 = (y_center - height/2) * h
            x2 = (x_center + width/2) * w
            y2 = (y_center + height/2) * h
            draw.rectangle([x1, y1, x2, y2], outline="red", width=2)

            # 绘制关键点
            for i in range(0, len(keypoints), 3):
                x = keypoints[i] * w
                y = keypoints[i + 1] * h
                v = int(keypoints[i + 2])
                color = "green" if v > 0 else "gray"
                draw.ellipse([x-3, y-3, x+3, y+3], fill=color)

    return img


# 随机选择一张图片进行可视化
label_dir = "path_to_yolo_labels"
img_dir = "path_to_images"
class_names = ["dog", "cat", "cow", "horse", "sheep"]

label_files = os.listdir(label_dir)
sample_file = random.choice(label_files)
img_file = os.path.splitext(sample_file)[0] + ".jpg"

visual_img = visualize_yolo_label(
    os.path.join(img_dir, img_file),
    os.path.join(label_dir, sample_file),
    class_names
)
visual_img.show()
```

6. 高级技巧与优化建议

6.1 批量处理脚本优化

对于大型数据集，可以考虑使用多进程加速处理：

```python
from multiprocessing import Pool


def process_image(args):
    img_info, data, img_dir, save_dir, category_map = args
    # ...处理逻辑与之前相同...


if __name__ == "__main__":
    args = parse_args()
    # ...初始化代码...

    # 多进程处理
    with Pool(processes=os.cpu_count()) as pool:
        tasks = [(img, data, args.img_dir, args.save_dir, category_map) for img in data["images"]]
        list(tqdm(pool.imap(process_image, tasks), total=len(tasks)))
```

6.2 数据集分割

通常需要将数据集分为训练集、验证集和测试集：

```python
import random
from sklearn.model_selection import train_test_split


def split_dataset(label_dir, output_dir, test_size=0.2, val_size=0.1):
    all_files = [f for f in os.listdir(label_dir) if f.endswith(".txt")]
    random.shuffle(all_files)

    # 先分测试集
    train_val, test = train_test_split(all_files, test_size=test_size)
    # 再从剩余中分验证集
    train, val = train_test_split(train_val, test_size=val_size/(1-test_size))

    # 写入划分文件
    for name, files in [("train", train), ("val", val), ("test", test)]:
        with open(os.path.join(output_dir, f"{name}.txt"), "w") as f:
            f.write("\n".join([f"./images/{name}/{os.path.splitext(f)[0]}.jpg" for f in files]))
```

6.3 数据增强考虑

在转换过程中，可以考虑添加一些简单的数据增强：

```python
def apply_augmentation(img_info, bbox, keypoints):
    """简单的数据增强示例"""
    # 随机水平翻转
    if random.random() > 0.5:
        width = img_info["width"]
        bbox = flip_bbox(bbox, width)
        keypoints = flip_keypoints(keypoints, width)

    # 随机调整亮度/对比度
    # ...

    return bbox, keypoints


def flip_bbox(bbox, img_width):
    x_center, y_center, w, h = bbox
    new_x = 1.0 - x_center
    return [new_x, y_center, w, h]


def flip_keypoints(keypoints, img_width):
    flipped = []
    for i in range(0, len(keypoints), 3):
        x = 1.0 - keypoints[i]
        flipped.extend([x, keypoints[i + 1], keypoints[i + 2]])
    return flipped
```

7. 实际训练准备

转换完成后，你的目录结构应该如下：

```
yolo_animal_pose/
├── images/
│   ├── train/
│   │   ├── 000001.jpg
│   │   └── ...
│   ├── val/
│   │   ├── 000101.jpg
│   │   └── ...
├── labels/
│   ├── train/
│   │   ├── 000001.txt
│   │   └── ...
│   ├── val/
│   │   ├── 000101.txt
│   │   └── ...
├── train.txt
├── val.txt
└── classes.txt
```

创建YOLO训练配置文件animal_pose.yaml：

```yaml
# 训练/验证数据路径
train: ./yolo_animal_pose/train.txt
val: ./yolo_animal_pose/val.txt

# 类别数量
nc: 5

# 类别名称
names: [dog, cat, cow, horse, sheep]

# 关键点配置
kpt_shape: [20, 3]  # 20个关键点，每个点(x,y,visibility)
# 对称关键点索引：索引从0开始，长度必须等于关键点数量（20）；
# 请按数据集实际的关键点顺序核对左右配对，不成对的关键点应映射到自身
flip_idx: [1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14, 17,16, 19,18]
```

现在你就可以使用YOLOv8-Pose等模型开始训练了：

```bash
yolo pose train data=animal_pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
```

保姆级教程：用Python脚本将Animal Pose数据集一键转成YOLO格式（含关键点）

相关文章：

保姆级教程：用Python脚本将Animal Pose数据集一键转成YOLO格式（含关键点）

WarcraftHelper：魔兽争霸3现代硬件兼容性终极解决方案

ESP32-C3 AWS IoT ExpressLink模块开发指南

别再傻傻分不清了！CODESYS编程中FUN、FB、PRG到底怎么选？附实战场景对比

Beyond Compare 5企业级授权管理实战指南：3种密钥生成与部署方案

OpenCore Legacy Patcher：让老款Mac重获新生的三大核心功能

ARM926EJ-S PXP芯片时钟架构与复位系统解析

基于MCP协议构建AI记忆系统：从向量检索到生产部署全解析

qmc-decoder：解锁QQ音乐专属格式的完整解决方案，3分钟实现音频自由

把ESP32-CAM玩出花：除了局域网监控，它还能做这5个有趣项目

不止于计算器：用C++的ExprTk库给你的应用嵌入一个“迷你脚本引擎”

Docker怎么快速入门？实操教程有哪些步骤？

Java科学可视化框架设计与线程安全实践

Atlas200I DK A2内核编译避坑记：手动为AX210网卡定制驱动模块

百度网盘直链解析工具：3步告别限速，实现高速下载

AI写作质量与安全扫描：OpenClaw智能审查技能的设计与应用

WechatDecrypt：三步解密微信聊天记录，重获你的数字记忆宝库

Go语言实现轻量级HTTP代理1proxy：部署、配置与性能调优指南

18_AI视频创作必存：14种新增创意运镜的视觉实验与提示词库

SAP Migration Cockpit实战：手把手教你搞定物料主数据迁移（附Excel模板避坑指南）

GPT-4 API应用开发实战：从零构建智能对话系统

不止是Move命令：用Python脚本给你的Windows文件管理加上‘智能过滤’开关

WLP封装技术解析与可靠性测试实践

新手开发者首次使用 Taotoken 完成从注册到调用的全流程体验

智能图像浏览解决方案：零配置高效看图助手

AutoGPT-Next-Web：一键部署个人AI智能体Web应用全攻略

Clawthority：为AI代理构建代码级安全护栏的插件式策略引擎

量子电路合成：MDL原则与零样本迁移的创新方法

命令行与微信集成：运维自动化通知与交互式助手实战

ECharts custom series实战：手把手教你为多系列柱状图添加渐变/条纹背景（Vue3+TS示例）