# 下载 YOLOv6 并安装

下载地址:https://github.com/meituan/YOLOv6.git

1
git clone https://github.com/meituan/YOLOv6.git

创建指定版本的虚拟环境

1
conda create -n yolov6 python=3.8

安装依赖

1
2
cd YOLOv6
pip install -r requirements.txt

# 数据集准备

赛题数据来源于 Google 街景图像中的门牌号数据集(The Street View House Numbers Dataset, SVHN),这些数据均来自真实场景的门牌号。训练集包括 3W 张照片,验证集包括 1W 张照片,每张照片包括颜色图像以及对应的字符编码类别和具体位置;为了保证比赛的公平性,测试集 A 包括 4W 张照片,测试集 B 包括 4W 张照片。

从天池官方发的 csv 文件中找到对应的数据集下载链接:

file size link
mchar_train.zip 345.91MB http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531795/mchar_train.zip
mchar_train.json 3.16MB http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531795/mchar_train.json
mchar_val.zip 200.16MB http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531795/mchar_val.zip
mchar_val.json 1.03MB http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531795/mchar_val.json
mchar_test_a.zip 370.6MB http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531795/mchar_test_a.zip
mchar_sample_submit_A.csv 507.83KB http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531795/mchar_sample_submit_A.csv

将上述数据集下载到本地

image-20221226174900179

解压并整理文件目录,按照下图存放图像和标签,images 中分别存放训练集和验证集的图片

image-20221226182049602

image-20221226190953406

由于当前数据集并不是 yolo 可用的数据集格式,需要使用脚本进行转换,脚本代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import json  
import cv2
import os

# Run the script once with the training-set paths below, then switch all three
# values to the validation set (mchar_val.json, labels/mchar_val/,
# images/mchar_val/) and run it again.
json_dir = 'mchar_train.json'  # path of the annotation JSON file
out_dir = 'labels/mchar_train/'  # directory receiving the YOLO .txt label files
img_dir = 'images/mchar_train/'  # directory containing the images


def yolo_line(label, left, top, width, height, image_width, image_height):
    """Format one character box as a YOLO label line.

    The box is described by its top-left corner (*left*, *top*) and its
    *width* / *height* in pixels. The returned string is
    ``"<label> <x_center> <y_center> <w> <h>"`` where the four geometry
    values are divided by the image size and rounded to 6 decimals.
    """
    right = left + width
    bottom = top + height
    x = (left + right) / 2 / image_width
    y = (top + bottom) / 2 / image_height
    w = (right - left) / image_width
    h = (bottom - top) / image_height
    return ' '.join([str(label)] + [str(round(v, 6)) for v in (x, y, w, h)])


def read_image_size(image_path):
    """Return ``(width, height)`` of the image at *image_path* using OpenCV.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file. ``cv2.imread``
            returns ``None`` instead of raising, so the failure is reported
            here with the offending path.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError('cannot read image: {}'.format(image_path))
    image_height, image_width = image.shape[:2]
    return image_width, image_height


def convert_labels(json_path, label_dir, image_dir, size_reader=read_image_size):
    """Convert the annotation JSON at *json_path* into YOLO label files.

    One ``<stem>.txt`` file is written into *label_dir* for every entry of
    the JSON file, with one line per annotated character.

    Args:
        json_path: annotation file mapping an image file name to a dict with
            the parallel lists ``left``, ``top``, ``width``, ``height`` and
            ``label``.
        label_dir: output directory; created when it does not exist.
        image_dir: directory holding the ``<stem>.png`` images.
        size_reader: callable returning ``(width, height)`` for an image path.

    Returns:
        The number of images processed.
    """
    with open(json_path, 'r', encoding='utf-8') as load_f:
        content = json.load(load_f)

    os.makedirs(label_dir, exist_ok=True)
    total = len(content)

    for count, (name, value) in enumerate(content.items(), start=1):
        stem = os.path.splitext(name)[0]
        image_width, image_height = size_reader(os.path.join(image_dir, stem + '.png'))

        print("{}/{}".format(count, total))

        boxes = zip(value['label'], value['left'], value['top'],
                    value['width'], value['height'])
        # Open each label file once in "w" mode so that re-running the script
        # overwrites the labels instead of appending duplicates, and force
        # LF line endings (os.linesep in text mode yields "\r\r\n" on Windows).
        label_path = os.path.join(label_dir, stem + '.txt')
        with open(label_path, mode='w', encoding='utf-8', newline='\n') as fp:
            for label, left, top, width, height in boxes:
                fp.write(yolo_line(label, left, top, width, height,
                                   image_width, image_height) + '\n')

    return total


def main():
    """Convert the dataset configured by json_dir / out_dir / img_dir."""
    convert_labels(json_dir, out_dir, img_dir)


if __name__ == '__main__':
    main()

运行脚本,进行格式转换

image-20221226192110080

将路径修改为验证集的标签和图片再执行一次

1
2
3
json_dir = 'mchar_val.json' # path of the annotation JSON file
out_dir = 'labels/mchar_val/' # output directory for the txt label files
img_dir='images/mchar_val/' # directory containing the images

得到如下标签文件

image-20221227132558889

共有 30000 个训练集标签和 10000 个验证集标签,最终数据集目录

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
custom_dataset
├── images
│   ├── train
│   │   ├── train0.jpg
│   │   └── train1.jpg
│   ├── val
│   │   ├── val0.jpg
│   │   └── val1.jpg
│   └── test
│       ├── test0.jpg
│       └── test1.jpg
└── labels
    ├── train
    │   ├── train0.txt
    │   └── train1.txt
    └── val
        ├── val0.txt
        └── val1.txt

# YOLO 相关配置

# 数据集标签

修改 $DIR_YOLOv6/data 目录下的 dataset.yaml

1
2
3
4
5
6
7
8
9
10
# Please ensure that your custom_dataset is put in the same parent dir as YOLOv6_DIR
train: ../data/images/mchar_train # train images
val: ../data/images/mchar_val # val images


# whether it is coco dataset, only coco dataset should be set to True.
is_coco: False
# Classes
nc: 10 # number of classes
names: ['0','1', '2', '3', '4', '5', '6', '7', '8', '9'] # class names

配置训练和验证集的路径,修改种类的数量和标签名

# 模型架构配置

如果需要修改模型的优化器和超参数等配置信息,可以创建一个新的 config 文件并将其放置到‘configs’目录下,当然也可以直接使用官方提供的模型配置 ‘$YOLOV6_HOME/configs/*_finetune.py’

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
## YOLOv6s Model config file
model = dict(
type='YOLOv6s',
pretrained='./weights/yolov6s.pt', # download the pretrained model from YOLOv6 github if you're going to use the pretrained model
depth_multiple = 0.33,
width_multiple = 0.50,
...
)
solver=dict(
optim='SGD',
lr_scheduler='Cosine',
...
)

data_aug = dict(
hsv_h=0.015,
hsv_s=0.7,
hsv_v=0.4,
...
)

# CUDA 环境配置

先下载新版本的 N 卡驱动程序,安装对应版本的 cuda 和 cudnn(不在这里赘述)

image-20221227150634763

卸载原先 cpu 版本的 pytorch

1
pip3 uninstall torch torchvision torchaudio

然后安装对应 cuda 版本的 pytorch

1
pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117

由于国内可能无法访问,可能需要借助代理

1
2
set HTTP_PROXY=http://localhost:port
set HTTPS_PROXY=http://localhost:port

之后就能成功下载安装该版本的 pytorch

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
(YOLOv6) C:\Users\cy\project\yolov6_tianchi\YOLOv6>pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple, https://download.pytorch.org/whl/cu117
Collecting torch
Downloading https://download.pytorch.org/whl/cu117/torch-1.13.1%2Bcu117-cp38-cp38-win_amd64.whl (2255.7 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.3/2.3 GB 1.3 MB/s eta 0:00:00
Collecting torchvision
Downloading https://download.pytorch.org/whl/cu117/torchvision-0.14.1%2Bcu117-cp38-cp38-win_amd64.whl (4.8 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.8/4.8 MB 10.3 MB/s eta 0:00:00
Collecting torchaudio
Downloading https://download.pytorch.org/whl/cu117/torchaudio-0.13.1%2Bcu117-cp38-cp38-win_amd64.whl (2.3 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.3/2.3 MB 16.2 MB/s eta 0:00:00
Requirement already satisfied: typing-extensions in c:\users\cy\miniconda3\envs\yolov6\lib\site-packages (from torch) (4.4.0)
Requirement already satisfied: requests in c:\users\cy\miniconda3\envs\yolov6\lib\site-packages (from torchvision) (2.28.1)
Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in c:\users\cy\miniconda3\envs\yolov6\lib\site-packages (from torchvision) (9.3.0)
Requirement already satisfied: numpy in c:\users\cy\miniconda3\envs\yolov6\lib\site-packages (from torchvision) (1.24.0)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\users\cy\miniconda3\envs\yolov6\lib\site-packages (from requests->torchvision) (1.26.13)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\cy\miniconda3\envs\yolov6\lib\site-packages (from requests->torchvision) (2022.12.7)
Requirement already satisfied: idna<4,>=2.5 in c:\users\cy\miniconda3\envs\yolov6\lib\site-packages (from requests->torchvision) (3.4)
Requirement already satisfied: charset-normalizer<3,>=2 in c:\users\cy\miniconda3\envs\yolov6\lib\site-packages (from requests->torchvision) (2.1.1)
Installing collected packages: torch, torchvision, torchaudio
Successfully installed torch-1.13.1+cu117 torchaudio-0.13.1+cu117 torchvision-0.14.1+cu117

进入 python 命令行进行验证

1
2
3
4
5
Python 3.8.15 (default, Nov 24 2022, 14:38:14) [MSC v.1916 64 bit (AMD64)] :: Anaconda, Inc. on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> torch.__version__
'1.13.1+cu117'

成功安装 cuda 版本的 pytorch

# 开始训练!

进入 YOLOv6 目录下,下载预训练的模型文件

1
2
3
4
cd YOLOv6
mkdir weights
cd weights
wget https://github.com/meituan/YOLOv6/releases/download/0.2.1/yolov6s_base.pt

将其重命名为 ‘yolov6s.pt’

单个 GPU 进行训练,直接使用官方提供的配置进行训练,修改 batch size 为 16,默认的 256 我的 6G 显存怕是撑不住

1
2
cd ..
python tools/train.py --batch 16 --conf configs/yolov6s_finetune.py --data data/dataset.yaml --device 0
(可选,使用超算节点的DDP加速训练)

以后有机会再用超算节点吧(

1
# Launch DDP training on 4 GPUs; --data must point at the same data/dataset.yaml used for single-GPU training
python -m torch.distributed.launch --nproc_per_node 4 tools/train.py --batch 256 --conf configs/yolov6s_finetune.py --data data/dataset.yaml --device 0,1,2,3

训练过程如图

image-20221227163218376

# 模型评估和测试

执行模型评估,‘output_dir/name’ 替换为上次运行生成的模型文件路径,一般是位于‘runs/train/expX’,X 为最大值的那个目录,我将该目录下的‘best_ckpt.pt’移动到了 ‘YOLOv6/out/weights’目录下

1
python tools/eval.py --data data/dataset.yaml  --weights out/weights/best_ckpt.pt  --device 0

执行模型测试

1
# Run inference on a single image with the trained checkpoint
python tools/infer.py --weights out/weights/best_ckpt.pt --source img.jpg --device 0

会在‘runs/inference/exp’目录下生成推断结果的图片

image-20221227222447058

可以看出结果还不错,而这仅仅只是训练了 20 个 epoch 的结果。

继续训练达到 80 个 epoch,执行模型评估,结果如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.430
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.899
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.339
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.430
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.433
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.371
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.480
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.592
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.598
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.597
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.603
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.649
Results saved to runs\train\exp25
Epoch: 80 | mAP@0.5: 0.8993700836937124 | mAP@0.50:0.95: 0.43044386965998743

可以看到 mAP 在该数据集上达到了较好水平。

对全部测试图片进行识别并生成结果

1
python tools/infer.py --weights out/weights/best_ckpt.pt --source ../data/test/mchar_test_a --save-txt --save-dir out/results --device 0

image-20221228130603016

1
2
3
4
5
6
7
8
9
10
(YOLOv6) C:\Users\cy\project\yolov6_tianchi\YOLOv6>python tools/infer.py --weights out/weights/best_ckpt.pt --source ../data/test/mchar_test_a --save-txt --save-dir out/results --device 0
Namespace(agnostic_nms=False, classes=None, conf_thres=0.4, device='0', half=False, hide_conf=False, hide_labels=False, img_size=[640, 640], iou_thres=0.45, max_det=1000, name='exp', not_save_img=False, project='runs/inference', save_dir='out/results', save_txt=True, source='../data/test/mchar_test_a', view_img=False, weights='out/weights/best_ckpt.pt', yaml='data/coco.yaml')
Save directory already existed
Loading checkpoint from out/weights/best_ckpt.pt

Fusing model...
Switch model to deploy modality.
C:\Users\cy\miniconda3\envs\YOLOv6\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\TensorShape.cpp:3191.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
15%|███████████▋ | 6074/40000 [06:32<39:05, 14.46it/s]

# 数据整理和提交

经过对全部数据识别,得到了测试数据集的识别标签文件,需要将其整理为天池官方需要的 csv 数据格式。

在‘YOLOv6’目录下执行下面的 python 脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os  

img_dir = '../data/test/mchar_test_a/'  # directory of the test images
result_dir = 'out/results/mchar_test_a/'  # directory of the per-image detection .txt files
out_dir = 'out/results/mchar_test_result/'  # directory receiving result.csv


def takeSecond(elem):
    """Return the second item of *elem* (used as the sort key)."""
    return elem[1]


def read_code(label_path):
    """Return the characters detected in one image, ordered left to right.

    Each non-blank line of *label_path* is split on whitespace; the first
    field is taken as the class and the second as the horizontal position
    used for ordering. A missing file yields an empty string, as do files
    that contain no usable line.
    """
    if not os.path.isfile(label_path):
        return ''

    detections = []
    with open(label_path, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            # Skip blank or truncated lines instead of failing on fields[1].
            if len(fields) < 2:
                continue
            detections.append((fields[0], float(fields[1])))

    # Sort by the second field, ascending.
    detections.sort(key=takeSecond)
    return ''.join(cls for cls, _ in detections)


def main():
    """Write ``result.csv`` with one ``file_name,file_code`` row per test image."""
    os.makedirs(out_dir, exist_ok=True)

    lines = 0
    with open(os.path.join(out_dir, 'result.csv'), mode='w', encoding='utf-8') as fp:
        fp.write('file_name' + ',' + 'file_code' + '\n')
        # Sorted so the CSV row order is reproducible across runs and platforms.
        for file in sorted(os.listdir(img_dir)):
            # Use the real file name; str.title() would change letter case.
            stem = os.path.splitext(file)[0]
            the_number = read_code(os.path.join(result_dir, stem + '.txt'))
            fp.write(file + ',' + the_number + '\n')
            lines += 1

    print(f'total lines:{lines}')


if __name__ == '__main__':
    main()

结果竟然出人意料的不错

image-20221228155230835