4. RetinaNet
In Chapter 3, we looked at how to apply augmentation to the provided data and how to build a dataset class. In this chapter, we will use RetinaNet, a one-stage model provided by torchvision, to build a medical mask detection model.

Sections 4.1 through 4.3 build on what we covered in Chapters 2 and 3: we load the data, split it into training and test sets, and define the dataset class. In section 4.4, we load a pretrained model through the torchvision API. In section 4.5, we train the model via transfer learning, and in section 4.6, we generate predictions and evaluate model performance.
4.1 Data Download
For this modeling exercise, we download the data using the code introduced in section 2.1.
!git clone https://github.com/Pseudo-Lab/Tutorial-Book-Utils
!python Tutorial-Book-Utils/PL_data_loader.py --data FaceMaskDetection
!unzip -q Face\ Mask\ Detection.zip
Cloning into 'Tutorial-Book-Utils'...
remote: Enumerating objects: 12, done.
remote: Counting objects: 100% (12/12), done.
remote: Compressing objects: 100% (11/11), done.
remote: Total 12 (delta 1), reused 2 (delta 0), pack-reused 0
Unpacking objects: 100% (12/12), done.
Face Mask Detection.zip is done!
4.2 Data Split
We split the data using the method covered in section 3.3: 170 of the 853 samples are drawn at random and moved into separate test folders.
import os
import random
import numpy as np
import shutil

print(len(os.listdir('annotations')))
print(len(os.listdir('images')))

!mkdir test_images
!mkdir test_annotations

random.seed(1234)
idx = random.sample(range(853), 170)  # randomly pick 170 of the 853 indices for the test set

for img in np.array(sorted(os.listdir('images')))[idx]:
    shutil.move('images/'+img, 'test_images/'+img)

for annot in np.array(sorted(os.listdir('annotations')))[idx]:
    shutil.move('annotations/'+annot, 'test_annotations/'+annot)

print(len(os.listdir('annotations')))
print(len(os.listdir('images')))
print(len(os.listdir('test_annotations')))
print(len(os.listdir('test_images')))
853
853
683
683
170
170
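The counts confirm that 170 of the 853 image/annotation pairs were moved into the test folders. As an optional sanity check (not part of the original code), you can verify that every test image still has a matching annotation file:

# Optional sanity check (illustrative): every test image should have an
# XML annotation with the same file stem.
img_stems = {f.rsplit('.', 1)[0] for f in os.listdir('test_images')}
annot_stems = {f.rsplit('.', 1)[0] for f in os.listdir('test_annotations')}
assert img_stems == annot_stems, "image/annotation mismatch in the test split"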
4.3 Defining the Dataset Class
To train a PyTorch model, we need to define a dataset class. For the object detection models provided by torchvision, the dataset's __getitem__ method must return the image and its bounding-box annotations. We define the dataset class below, adapting the code used in Chapter 3.
import os
import glob
import time

import cv2
import numpy as np
import torch
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
from bs4 import BeautifulSoup
from PIL import Image
def generate_box(obj):
    xmin = float(obj.find('xmin').text)
    ymin = float(obj.find('ymin').text)
    xmax = float(obj.find('xmax').text)
    ymax = float(obj.find('ymax').text)
    return [xmin, ymin, xmax, ymax]

def generate_label(obj):
    if obj.find('name').text == "with_mask":
        return 1
    elif obj.find('name').text == "mask_weared_incorrect":
        return 2
    return 0  # "without_mask"

def generate_target(file):
    with open(file) as f:
        data = f.read()
        soup = BeautifulSoup(data, "html.parser")
        objects = soup.find_all("object")

        num_objs = len(objects)

        boxes = []
        labels = []
        for i in objects:
            boxes.append(generate_box(i))
            labels.append(generate_label(i))

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels

        return target

def plot_image_from_output(img, annotation):
    img = img.cpu().permute(1, 2, 0)

    rects = []
    for idx in range(len(annotation["boxes"])):
        xmin, ymin, xmax, ymax = annotation["boxes"][idx]

        if annotation['labels'][idx] == 0:    # without_mask: red
            rect = patches.Rectangle((xmin,ymin),(xmax-xmin),(ymax-ymin),linewidth=1,edgecolor='r',facecolor='none')
        elif annotation['labels'][idx] == 1:  # with_mask: green
            rect = patches.Rectangle((xmin,ymin),(xmax-xmin),(ymax-ymin),linewidth=1,edgecolor='g',facecolor='none')
        else:                                 # mask_weared_incorrect: orange
            rect = patches.Rectangle((xmin,ymin),(xmax-xmin),(ymax-ymin),linewidth=1,edgecolor='orange',facecolor='none')

        rects.append(rect)

    return img, rects

class MaskDataset(Dataset):
    def __init__(self, path, transform=None):
        self.path = path
        self.imgs = list(sorted(os.listdir(self.path)))
        self.transform = transform

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        file_image = self.imgs[idx]
        file_label = self.imgs[idx][:-3] + 'xml'
        img_path = os.path.join(self.path, file_image)

        if 'test' in self.path:
            label_path = os.path.join("test_annotations/", file_label)
        else:
            label_path = os.path.join("annotations/", file_label)

        img = Image.open(img_path).convert("RGB")
        target = generate_target(label_path)

        to_tensor = torchvision.transforms.ToTensor()

        if self.transform:
            img, transform_target = self.transform(np.array(img), np.array(target['boxes']))
            target['boxes'] = torch.as_tensor(transform_target)

        # convert to tensor
        img = to_tensor(img)

        return img, target

def collate_fn(batch):
    return tuple(zip(*batch))
dataset = MaskDataset('images/')
test_dataset = MaskDataset('test_images/')
data_loader = torch.utils.data.DataLoader(dataset, batch_size=4, collate_fn=collate_fn)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=2, collate_fn=collate_fn)
Finally, we define data_loader and test_data_loader with the torch.utils.data.DataLoader class so that the training and test data can be loaded in batches.
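Because the images differ in size, collate_fn keeps each batch as a tuple of per-sample tensors instead of stacking them into one tensor. As a quick optional check (illustrative, not in the original notebook), you can draw a single batch and inspect its structure:

# Fetch one batch to see the structure produced by collate_fn.
imgs, targets = next(iter(data_loader))
print(len(imgs))                  # 4 images per training batch
print(imgs[0].shape)              # torch.Size([3, H, W]); H and W vary per image
print(targets[0]['boxes'].shape)  # (num_objects, 4) boxes as [xmin, ymin, xmax, ymax]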
4.4 Loading the Model
torchvision provides an API for easily loading deep learning models that address a range of computer vision problems. We will load the RetinaNet model through the torchvision.models module. RetinaNet is available from torchvision 0.8.0 onward, so we first run the code below to install a matching torchvision version.
!pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Requirement already satisfied: torch==1.7.0+cu101 in /usr/local/lib/python3.6/dist-packages (1.7.0+cu101)
Requirement already satisfied: torchvision==0.8.1+cu101 in /usr/local/lib/python3.6/dist-packages (0.8.1+cu101)
Collecting torchaudio==0.7.0
  Downloading https://files.pythonhosted.org/packages/3f/23/6b54106b3de029d3f10cf8debc302491c17630357449c900d6209665b302/torchaudio-0.7.0-cp36-cp36m-manylinux1_x86_64.whl (7.6MB)
     |████████████████████████████████| 7.6MB 11.1MB/s
Requirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from torch==1.7.0+cu101) (0.8)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.6/dist-packages (from torch==1.7.0+cu101) (3.7.4.3)
Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch==1.7.0+cu101) (1.18.5)
Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch==1.7.0+cu101) (0.16.0)
Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from torchvision==0.8.1+cu101) (7.0.0)
Installing collected packages: torchaudio
Successfully installed torchaudio-0.7.0
import torchvision
import torch
torchvision.__version__
'0.8.1+cu101'
The torchvision.__version__ check confirms that torchvision 0.8.1 built for CUDA 10.1 is installed. Next, we run the code below to load the RetinaNet model. Since the Face Mask Detection dataset has three classes, we set the num_classes parameter to 3. Because we will perform transfer learning, we load pretrained weights for the backbone only and leave the remaining weights randomly initialized; with pretrained_backbone=True, the ResNet-50 backbone comes with weights pretrained on ImageNet.
retina = torchvision.models.detection.retinanet_resnet50_fpn(num_classes = 3, pretrained=False, pretrained_backbone = True)
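As an optional check (this attribute layout assumes torchvision 0.8's RetinaNet implementation), you can confirm that the classification head was rebuilt for our three classes while the backbone carries pretrained weights:

# The classification head is freshly initialized for 3 classes;
# only the ResNet-50 backbone weights come from pretraining.
print(retina.head.classification_head.num_classes)  # 3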
4.5 Transfer Learning
With the model loaded, we carry out transfer learning using the code below.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_epochs = 30
retina.to(device)

# parameters
params = [p for p in retina.parameters() if p.requires_grad]  # keep only the parameters that require gradient computation
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

len_dataloader = len(data_loader)

# takes roughly 4 minutes per epoch
for epoch in range(num_epochs):
    start = time.time()
    retina.train()

    i = 0
    epoch_loss = 0
    for images, targets in data_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # in train mode, the model returns a dict of losses
        loss_dict = retina(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        i += 1

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses
    print(epoch_loss, f'time: {time.time() - start}')
/usr/local/lib/python3.6/dist-packages/torch/nn/_reduction.py:44: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
warnings.warn(warning.format(ret))
tensor(285.9670, device='cuda:0', grad_fn=<AddBackward0>) time: 242.22558188438416
tensor(268.1001, device='cuda:0', grad_fn=<AddBackward0>) time: 251.5482075214386
tensor(248.4554, device='cuda:0', grad_fn=<AddBackward0>) time: 248.92862486839294
tensor(233.0612, device='cuda:0', grad_fn=<AddBackward0>) time: 249.69438576698303
tensor(234.2285, device='cuda:0', grad_fn=<AddBackward0>) time: 247.88670659065247
tensor(202.4744, device='cuda:0', grad_fn=<AddBackward0>) time: 249.68517541885376
tensor(172.9739, device='cuda:0', grad_fn=<AddBackward0>) time: 250.47061586380005
tensor(125.8968, device='cuda:0', grad_fn=<AddBackward0>) time: 251.4771168231964
tensor(102.0443, device='cuda:0', grad_fn=<AddBackward0>) time: 251.20848298072815
tensor(88.1749, device='cuda:0', grad_fn=<AddBackward0>) time: 251.144877910614
tensor(78.1594, device='cuda:0', grad_fn=<AddBackward0>) time: 251.8066761493683
tensor(73.6921, device='cuda:0', grad_fn=<AddBackward0>) time: 251.669575214386
tensor(69.6965, device='cuda:0', grad_fn=<AddBackward0>) time: 251.8230264186859
tensor(63.9101, device='cuda:0', grad_fn=<AddBackward0>) time: 252.08272123336792
tensor(56.2955, device='cuda:0', grad_fn=<AddBackward0>) time: 252.18470931053162
tensor(56.2638, device='cuda:0', grad_fn=<AddBackward0>) time: 252.03237462043762
tensor(50.2047, device='cuda:0', grad_fn=<AddBackward0>) time: 252.09569120407104
tensor(45.9254, device='cuda:0', grad_fn=<AddBackward0>) time: 253.205641746521
tensor(44.4599, device='cuda:0', grad_fn=<AddBackward0>) time: 253.05651235580444
tensor(43.9277, device='cuda:0', grad_fn=<AddBackward0>) time: 253.1837260723114
tensor(40.4117, device='cuda:0', grad_fn=<AddBackward0>) time: 253.18618297576904
tensor(39.0882, device='cuda:0', grad_fn=<AddBackward0>) time: 253.36814761161804
tensor(35.3732, device='cuda:0', grad_fn=<AddBackward0>) time: 253.41503262519836
tensor(34.0460, device='cuda:0', grad_fn=<AddBackward0>) time: 252.93738174438477
tensor(35.8844, device='cuda:0', grad_fn=<AddBackward0>) time: 253.25822925567627
tensor(33.1177, device='cuda:0', grad_fn=<AddBackward0>) time: 253.25469851493835
tensor(28.4753, device='cuda:0', grad_fn=<AddBackward0>) time: 253.2648823261261
tensor(30.3831, device='cuda:0', grad_fn=<AddBackward0>) time: 253.4244725704193
tensor(28.0954, device='cuda:0', grad_fn=<AddBackward0>) time: 253.57142424583435
tensor(28.5899, device='cuda:0', grad_fn=<AddBackward0>) time: 253.16517424583435
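For reference, in training mode torchvision's RetinaNet returns a dictionary with two loss terms, a focal classification loss and a box regression loss, which the loop above simply sums. A minimal sketch for logging them separately (reusing one batch from data_loader):

# Inspect the two components of the training loss on a single batch.
retina.train()
images, targets = next(iter(data_loader))
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
loss_dict = retina(images, targets)
print({k: round(v.item(), 4) for k, v in loss_dict.items()})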
To reuse the model later, we run the code below to save the trained weights. The torch.save function writes the trained weights to the specified path.
torch.save(retina.state_dict(),f'retina_{num_epochs}.pt')
retina.load_state_dict(torch.load(f'retina_{num_epochs}.pt'))
<All keys matched successfully>
To restore trained weights, use load_state_dict together with torch.load. If you have re-created the retina variable from scratch, the model must also be moved onto the GPU for GPU computation.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
retina.to(device)
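In a fresh session, restoring the model would look roughly like this (a sketch, assuming the same num_classes and checkpoint file name used above):

# Rebuild the architecture, restore the saved weights, then move to the GPU.
retina = torchvision.models.detection.retinanet_resnet50_fpn(
    num_classes=3, pretrained=False, pretrained_backbone=True)
retina.load_state_dict(torch.load('retina_30.pt', map_location=device))
retina.to(device)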
4.6 Prediction
Once training is finished, let's examine the prediction results. We load data from test_data_loader, run it through the model for inference, and then visualize the predictions alongside the ground truth. First, we define a helper function for prediction.
def make_prediction(model, img, threshold):
    model.eval()
    preds = model(img)
    for id in range(len(preds)):
        idx_list = []

        for idx, score in enumerate(preds[id]['scores']):
            if score > threshold:  # keep only detections whose score exceeds the threshold
                idx_list.append(idx)

        preds[id]['boxes'] = preds[id]['boxes'][idx_list]
        preds[id]['labels'] = preds[id]['labels'][idx_list]
        preds[id]['scores'] = preds[id]['scores'][idx_list]

    return preds
The make_prediction function runs inference with the trained model and keeps only the bounding boxes whose confidence score exceeds the threshold parameter; a threshold of 0.5 or higher is commonly used. Next, we loop over all of the data in test_data_loader and run prediction.
from tqdm import tqdm

labels = []
preds_adj_all = []
annot_all = []

for im, annot in tqdm(test_data_loader, position = 0, leave = True):
    im = list(img.to(device) for img in im)
    #annot = [{k: v.to(device) for k, v in t.items()} for t in annot]

    for t in annot:
        labels += t['labels']

    with torch.no_grad():
        preds_adj = make_prediction(retina, im, 0.5)
        preds_adj = [{k: v.to(torch.device('cpu')) for k, v in t.items()} for t in preds_adj]

        preds_adj_all.append(preds_adj)
        annot_all.append(annot)
100%|โโโโโโโโโโ| 85/85 [00:24<00:00, 3.47it/s]
The tqdm progress bar lets us monitor progress. All predictions are stored in the preds_adj_all variable. Next, we visualize the ground-truth and predicted bounding boxes side by side.
nrows = 8
ncols = 2
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*4, nrows*4))
batch_i = 0
for im, annot in test_data_loader:
    pos = batch_i * 4 + 1
    for sample_i in range(len(im)):
        # left column: ground truth
        img, rects = plot_image_from_output(im[sample_i], annot[sample_i])
        axes[(pos)//2, 1-((pos)%2)].imshow(img)
        for rect in rects:
            axes[(pos)//2, 1-((pos)%2)].add_patch(rect)

        # right column: predictions
        img, rects = plot_image_from_output(im[sample_i], preds_adj_all[batch_i][sample_i])
        axes[(pos)//2, 1-((pos+1)%2)].imshow(img)
        for rect in rects:
            axes[(pos)//2, 1-((pos+1)%2)].add_patch(rect)

        pos += 2

    batch_i += 1
    if batch_i == 4:
        break

# remove the x and y ticks
for idx, ax in enumerate(axes.flat):
    ax.set_xticks([])
    ax.set_yticks([])

colnames = ['True', 'Pred']
for idx, ax in enumerate(axes[0]):
    ax.set_title(colnames[idx])

plt.tight_layout()
plt.show()
With the loop above, we visualized the ground truth and predictions for 4 batches, 8 images in total. The left column shows the ground-truth bounding boxes and labels, and the right column shows the model's predictions. The model detects mask wearers (green) well; for people not wearing masks (red), it occasionally mis-detects them as wearing a mask incorrectly (orange). To evaluate overall model performance, we compute the mean Average Precision (mAP), a standard metric for evaluating object detection models.
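For reference, these are the standard definitions behind the metric (predictions are matched to ground-truth boxes at an IoU threshold of 0.5 here):

$$\mathrm{precision} = \frac{TP}{TP + FP}, \qquad \mathrm{recall} = \frac{TP}{TP + FN}$$

$$AP_c = \int_0^1 p_c(r)\,dr, \qquad mAP = \frac{1}{|C|}\sum_{c \in C} AP_c$$

where $p_c(r)$ is the precision of class $c$ as a function of recall, and $C$ is the set of classes.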
The Tutorial-Book-Utils folder we cloned during the data download step contains the file utils_ObjectDetection.py. We will compute mAP with the functions in this module. First, we import utils_ObjectDetection.py.
%cd Tutorial-Book-Utils/
import utils_ObjectDetection as utils
/content/Tutorial-Book-Utils
sample_metrics = []
for batch_i in range(len(preds_adj_all)):
    sample_metrics += utils.get_batch_statistics(preds_adj_all[batch_i], annot_all[batch_i], iou_threshold=0.5)
We store the per-batch statistics needed for the mAP calculation in sample_metrics, then compute mAP with the ap_per_class function.
true_positives, pred_scores, pred_labels = [torch.cat(x, 0) for x in list(zip(*sample_metrics))]  # concatenate the statistics across all batches
precision, recall, AP, f1, ap_class = utils.ap_per_class(true_positives, pred_scores, pred_labels, torch.tensor(labels))
mAP = torch.mean(AP)
print(f'mAP : {mAP}')
print(f'AP : {AP}')
mAP : 0.5824690281035101
AP : tensor([0.7684, 0.9188, 0.0603], dtype=torch.float64)
Interpreting the results: class 0 (no mask) reaches an AP of 0.7684, class 1 (mask worn) reaches 0.9188, and class 2 (mask worn incorrectly) reaches only 0.06.
So far, we have built a medical mask detection model by transfer-learning RetinaNet. In the next chapter, we will try to improve detection performance with Faster R-CNN, a two-stage detector.