#tensorflow #deep-learning #neural-network #object-detection #yolo
#tensorflow #глубокое обучение #нейронная сеть #обнаружение объектов #yolo
Вопрос:
Я пытаюсь создать пользовательский детектор объектов, который обнаруживает сорняки на изображениях, сделанных UAS (подробности опущены для упрощения вопроса). Я экспериментировал с алгоритмами YOLO, а также использовал модели из зоопарка моделей TensorFlow 2 Detection Model Zoo, поскольку существует множество вариантов построения CNN.
Делая обзор литературы, я наткнулся на очень интересную статью (Абдулсалам и Ауф 2020) на эту тему, представленную на конференции во Франции. Исследователи сделали кое-что очень интересное, когда они объединили алгоритм YOLO с алгоритмом ResNet-50:
«Этот функциональный подход предполагает использование выхода одного из слоёв ResNet-50 в качестве входных данных для YOLOv2. Этот слой сети ResNet-50 задаётся как слой извлечения признаков в YOLOv2. В этой работе мы использовали слой ReLU (activation_49_relu) в качестве слоя извлечения признаков. Теперь этот слой будет входом YOLOv2».
Делая это, они смогли получить очень высокую точность классификации при отображении заражений сорняками. Глядя на зоопарк TensorFlow, есть возможность использовать предварительно обученную модель (Faster R-CNN ResNet50 V1 1024×1024), которая использует архитектуру ResNet-50. Код приведен ниже:
# Faster R-CNN with Resnet-50 (v1)
# Trained on COCO, initialized from Imagenet classification checkpoint
# This config is TPU compatible.
# Detection model definition: Faster R-CNN with a Keras ResNet-50 feature extractor.
model {
faster_rcnn {
# Number of object classes the detector predicts.
# NOTE(review): presumably has to be changed to the class count of a custom dataset - confirm.
num_classes: 90
# Every input image is resized to a fixed 1024x1024 shape.
image_resizer {
fixed_shape_resizer {
width: 1024
height: 1024
}
}
# Backbone used for feature extraction; its batch-norm parameters are trainable.
feature_extractor {
type: 'faster_rcnn_resnet50_keras'
batch_norm_trainable: true
}
# First-stage anchor grid: 4 scales x 3 aspect ratios, placed with a stride of 16.
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
# Conv hyperparameters of the first-stage box predictor (L2 weight is 0.0).
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
# First-stage NMS settings, proposal cap and loss weights.
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
# Crop size and max-pool settings applied to the cropped proposal features.
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
# Second-stage box predictor; dropout is disabled.
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
share_box_across_classes: true
}
}
# Final NMS over the detections; scores are converted with softmax.
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
# NOTE(review): the flags below presumably exist for the TPU compatibility
# mentioned in the file header - confirm before changing them.
use_static_shapes: true
use_matmul_crop_and_resize: true
clip_anchors_to_image: true
use_static_balanced_label_sampler: true
use_matmul_gather_in_matcher: true
}
}
# Training setup: batch size 64 for 100000 steps.
train_config: {
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
num_steps: 100000
# Momentum optimizer (0.9) with a cosine-decayed learning rate:
# base 0.04, warm-up starting at 0.013333 over the first 2000 steps.
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 100000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
# Checkpoint to fine-tune from; the path is a placeholder that must be filled in.
# The checkpoint type is "classification".
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED"
fine_tune_checkpoint_type: "classification"
# Augmentations: horizontal flip, hue / contrast / saturation adjustment,
# and a square crop with scale between 0.6 and 1.3.
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_adjust_hue {
}
}
data_augmentation_options {
random_adjust_contrast {
}
}
data_augmentation_options {
random_adjust_saturation {
}
}
data_augmentation_options {
random_square_crop_by_scale {
scale_min: 0.6
scale_max: 1.3
}
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
use_bfloat16: true # works only on TPUs
}
# Training data: label map plus sharded TFRecord files matched by the
# wildcard pattern. Both paths are placeholders that must be filled in.
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
# Evaluation uses the COCO detection metric set with a batch size of 1.
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
batch_size: 1;
}
# Evaluation data: a single unshuffled pass (num_epochs: 1) over the
# validation TFRecord shards. Both paths are placeholders that must be filled in.
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
Что именно мне нужно было бы сделать, чтобы интегрировать эту структуру алгоритма ResNet-50 в алгоритм YOLO, как это было сделано в этом примере?
Вот основной файл YOLOv4, полученный после выполнения pip install yolov4:
import tensorflow as tf
from tensorflow.keras import layers, Model, Sequential
from .common import YOLOConv2D
class _ResBlock(Model):
    """Residual unit: two stacked ``YOLOConv2D`` layers added back onto the input.

    Args:
        filters_1: Filter count of the first convolution (``kernel_size=1``).
        filters_2: Filter count of the second convolution (``kernel_size=3``).
        activation: Activation name forwarded to both convolutions.
        kernel_regularizer: Regularizer forwarded to both convolutions.
    """

    def __init__(
        self,
        filters_1: int,
        filters_2: int,
        activation: str = "mish",
        kernel_regularizer=None,
    ):
        super().__init__()
        # Options that both convolutions have in common.
        shared = dict(activation=activation, kernel_regularizer=kernel_regularizer)
        self.conv1 = YOLOConv2D(filters=filters_1, kernel_size=1, **shared)
        self.conv2 = YOLOConv2D(filters=filters_2, kernel_size=3, **shared)
        self.add = layers.Add()

    def call(self, x):
        """Return ``x`` summed with the output of ``conv1`` followed by ``conv2``."""
        residual = self.conv2(self.conv1(x))
        return self.add([x, residual])
class ResBlock(Model):
    """Sequential stack of ``iteration`` identically configured ``_ResBlock`` units.

    Args:
        filters_1: Forwarded to every ``_ResBlock`` as ``filters_1``.
        filters_2: Forwarded to every ``_ResBlock`` as ``filters_2``.
        iteration: Number of ``_ResBlock`` units to stack.
        activation: Activation name forwarded to every unit.
        kernel_regularizer: Regularizer forwarded to every unit.
    """

    def __init__(
        self,
        filters_1: int,
        filters_2: int,
        iteration: int,
        activation: str = "mish",
        kernel_regularizer=None,
    ):
        super().__init__()
        self.iteration = iteration
        self.sequential = Sequential()

        # Every unit in the stack is built from the same arguments.
        unit_kwargs = dict(
            filters_1=filters_1,
            filters_2=filters_2,
            activation=activation,
            kernel_regularizer=kernel_regularizer,
        )
        for _ in range(iteration):
            self.sequential.add(_ResBlock(**unit_kwargs))

    def call(self, x):
        """Run ``x`` through the stacked residual units in order."""
        return self.sequential(x)
class CSPResNet(Model):
    """Cross Stage Partial (CSP) residual stage.

    A stride-2 convolution feeds two branches: ``part2`` is a single
    convolution, ``part1`` is a convolution, a ``ResBlock`` stack and another
    convolution. The branches are concatenated on the last axis and fused by
    a final convolution.

    Args:
        filters_1: Filter count of the stride-2 entry convolution and of the
            final fusing convolution; half of it (``filters_1 // 2``) is used
            as ``filters_1`` of the inner ``ResBlock``.
        filters_2: Filter count of both branch convolutions and ``filters_2``
            of the inner ``ResBlock``.
        iteration: Number of residual units in the inner ``ResBlock``.
        activation: Activation name forwarded to every sub-layer.
        kernel_regularizer: Regularizer forwarded to every sub-layer.
    """

    def __init__(
        self,
        filters_1: int,
        filters_2: int,
        iteration: int,
        activation: str = "mish",
        kernel_regularizer=None,
    ):
        super().__init__()
        shared = dict(activation=activation, kernel_regularizer=kernel_regularizer)

        self.pre_conv = YOLOConv2D(
            filters=filters_1, kernel_size=3, strides=2, **shared
        )

        # Do not change the order of declaration
        self.part2_conv = YOLOConv2D(filters=filters_2, kernel_size=1, **shared)

        self.part1_conv1 = YOLOConv2D(filters=filters_2, kernel_size=1, **shared)
        self.part1_res_block = ResBlock(
            filters_1=filters_1 // 2,
            filters_2=filters_2,
            iteration=iteration,
            **shared,
        )
        self.part1_conv2 = YOLOConv2D(filters=filters_2, kernel_size=1, **shared)

        self.concat1_2 = layers.Concatenate(axis=-1)

        self.post_conv = YOLOConv2D(filters=filters_1, kernel_size=1, **shared)

    def call(self, x):
        """Apply the entry convolution, both branches, then concatenate and fuse."""
        stem = self.pre_conv(x)

        # The part2 branch is evaluated first, matching the declaration order.
        shortcut = self.part2_conv(stem)
        main = self.part1_conv2(self.part1_res_block(self.part1_conv1(stem)))

        return self.post_conv(self.concat1_2([main, shortcut]))
class SPP(Model):
    """Spatial Pyramid Pooling (SPP) layer.

    Max-pools the input with 13x13, 9x9 and 5x5 windows (stride 1, "same"
    padding) and concatenates the three results with the unmodified input
    along the last axis.
    """

    def __init__(self):
        super().__init__()
        self.pool1 = layers.MaxPooling2D(pool_size=(13, 13), strides=1, padding="same")
        self.pool2 = layers.MaxPooling2D(pool_size=(9, 9), strides=1, padding="same")
        self.pool3 = layers.MaxPooling2D(pool_size=(5, 5), strides=1, padding="same")
        self.concat = layers.Concatenate(axis=-1)

    def call(self, x):
        """Return the three pooled tensors (largest window first) concatenated with ``x``."""
        pooled = [pool(x) for pool in (self.pool1, self.pool2, self.pool3)]
        return self.concat([*pooled, x])
class CSPDarknet53(Model):
    """CSPDarknet53 backbone producing three feature routes.

    The network is an initial convolution, five ``CSPResNet`` stages, three
    convolutions, an ``SPP`` layer and three more convolutions.

    Args:
        activation0: Activation name for ``conv0`` and the five ``CSPResNet``
            stages.
        activation1: Activation name for ``conv72`` .. ``conv77``.
        kernel_regularizer: Regularizer forwarded to every sub-layer that
            accepts one.
    """

    def __init__(
        self,
        activation0: str = "mish",
        activation1: str = "leaky",
        kernel_regularizer=None,
    ):
        super().__init__(name="CSPDarknet53")

        # Two groups of shared options: one per activation setting.
        body_kwargs = dict(
            activation=activation0, kernel_regularizer=kernel_regularizer
        )
        tail_kwargs = dict(
            activation=activation1, kernel_regularizer=kernel_regularizer
        )

        self.conv0 = YOLOConv2D(filters=32, kernel_size=3, **body_kwargs)

        self.res_block1 = CSPResNet(
            filters_1=64, filters_2=64, iteration=1, **body_kwargs
        )
        self.res_block2 = CSPResNet(
            filters_1=128, filters_2=64, iteration=2, **body_kwargs
        )
        self.res_block3 = CSPResNet(
            filters_1=256, filters_2=128, iteration=8, **body_kwargs
        )
        self.res_block4 = CSPResNet(
            filters_1=512, filters_2=256, iteration=8, **body_kwargs
        )
        self.res_block5 = CSPResNet(
            filters_1=1024, filters_2=512, iteration=4, **body_kwargs
        )

        self.conv72 = YOLOConv2D(filters=512, kernel_size=1, **tail_kwargs)
        self.conv73 = YOLOConv2D(filters=1024, kernel_size=3, **tail_kwargs)
        self.conv74 = YOLOConv2D(filters=512, kernel_size=1, **tail_kwargs)

        self.spp = SPP()

        self.conv75 = YOLOConv2D(filters=512, kernel_size=1, **tail_kwargs)
        self.conv76 = YOLOConv2D(filters=1024, kernel_size=3, **tail_kwargs)
        self.conv77 = YOLOConv2D(filters=512, kernel_size=1, **tail_kwargs)

    def call(self, x):
        """Run the backbone and return ``(route1, route2, route3)``.

        ``route1`` is the output of ``res_block3``, ``route2`` the output of
        ``res_block4`` and ``route3`` the output of ``conv77``.
        """
        features = self.res_block2(self.res_block1(self.conv0(x)))

        route1 = self.res_block3(features)
        route2 = self.res_block4(route1)

        features = self.res_block5(route2)
        features = self.conv74(self.conv73(self.conv72(features)))
        features = self.spp(features)
        route3 = self.conv77(self.conv76(self.conv75(features)))

        return (route1, route2, route3)
class CSPDarknet53Tiny(Model):
    """Tiny CSPDarknet53 backbone producing two feature routes.

    Three stride-free/strided stem convolutions are followed by three stages
    that share one pattern: split the tensor in two along the last axis, run
    two convolutions on the second part, concatenate their outputs, apply a
    ``kernel_size=1`` convolution, and concatenate with the stage input.
    Each stage is followed by a 2x2 max-pool with stride 2.

    Args:
        activation: Activation name forwarded to every ``YOLOConv2D``.
        kernel_regularizer: Regularizer forwarded to every ``YOLOConv2D``.
    """

    def __init__(
        self,
        activation: str = "leaky",
        kernel_regularizer=None,
    ):
        super().__init__(name="CSPDarknet53Tiny")
        shared = dict(activation=activation, kernel_regularizer=kernel_regularizer)

        # Stem
        self.conv0 = YOLOConv2D(filters=32, kernel_size=3, strides=2, **shared)
        self.conv1 = YOLOConv2D(filters=64, kernel_size=3, strides=2, **shared)
        self.conv2 = YOLOConv2D(filters=64, kernel_size=3, **shared)

        # Stage 1
        self.conv3 = YOLOConv2D(filters=32, kernel_size=3, **shared)
        self.conv4 = YOLOConv2D(filters=32, kernel_size=3, **shared)
        self.concat3_4 = layers.Concatenate(axis=-1)
        self.conv5 = YOLOConv2D(filters=64, kernel_size=1, **shared)
        self.concat2_5 = layers.Concatenate(axis=-1)
        self.maxpool5 = layers.MaxPool2D(pool_size=(2, 2), strides=2, padding="same")

        # Stage 2
        self.conv6 = YOLOConv2D(filters=128, kernel_size=3, **shared)
        self.conv7 = YOLOConv2D(filters=64, kernel_size=3, **shared)
        self.conv8 = YOLOConv2D(filters=64, kernel_size=3, **shared)
        self.concat7_8 = layers.Concatenate(axis=-1)
        self.conv9 = YOLOConv2D(filters=128, kernel_size=1, **shared)
        self.concat6_9 = layers.Concatenate(axis=-1)
        self.maxpool9 = layers.MaxPool2D(pool_size=(2, 2), strides=2, padding="same")

        # Stage 3
        self.conv10 = YOLOConv2D(filters=256, kernel_size=3, **shared)
        self.conv11 = YOLOConv2D(filters=128, kernel_size=3, **shared)
        self.conv12 = YOLOConv2D(filters=128, kernel_size=3, **shared)
        self.concat11_12 = layers.Concatenate(axis=-1)
        self.conv13 = YOLOConv2D(filters=256, kernel_size=1, **shared)
        self.concat10_13 = layers.Concatenate(axis=-1)
        self.maxpool13 = layers.MaxPool2D(pool_size=(2, 2), strides=2, padding="same")

        # Final convolution
        self.conv14 = YOLOConv2D(filters=512, kernel_size=3, **shared)

    def call(self, x):
        """Run the backbone and return ``(route1, route2)``.

        ``route1`` is the output of ``conv13`` and ``route2`` the output of
        ``conv14``.
        """
        # Stem
        trunk = self.conv2(self.conv1(self.conv0(x)))

        # Stage 1: only the second of the two splits is processed further.
        part = self.conv3(tf.split(trunk, 2, axis=-1)[1])
        merged = self.concat3_4([self.conv4(part), part])
        merged = self.concat2_5([trunk, self.conv5(merged)])
        trunk = self.conv6(self.maxpool5(merged))

        # Stage 2
        part = self.conv7(tf.split(trunk, 2, axis=-1)[1])
        merged = self.concat7_8([self.conv8(part), part])
        merged = self.concat6_9([trunk, self.conv9(merged)])
        trunk = self.conv10(self.maxpool9(merged))

        # Stage 3: the kernel_size=1 convolution output is also the first route.
        part = self.conv11(tf.split(trunk, 2, axis=-1)[1])
        merged = self.concat11_12([self.conv12(part), part])
        route1 = self.conv13(merged)
        merged = self.concat10_13([trunk, route1])

        route2 = self.conv14(self.maxpool13(merged))
        return route1, route2