https://www.bilibili.com/video/av43996494/?p=6
1 补充说明(修正前面代码存在问题):
# Prior-box filtering (first version; intentionally left flawed, see the note that follows)
def choose_anchor_boxes(self, predictions, anchor_box, n_box):
    """Keep only the anchor boxes whose best non-background score beats the threshold.

    NOTE: this is the original, flawed version that the text below analyses.
    ``scores`` is overwritten by its own filtered copy, so the last mask is
    computed from the already-filtered tensor and no longer lines up with
    ``anchor_box``.

    Args:
        predictions: class confidences, shape [-1, 10, 10, box_num, num_classes].
        anchor_box: decoded boxes, reshaped here to [n_box, 4].
        n_box: number of boxes the inputs are flattened to.

    Returns:
        Tuple ``(classes, scores, anchor_box)`` after thresholding.
    """
    # Flatten the 5-D tensors to 2-D: one row per anchor box.
    anchor_box = tf.reshape(anchor_box, [n_box, 4])
    # Column 0 is the background confidence; drop it and keep columns 1..20.
    predictions = tf.reshape(predictions, [n_box, 21])[:, 1:]
    # argmax is 0-based over the sliced columns, so +1 restores the class id.
    classes = tf.argmax(predictions, axis=1) + 1
    # Best class score per box; boxes above the threshold are kept.
    scores = tf.reduce_max(predictions, axis=1)
    # tf.boolean_mask(tensor, mask): first argument is filtered by the second.
    classes = tf.boolean_mask(classes, scores > self.threshold)
    scores = tf.boolean_mask(scores, scores > self.threshold)  # BUG: overwrites scores
    anchor_box = tf.boolean_mask(anchor_box, scores > self.threshold)  # BUG: mask built from filtered scores
    return classes, scores, anchor_box
# Commands worth studying: tf.reshape() tf.reduce_max() tf.boolean_mask()
上述代码存在问题:
因为scores会被覆盖
classes = tf.boolean_mask(classes, scores > self.threshold) # 前面放筛选目标, 后面放筛选条件
scores = tf.boolean_mask(scores, scores > self.threshold)
anchor_box = tf.boolean_mask(anchor_box, scores > self.threshold)
=>因而使用下面算法代替,进而不影响scores值: ?=?不太清楚此代码的具体实现
filter_mask = scores > self.threshold
classes = tf.boolean_mask(classes, filter_mask) # 前面放筛选目标, 后面放筛选条件
scores = tf.boolean_mask(scores, filter_mask)
anchor_box = tf.boolean_mask(anchor_box, filter_mask)
=>或者改名为scores_nk
classes = tf.boolean_mask(classes, scores > self.threshold) # 前面放筛选目标, 后面放筛选条件
scores_nk = tf.boolean_mask(scores, scores > self.threshold)
anchor_box = tf.boolean_mask(anchor_box, scores > self.threshold)
(采用改名方案时, 函数最后需要返回scores_nk而不是scores)
更新之后结果:
# /=== ===> 3: prior-box filtering <=== ===\
def choose_anchor_boxes(self, predictions, anchor_box, n_box):
    """Keep only the anchor boxes whose best non-background score beats the threshold.

    Args:
        predictions: class confidences, shape [-1, 10, 10, box_num, num_classes].
        anchor_box: decoded boxes, reshaped here to [n_box, 4].
        n_box: number of boxes the inputs are flattened to.

    Returns:
        Tuple ``(classes, scores, anchor_box)`` restricted to the boxes whose
        score is greater than ``self.threshold``.
    """
    # Flatten the 5-D tensors to 2-D: one row per anchor box.
    anchor_box = tf.reshape(anchor_box, [n_box, 4])
    prediction = tf.reshape(predictions, [n_box, 21])
    # Column 0 is the background confidence; drop it and keep columns 1..20.
    prediction = prediction[:, 1:]
    # axis=1 searches along each row; argmax is 0-based over the sliced
    # columns, so +1 restores the real class id.
    classes = tf.argmax(prediction, axis=1) + 1
    # Best class score per box.
    scores = tf.reduce_max(prediction, axis=1)
    # Build the mask once, before scores itself is filtered.
    filter_mask = scores > self.threshold
    # tf.boolean_mask(tensor, mask): first argument is filtered by the second.
    classes = tf.boolean_mask(classes, filter_mask)
    scores = tf.boolean_mask(scores, filter_mask)
    anchor_box = tf.boolean_mask(anchor_box, filter_mask)
    return classes, scores, anchor_box
# Commands worth studying: tf.reshape() tf.reduce_max() tf.boolean_mask()
# \=== ===> 3: prior-box filtering <=== ===/
# \&#61;&#61;&#61; &#61;&#61;&#61; &#61;&#61;&#61; &#61;&#61;&#61;> 先验框生成*解码*先验框筛选-start <&#61;&#61;&#61; &#61;&#61;&#61; &#61;&#61;&#61; &#61;&#61;&#61;/
2 先验框排序
# /=== ===> 1: prior-box sorting <=== ===\
def bboxes_sort(self, classes, scores, bboxes, top_k=400):
    """Order detections by descending score and keep at most ``top_k`` of them.

    Args:
        classes: 1-D NumPy array of class ids.
        scores: 1-D NumPy array of scores, aligned with ``classes``.
        bboxes: NumPy array of boxes whose first axis is aligned with ``scores``.
        top_k: maximum number of detections to keep.

    Returns:
        Tuple ``(classes, scores, bboxes)`` reordered by score, highest first.
    """
    # Negating the scores makes the ascending argsort yield a descending order.
    idxes = np.argsort(-scores)
    # Apply the same permutation to all three arrays, then truncate.
    classes = classes[idxes][:top_k]
    scores = scores[idxes][:top_k]
    bboxes = bboxes[idxes][:top_k]
    return classes, scores, bboxes
# \&#61;&#61;&#61; &#61;&#61;&#61;> 1:先验框排序 <&#61;&#61;&#61; &#61;&#61;&#61;/np.argsort()
3 IOU
# /=== ===> 2: IOU computation <=== ===\
def bboxes_iou(self, bboxes1, bboxes2):
    """Return the intersection-over-union of ``bboxes1`` with ``bboxes2``.

    Boxes are laid out as ``[y_min, x_min, y_max, x_max]`` along the last
    axis. Either argument may be a single box (shape ``(4,)``) or a stack of
    boxes (shape ``(n, 4)``); NumPy broadcasting pairs them up.

    Args:
        bboxes1: one box or an array of boxes.
        bboxes2: one box or an array of boxes.

    Returns:
        The IOU value(s) as a NumPy scalar or array.
    """
    # After the transpose, index 0..3 selects y_min, x_min, y_max, x_max.
    bboxes1 = np.transpose(bboxes1)
    bboxes2 = np.transpose(bboxes2)
    # Intersection rectangle: top-left is the max of the two top-left corners,
    # bottom-right is the min of the two bottom-right corners.
    int_ymin = np.maximum(bboxes1[0], bboxes2[0])
    int_xmin = np.maximum(bboxes1[1], bboxes2[1])
    int_ymax = np.minimum(bboxes1[2], bboxes2[2])
    int_xmax = np.minimum(bboxes1[3], bboxes2[3])
    # Disjoint boxes give a negative extent; clamp it to 0.
    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w  # intersection area
    # Area = height * width = (y_max - y_min) * (x_max - x_min).
    vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
    vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
    iou = int_vol / (vol1 + vol2 - int_vol)  # intersection / union
    return iou
# \&#61;&#61;&#61; &#61;&#61;&#61;> 2:计算IOU <&#61;&#61;&#61; &#61;&#61;&#61;/np.transpose()
4 非极大值抑制
https://mp.csdn.net/postedit/98534699
# /=== ===> 3: non-maximum suppression (NMS) <=== ===\
def bboxes_nms(self, classes, scores, bboxes, nms_threshold=0.5):
    """Suppress boxes that overlap an earlier box of the same class.

    Boxes are visited in index order and an earlier box always wins over a
    later one, so the inputs are presumably expected to be sorted by
    descending score first (e.g. by ``bboxes_sort``) - confirm at the call site.

    Args:
        classes: 1-D NumPy array of class ids.
        scores: 1-D NumPy array of scores, aligned with ``classes``.
        bboxes: NumPy array of boxes, shape ``(n, 4)``.
        nms_threshold: a later box of the same class is dropped when its IOU
            with a kept box is greater than or equal to this value.

    Returns:
        Tuple ``(classes, scores, bboxes)`` restricted to the surviving boxes.
    """
    # The builtin bool replaces the np.bool alias, which newer NumPy removed.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size - 1):
        if keep_bboxes[i]:
            # One box against all later boxes; broadcasting yields one IOU each.
            overlap = self.bboxes_iou(bboxes[i], bboxes[(i + 1):])
            # A later box survives this round if it overlaps little enough
            # OR belongs to a different class.
            keep_overlap = np.logical_or(overlap < nms_threshold,
                                         classes[(i + 1):] != classes[i])
            # Boxes suppressed in an earlier round must stay suppressed.
            keep_bboxes[(i + 1):] = np.logical_and(keep_bboxes[(i + 1):], keep_overlap)
    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]
# \&#61;&#61;&#61; &#61;&#61;&#61;> 3:非极大值抑制nms <&#61;&#61;&#61; &#61;&#61;&#61;/ # /&#61;&#61;&#61; &#61;&#61;&#61;> 3:非极大值抑制nms <&#61;&#61;&#61; &#61;&#61;&#61;\
&#61;> def bboxes_nms(self, classes, scores, bboxes, nms_threshold&#61;0.5):
&#61;> keep_bboxes &#61; np.ones(scores.shape, dtype&#61;np.bool)
>>> a = np.array([[1, 2], [3, 4]])  # 用a代替scores做演示
>>> keep_bboxes = np.ones(a.shape, dtype=bool)
>>> keep_bboxes
array([[ True,  True],
       [ True,  True]])
&#61;> for i in range(scores.size - 1):
>>> a.size &#61; 4
&#61;> if keep_bboxes[i]:
&#61;> overlap &#61; self.bboxes_iou(bboxes[i], bboxes[(i &#43; 1):]) # 每个bboxes[i]都与其他的计算得到iou
这里会自动广播(broadcast): bboxes[i]是单个框, bboxes[(i + 1):]是它后面的多个框
overlap是一个数组, 每个元素是bboxes[i]与后面某一个框的IOU
示例如下&#xff1a;
# &#61;&#61;&#61;> 解释上述代码 <&#61;&#61;&#61;
import numpy as np
def bboxes_iou(a, b):
    """Stand-in for the real IOU: add ``a`` and ``b`` to show the broadcasting."""
    iou = a + b
    return iou


if __name__ == '__main__':
    c = np.array([1, 2, 3, 4])
    for i in range(3):
        # A scalar against the tail of the array: one result per tail element.
        overlap = bboxes_iou(c[i], c[(i + 1):])
        print(overlap)
>>>[3 4 5]
>>>[5 6]
>>>[7]
=> keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i + 1):] != classes[i])
# 逻辑或
交并比<阈值的 or 两者类别不同的, 在keep_overlap里为True, 这些框会被保留; 只有类别相同且交并比>=阈值的框才为False, 会被抑制(去掉)
# &#61;&#61;&#61;> 解释上述代码 <&#61;&#61;&#61;
import numpy as np
def bboxes_iou(a, b):
    """Stand-in for the real IOU: add ``a`` and ``b`` to show the broadcasting."""
    iou = a + b
    return iou


if __name__ == '__main__':
    a = np.array([1, 2, 3, 1])
    classes = np.array([1, 1, 2, 1])
    for i in range(3):
        overlap = bboxes_iou(a[i], a[(i + 1):])
        # print('overlap:', overlap)
        # print('classes[i:] != classes[i]:', classes[(i + 1):] != classes[i])
        # True where the fake overlap is below 3 OR the class differs.
        keepoverlap = np.logical_or(overlap < 3, classes[(i + 1):] != classes[i])
        # print('keepoverlap:', keepoverlap)
        # print('===> 换行 <===')
# &#61;&#61;&#61;> 输出 <&#61;&#61;&#61;
overlap: [3 4 2]
classes[i:] !&#61; classes[i]: [False True False]
keepoverlap: [False True True]
# &#61;&#61;&#61;> 换行 <&#61;&#61;&#61;
overlap: [5 3]
classes[i:] !&#61; classes[i]: [ True False]
keepoverlap: [ True False]
# &#61;&#61;&#61;> 换行 <&#61;&#61;&#61;
overlap: [4]
classes[i:] !&#61; classes[i]: [ True]
keepoverlap: [ True]
# &#61;&#61;&#61;> 换行 <&#61;&#61;&#61;
&#61;> keep_bboxes[(i&#43;1):] &#61; np.logical_and(keep_bboxes[(i&#43;1):], keep_overlap) # 逻辑与
# keep_bboxes[(i&#43;1):]是当前bbox后面的一些框, keep_overlap是数组里面有true false, 与操作留下想要的
c &#61; np.ones([2, ], dtype&#61;np.bool)
print(c)
a &#61; np.array([True, False])
b &#61; np.array([1, 2])
c &#61; np.logical_and(a, b)
print(c)
# 第一次print输出: [ True  True]
# 第二次print输出: [ True False]
=> idxes = np.where(keep_bboxes)
# https://www.cnblogs.com/massquantity/p/8908859.html
a &#61; np.array([True, False])
b &#61; np.array([1, 2])
print(np.where(a))# (array([0]),)
&#61;> return classes[idxes], scores[idxes], bboxes[idxes]
# \&#61;&#61;&#61; &#61;&#61;&#61;> 3:非极大值抑制nms <&#61;&#61;&#61; &#61;&#61;&#61;/&#61;>此部分指令&#xff1a;
np.ones(scores.shape, dtype=np.bool)
np.logical_or(overlap < nms_threshold, classes[(i + 1):] != classes[i])
idxes &#61; np.where(keep_bboxes)
需要学的python命令:
1 np.argsort()
2 np.transpose()
3 np.ones(scores.shape, dtype&#61;np.bool)
>>> a &#61; np.array([[1,2],[3,4]])
>>> a
array([[1, 2],[3, 4]])
>>> b &#61; np.ones(a.shape, dtype&#61;np.bool)
>>> b
array([[ True, True],[ True, True]], dtype&#61;bool)
>>> a.size
4
4 np.logical_or(overlap < nms_threshold, classes[(i + 1):] != classes[i])
5 keep_bboxes[(i&#43;1):] &#61; np.logical_and(keep_bboxes[(i&#43;1):], keep_overlap)
>>> np.logical_and(True, False)
False
>>> np.logical_and([True, False], [False, False])
array([False, False], dtype&#61;bool)
>>> x &#61; np.arange(5)
>>> x
array([0, 1, 2, 3, 4])
>>> np.logical_and(x>1, x<4)
array([False, False, True, True, False], dtype=bool)
6 idxes = np.where(keep_bboxes)
当前最终版&#xff1a;
#!usr/bin/python
# -*- coding: utf-8 -*-
# Creation Date: 2019/7/10
import tensorflow as tf
import numpy as np
import cv2
# Legend for the comment markers used in this file
# 1  /=== === === ===> xxxx <=== === === ===\   level-1 heading, start
#    |=== === === ===> xxxx                     note on a level-1 heading
#    \=== === === ===> xxxx <=== === === ===/   level-1 heading, end
# 2  /=== === ===> xxxx <=== === ===\           level-2 heading, start
#    |=== === ===> xxxx                         note on a level-2 heading
#    \=== === ===> xxxx <=== === ===/           level-2 heading, end
# 3  /=== ===> xxxx <=== ===\                   level-3 heading, start
#    |=== ===> xxxx                             note on a level-3 heading
#    \=== ===> xxxx <=== ===/                   level-3 heading, end
# 4  /===> xxxx <===\                           level-4 heading ("===> xxxx <===" is the short form)
#    |===> xxxx                                 note on a level-4 heading
#    \===> xxxx <===/                           level-4 heading, end
# 5  ==> or =>                                  key point / special case
# 6  ?=?                                        open question


class ssd(object):
    """SSD-300 on a VGG-16 backbone: network graph, anchor generation, decoding, filtering."""

    def __init__(self):
        """Set the fixed hyper-parameters of the SSD-300 configuration."""
        self.num_boxes = []  # intended to collect anchor-box counts; not filled in this class
        # Spatial size of the six detection feature maps.
        self.feaeture_map_size = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
        # The 20 object classes; together with the background that makes 21.
        self.classes = ["aeroplane", "bicycle", "bird", "boat", "bottle",
                        "bus", "car", "cat", "chair", "cow",
                        "diningtable", "dog", "horse", "motorbike", "person",
                        "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
        # Names of the feature layers used for detection.
        self.feature_layers = ['block4', 'block7', 'block8', 'block9', 'block10', 'block11']
        self.img_size = (300, 300)  # input image size
        self.num_classes = 21  # background counts as a class and takes index 0
        # Anchor boxes per feature-map cell: 4 on block4/10/11, 6 on the others.
        self.boxes_len = [4, 6, 6, 6, 4, 4]
        # block4:  38 x 38 x 4 = 5776
        # block7:  19 x 19 x 6 = 2166
        # block8:  10 x 10 x 6 = 600
        # block9:   5 x  5 x 6 = 150
        # block10:  3 x  3 x 4 = 36
        # block11:  1 x  1 x 4 = 4
        # total: 8732 anchor boxes
        # Only block4 is L2-normalised (it sits early, so its activations are larger).
        self.isL2norm = [True, False, False, False, False, False]
        self.anchor_size = [(21., 45.), (45., 99.), (99., 153.),
                            (153., 207.), (207., 261.), (261., 315.)]
        self.anchor_ratios = [[2, .5], [2, .5, 3, 1. / 3], [2, .5, 3, 1. / 3],
                              [2, .5, 3, 1. / 3], [2, .5], [2, .5]]
        self.anchor_steps = [8, 16, 32, 64, 100, 300]
        # Scaling of the predicted offsets: 0.1 for x/y, 0.2 for w/h.
        self.prior_scaling = [0.1, 0.1, 0.2, 0.2]
        # Anchor boxes per feature map (H x W x boxes per cell, see the table above).
        self.n_boxes = [5776, 2166, 600, 150, 36, 4]
        # Score threshold; the author notes the paper uses 0.5 and lowers it
        # to 0.2 to detect more objects.
        self.threshold = 0.2

    # /=== === === ===> SSD network architecture <=== === === ===\
    # ===> L2 normalisation <===
    def l2norm(self, x, scale, trainable=True, scope='L2Normalization'):
        """L2-normalise ``x`` over its channel axis and rescale it per channel.

        Args:
            x: 4-D feature tensor with channels on the last axis.
            scale: initial value of the per-channel scale ``gamma``.
            trainable: whether ``gamma`` is trainable.
            scope: variable scope that holds ``gamma``.

        Returns:
            The normalised tensor multiplied by ``gamma``.
        """
        n_channels = x.get_shape().as_list()[-1]  # static shape -> list -> last entry
        # Normalise every pixel over axis 3 (the channels) only.
        l2_norm = tf.nn.l2_normalize(x, dim=[3], epsilon=1e-12)
        with tf.variable_scope(scope):
            gamma = tf.get_variable("gamma", shape=[n_channels, ], dtype=tf.float32,
                                    initializer=tf.constant_initializer(scale),
                                    trainable=trainable)
        return l2_norm * gamma

    # /=== ===> CNN building blocks <=== ===\
    # |=== === conv2d, max_pool2d, pad2d, dropout
    def conv2d(self, x, filter, k_size,
               stride=(1, 1), padding='same', dilation=(1, 1),
               activation=tf.nn.relu, scope='conv2d'):
        """Thin wrapper around ``tf.layers.conv2d``.

        Args:
            x: input tensor.
            filter: number of convolution kernels.
            k_size: kernel size, e.g. ``[3, 3]``.
            stride: convolution stride.
            padding: ``'same'`` or ``'valid'``.
            dilation: dilation rate; ``(1, 1)`` is an ordinary convolution.
            activation: activation function, or ``None`` for a linear output.
            scope: layer name.

        Returns:
            The output tensor of the convolution.
        """
        return tf.layers.conv2d(inputs=x, filters=filter, kernel_size=k_size,
                                strides=stride, dilation_rate=dilation, padding=padding,
                                name=scope, activation=activation)

    def max_pool2d(self, x, pool_size, stride, scope='max_pool2d', padding='same'):
        """Thin wrapper around ``tf.layers.max_pooling2d``.

        'same' padding is required for the declared feature-map sizes: it
        rounds 75 up to 38 at pool3 and keeps 19 x 19 through the stride-1
        pool5, whereas 'valid' would give 37 and shrink every later map.
        """
        return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride,
                                       padding=padding, name=scope)

    def pad2d(self, x, pad):
        """Zero-pad height and width by ``pad`` on each side.

        Used before the stride-2 'valid' convolutions of block8 and block9,
        which are padded explicitly instead of using the layer's own padding.
        """
        return tf.pad(x, paddings=[[0, 0], [pad, pad], [pad, pad], [0, 0]])

    def dropout(self, x, d_rate=0.5):
        """Thin wrapper around ``tf.layers.dropout``."""
        return tf.layers.dropout(inputs=x, rate=d_rate)

    def ssd_prediction(self, x, num_classes, box_num, isL2norm, scope='multibox'):
        """Build the location and class prediction heads for one feature layer.

        Args:
            x: feature tensor, e.g. shape (?, 10, 10, 512) for block8.
            num_classes: number of classes including the background.
            box_num: anchor boxes per feature-map cell.
            isL2norm: whether to L2-normalise ``x`` first.
            scope: variable scope for the two heads.

        Returns:
            Tuple ``(location_pred, class_pred)`` with shapes
            ``[-1, H, W, box_num, 4]`` and ``[-1, H, W, box_num, num_classes]``.
        """
        # Drop the first (batch) and last (channel) entries of the shape and
        # put -1 in front for the unknown batch size, e.g.
        # (?, 10, 10, 512) -> [-1, 10, 10].
        reshape = [-1] + x.get_shape().as_list()[1:-1]
        with tf.variable_scope(scope):
            if isL2norm:
                # 20 is the initial scale the SSD paper uses for conv4_3
                # - TODO confirm against the checkpoint being loaded.
                x = self.l2norm(x, scale=20)
            # ==> location head: a regression, so no activation and no softmax.
            # Kernels = boxes per cell x 4 box values, kernel size 3 x 3.
            location_pred = self.conv2d(x, filter=box_num * 4, k_size=[3, 3],
                                        activation=None, scope='conv_loc')
            # -> [-1, H, W, box_num, 4]
            location_pred = tf.reshape(location_pred, reshape + [box_num, 4])
            # ==> class head: the softmax is applied by the caller.
            # Kernels = boxes per cell x number of classes, kernel size 3 x 3.
            class_pred = self.conv2d(x, filter=box_num * num_classes, k_size=[3, 3],
                                     activation=None, scope='conv_cls')
            # -> [-1, H, W, box_num, num_classes]
            class_pred = tf.reshape(class_pred, reshape + [box_num, num_classes])
            print(location_pred, class_pred)
            return location_pred, class_pred
    # \=== ===> CNN building blocks <=== ===/

    # /=== ===> concrete network architecture <=== ===\
    def set_net(self):
        """Build the SSD-300 graph.

        Returns:
            Tuple ``(locations, predictions, x)``: two lists with one 5-D
            tensor per detection layer (box offsets, softmaxed class scores)
            and the input placeholder.
        """
        check_points = {}  # feature layers by name, iterated over below
        predictions = []
        locations = []
        x = tf.placeholder(dtype=tf.float32, shape=[None, 300, 300, 3])
        with tf.variable_scope('ssd_300_vgg'):
            # ===> first five VGG blocks <===
            # b1: two 3x3 convolutions with 64 kernels, then a 2x2 stride-2 pool
            net = self.conv2d(x, filter=64, k_size=[3, 3], scope='conv1_1')
            net = self.conv2d(net, 64, [3, 3], scope='conv1_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool1')
            # b2
            net = self.conv2d(net, filter=128, k_size=[3, 3], scope='conv2_1')
            net = self.conv2d(net, 128, [3, 3], scope='conv2_2')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool2')
            # b3
            net = self.conv2d(net, filter=256, k_size=[3, 3], scope='conv3_1')
            net = self.conv2d(net, 256, [3, 3], scope='conv3_2')
            net = self.conv2d(net, 256, [3, 3], scope='conv3_3')
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool3')
            # b4 => 1st detection layer
            net = self.conv2d(net, filter=512, k_size=[3, 3], scope='conv4_1')
            net = self.conv2d(net, 512, [3, 3], scope='conv4_2')
            net = self.conv2d(net, 512, [3, 3], scope='conv4_3')
            check_points['block4'] = net
            net = self.max_pool2d(net, pool_size=[2, 2], stride=[2, 2], scope='pool4')
            # b5: departs from VGG here, the pool is 3x3 with stride 1
            net = self.conv2d(net, filter=512, k_size=[3, 3], scope='conv5_1')
            net = self.conv2d(net, 512, [3, 3], scope='conv5_2')
            net = self.conv2d(net, 512, [3, 3], scope='conv5_3')
            net = self.max_pool2d(net, pool_size=[3, 3], stride=[1, 1], scope='pool5')
            # ===> convolutions replacing the VGG fully connected layers <===
            # b6 conv6: 3x3x1024, dilation 6
            net = self.conv2d(net, filter=1024, k_size=[3, 3], dilation=[6, 6], scope='conv6')
            # b7 conv7: 1x1x1024 => 2nd detection layer
            net = self.conv2d(net, filter=1024, k_size=[1, 1], scope='conv7')
            check_points['block7'] = net
            # b8 conv8_1: 1x1x256; conv8_2: 3x3x512, stride 2, valid => 3rd detection layer
            net = self.conv2d(net, 256, [1, 1], scope='conv8_1x1')
            net = self.conv2d(self.pad2d(net, 1), 512, [3, 3], [2, 2],
                              scope='conv8_3x3', padding='valid')
            check_points['block8'] = net
            # b9 conv9_1: 1x1x128; conv9_2: 3x3x256, stride 2, valid => 4th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv9_1x1')
            net = self.conv2d(self.pad2d(net, 1), 256, [3, 3], [2, 2],
                              scope='conv9_3x3', padding='valid')
            check_points['block9'] = net
            # b10 conv10_1: 1x1x128; conv10_2: 3x3x256, stride 1, valid => 5th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv10_1x1')
            net = self.conv2d(net, 256, [3, 3], scope='conv10_3x3', padding='valid')
            check_points['block10'] = net
            # b11 conv11_1: 1x1x128; conv11_2: 3x3x256, stride 1, valid => 6th detection layer
            net = self.conv2d(net, 128, [1, 1], scope='conv11_1x1')
            net = self.conv2d(net, 256, [3, 3], scope='conv11_3x3', padding='valid')
            check_points['block11'] = net
            # One prediction head per detection layer; i is the layer index,
            # layer_name its name (e.g. 'block4').
            # NOTE(review): the flattened source does not show whether this
            # loop sat inside the 'ssd_300_vgg' scope - confirm against the
            # variable names of the checkpoint being loaded.
            for i, layer_name in enumerate(self.feature_layers):
                loc, cls_pred = self.ssd_prediction(x=check_points[layer_name],
                                                    num_classes=self.num_classes,
                                                    box_num=self.boxes_len[i],
                                                    isL2norm=self.isL2norm[i],
                                                    scope=layer_name + '_box')
                predictions.append(tf.nn.softmax(cls_pred))  # class scores need a softmax
                locations.append(loc)  # box offsets do not
        print(check_points)  # inspect the structure, e.g. block8: (?, 10, 10, 512)
        print(locations, predictions)
        # locations is a list of 5-D tensors shaped like [-1, 10, 10, box_num, 4];
        # ssd_decode reads the last axis as 0: x, 1: y, 2: w, 3: h.
        return locations, predictions, x
    # \=== ===> concrete network architecture <=== ===/
    # \=== === === ===> SSD network architecture, end <=== === === ===/

    # /=== === === ===> anchor generation * decoding * anchor filtering, start <=== === === ===\
    # /=== ===> 1: anchor generation, start <=== ===\
    # |=== for block8 (10 x 10) the six boxes per cell are:
    # |=== h[0] small square, h[1] large square, h[2] ratio 2, h[3] ratio 1/2,
    # |=== h[4] ratio 3, h[5] ratio 1/3
    def ssd_anchor_layer(self, img_size, feature_map_size,
                         anchor_size, anchor_ratio, anchor_step,
                         box_num, offset=0.5):
        """Generate the anchor boxes of one feature layer, relative to the image size.

        Args:
            img_size: image size, indexed [0] for the y axis and [1] for the x axis.
            feature_map_size: ``(rows, cols)`` of the feature map.
            anchor_size: ``(min_size, max_size)`` of the anchors in pixels.
            anchor_ratio: aspect ratios of the non-square anchors.
            anchor_step: pixel distance between neighbouring cells.
            box_num: anchors per cell; must equal ``2 + len(anchor_ratio)``.
            offset: position of the anchor centre inside a cell.

        Returns:
            Tuple ``(y, x, h, w)``: centre grids of shape ``(rows, cols)`` and
            anchor heights/widths of shape ``(box_num,)``.
        """
        # Row and column index of every cell, e.g. for 10 x 10:
        # x = [[0 1 ... 9], [0 1 ... 9], ...], y = [[0 0 ... 0], [1 1 ... 1], ...]
        y, x = np.mgrid[0:feature_map_size[0], 0:feature_map_size[1]]
        y = (y.astype(np.float32) + offset) * anchor_step / img_size[0]
        x = (x.astype(np.float32) + offset) * anchor_step / img_size[1]
        h = np.zeros((box_num,), np.float32)
        w = np.zeros((box_num,), np.float32)
        # The two square anchors (aspect ratio 1).
        h[0] = anchor_size[0] / img_size[0]  # small square
        w[0] = anchor_size[0] / img_size[1]
        h[1] = (anchor_size[0] * anchor_size[1]) ** 0.5 / img_size[0]  # large square
        w[1] = (anchor_size[0] * anchor_size[1]) ** 0.5 / img_size[1]
        # One anchor per aspect ratio: height shrinks and width grows by sqrt(ratio).
        for i, ratio in enumerate(anchor_ratio):
            h[i + 2] = anchor_size[0] / img_size[0] / (ratio ** 0.5)
            w[i + 2] = anchor_size[0] / img_size[1] * (ratio ** 0.5)
        # Example for block8, anchor_size = (99, 153), image 300 x 300:
        # h[0] = 99/300            w[0] = 99/300             small square
        # h[1] = sqrt(99*153)/300  w[1] = sqrt(99*153)/300   large square
        # h[2] = 99/300/sqrt(2)    w[2] = 99/300*sqrt(2)     wide, ratio 2
        # h[3] = 99/300/sqrt(.5)   w[3] = 99/300*sqrt(.5)    tall, ratio 1/2
        # h[4] = 99/300/sqrt(3)    w[4] = 99/300*sqrt(3)     wide, ratio 3
        # h[5] = 99/300/sqrt(1/3)  w[5] = 99/300*sqrt(1/3)   tall, ratio 1/3
        # h = [0.33, 0.41024384, 0.23334524, 0.46669048, 0.19052559, 0.57157677]
        # w = [0.33, 0.41024384, 0.46669048, 0.23334524, 0.57157677, 0.19052559]
        return y, x, h, w
    # \=== ===> 1: anchor generation, end <=== ===/

    # /=== ===> 2: decoding, start <=== ===\
    def ssd_decode(self, location, box, prior_scaling):
        """Turn predicted offsets into corner-format boxes.

        Args:
            location: one entry of the ``locations`` list from ``set_net``,
                shape ``[-1, H, W, box_num, 4]``; the last axis is read as
                0: x, 1: y, 2: w, 3: h.
            box: tuple ``(y, x, h, w)`` from ``ssd_anchor_layer``.
            prior_scaling: four scaling factors for the x, y, w and h offsets.

        Returns:
            Tensor of shape ``[-1, H, W, box_num, 4]`` holding
            ``[y_min, x_min, y_max, x_max]``.
        """
        y_a, x_a, h_a, w_a = box
        # The centre grids are (H, W); a trailing axis lets them broadcast
        # against the (batch, H, W, box_num) offset tensors.
        if np.ndim(y_a) == 2:
            y_a = np.expand_dims(y_a, axis=-1)
        if np.ndim(x_a) == 2:
            x_a = np.expand_dims(x_a, axis=-1)
        cx = location[:, :, :, :, 0] * w_a * prior_scaling[0] + x_a
        cy = location[:, :, :, :, 1] * h_a * prior_scaling[1] + y_a
        w = w_a * tf.exp(location[:, :, :, :, 2] * prior_scaling[2])
        h = h_a * tf.exp(location[:, :, :, :, 3] * prior_scaling[3])
        print(cx, cy, w, h)
        # Top-left corner is (cy - h/2, cx - w/2), bottom-right is (cy + h/2, cx + w/2).
        bboxes = tf.stack([cy - h / 2.0, cx - w / 2.0, cy + h / 2.0, cx + w / 2.0], axis=-1)
        print(bboxes)
        return bboxes
    # \=== ===> 2: decoding, end <=== ===/

    # /=== ===> 3: anchor filtering <=== ===\
    def choose_anchor_boxes(self, predictions, anchor_box, n_box):
        """Keep only the anchor boxes whose best non-background score beats the threshold.

        Args:
            predictions: class confidences, shape [-1, H, W, box_num, num_classes].
            anchor_box: decoded boxes, reshaped here to [n_box, 4].
            n_box: number of boxes the inputs are flattened to.

        Returns:
            Tuple ``(classes, scores, anchor_box)`` restricted to the boxes
            whose score is greater than ``self.threshold``.
        """
        # Flatten the 5-D tensors to 2-D: one row per anchor box.
        anchor_box = tf.reshape(anchor_box, [n_box, 4])
        prediction = tf.reshape(predictions, [n_box, self.num_classes])
        # Column 0 is the background confidence; drop it.
        prediction = prediction[:, 1:]
        # axis=1 searches along each row; argmax is 0-based over the sliced
        # columns, so +1 restores the real class id.
        classes = tf.argmax(prediction, axis=1) + 1
        scores = tf.reduce_max(prediction, axis=1)  # best class score per box
        # Build the mask once, before scores itself is filtered.
        filter_mask = scores > self.threshold
        # tf.boolean_mask(tensor, mask): first argument is filtered by the second.
        classes = tf.boolean_mask(classes, filter_mask)
        scores = tf.boolean_mask(scores, filter_mask)
        anchor_box = tf.boolean_mask(anchor_box, filter_mask)
        return classes, scores, anchor_box
    # Commands worth studying: tf.reshape() tf.reduce_max() tf.boolean_mask()
    # \=== ===> 3: anchor filtering <=== ===/
# \=== === === ===> anchor generation * decoding * anchor filtering, end <=== === === ===/


if __name__ == '__main__':
    sd = ssd()
    locations, predictions, x = sd.set_net()
    # Index 2 selects block8, the third detection layer (10 x 10 cells, 6 boxes per cell).
    layer = 2
    box = sd.ssd_anchor_layer(sd.img_size, sd.feaeture_map_size[layer],
                              sd.anchor_size[layer], sd.anchor_ratios[layer],
                              sd.anchor_steps[layer], sd.boxes_len[layer])
    boex = sd.ssd_decode(locations[layer], box, sd.prior_scaling)
    # The author reports Tensor("stack:0", shape=(?, 10, 10, 6, 4), dtype=float32) for block8:
    #   10, 10 - the feature map of locations[2] is 10 x 10
    #   6      - anchor boxes per feature-map cell
    #   4      - box corners [y_min, x_min, y_max, x_max]
    # locations[0] is 38x38, [1] is 19x19, [2] is 10x10, [3] is 5x5, [4] is 3x3, [5] is 1x1.
    print(boex)
    cls, sco, a_box = sd.choose_anchor_boxes(predictions[layer], boex, sd.n_boxes[layer])
    print('----------------------------')
    print(cls, sco, a_box)