Tags: end, sha, pre, largest, factor, hold, matching, from, one
def encode(matched, priors, variances):
    """Encode matched ground-truth boxes as regression targets relative
    to the prior boxes, scaled by the priorbox variances.

    Args:
        matched: (tensor) Ground-truth coords matched to each prior,
            point-form (xmin, ymin, xmax, ymax). Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form (cx, cy, w, h).
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of the priorboxes;
            variances[0] scales the center offsets, variances[1] the log sizes.
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """
    # Center of each matched gt box, then its offset from the prior center,
    # normalized by the prior's width/height and the center variance.
    gt_centers = 0.5 * (matched[:, :2] + matched[:, 2:])
    offset_cxcy = (gt_centers - priors[:, :2]) / (variances[0] * priors[:, 2:])
    # Log of the gt-to-prior size ratio, scaled by the size variance.
    size_ratio = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
    offset_wh = torch.log(size_ratio) / variances[1]
    # Targets for smooth_l1_loss: [num_priors, 4]
    return torch.cat([offset_cxcy, offset_wh], 1)
matched has shape [8732, 4]. matched is really the ground truth, in (xmin, ymin, xmax, ymax) format. Normally an image contains only a few objects (say 2) — so why are there 8732 rows here? They come from the match function: each of the 8732 preset anchors is bound to one ground-truth box (the match function below explains how).
That is the whole encode step: compute the center and width/height offsets between each anchor and the ground-truth box bound to it.
def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
    """Match each prior box with the ground-truth box of highest jaccard
    overlap, encode the bounding boxes, and write the per-prior location
    and confidence targets for batch element ``idx``.

    Args:
        threshold: (float) Overlap threshold used when matching boxes;
            priors below it are labeled background.
        truths: (tensor) Ground-truth boxes in point-form,
            Shape: [num_obj, 4].
        priors: (tensor) Prior boxes from priorbox layers in
            center-offset form, Shape: [num_priors, 4].
        variances: (list[float]) Variances of the priorboxes (see encode).
        labels: (tensor) Class labels for the image, Shape: [num_obj].
        loc_t: (tensor) Tensor to be filled w/ encoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
        idx: (int) Current batch index.

    Returns:
        None. Results are written in place: loc_t[idx] gets the encoded
        offsets [num_priors, 4]; conf_t[idx] gets the class index per
        prior [num_priors] (0 = background).
    """
    # IoU between every gt box and every prior: [num_obj, num_priors].
    overlaps = jaccard(
        truths,
        point_form(priors)
    )
    # (Bipartite matching) best prior index for each ground truth: [num_obj].
    # NOTE: the original also kept best_prior_overlap, which was never used.
    best_prior_idx = overlaps.max(1)[1]
    # Best ground truth (value and index) for each prior: [num_priors] each.
    best_truth_overlap, best_truth_idx = overlaps.max(0)
    # Force the bipartite-matched priors to survive the threshold filter
    # below by giving them an overlap (2) that exceeds any real IoU (<= 1).
    best_truth_overlap.index_fill_(0, best_prior_idx, 2)  # ensure best prior
    # TODO refactor: index best_prior_idx with long tensor
    # Ensure every gt keeps its own best prior, even if the column-wise max
    # above assigned that prior to a different gt.
    for j in range(best_prior_idx.size(0)):
        best_truth_idx[best_prior_idx[j]] = j
    matches = truths[best_truth_idx]          # Shape: [num_priors, 4]
    conf = labels[best_truth_idx] + 1         # Shape: [num_priors]; shift so 0 = background
    conf[best_truth_overlap < threshold] = 0  # label low-overlap priors as background
    loc = encode(matches, priors, variances)
    loc_t[idx] = loc    # [num_priors, 4] encoded offsets to learn
    conf_t[idx] = conf  # [num_priors] top class label for each prior
Tags: end, sha, pre, largest, factor, hold, matching, from, one
Original article: https://www.cnblogs.com/yanghailin/p/14882807.html