rcnn详解,faster rcnn网络结构

生成一个batch图像的所有anchors信息，shape是[b, 21420, 4]。其中 anchor_sizes=((32,), (64,), (128,), (256,), (512,))，aspect_ratios=((0.5, 1.0, 2.0),) * len(anchor_sizes)。

1. 首先获取各预测特征层的高度和宽度，然后获取一个batch内输入图像的高度和宽度

2. 根据特征图和原始图像的高度和宽度，计算特征层上的一步对应原始图像上的步长（stride）

3. 根据提供的sizes和aspect_ratios生成anchors模板（模板均以(0, 0)为中心）

4. 计算/读取所有anchors的坐标信息（此处的anchors信息是映射到原始图像上的所有anchors信息，而不是anchors模板）。

5. 得到的是一个list列表，其中每个元素对应一个预测特征图映射回原始图像的anchors坐标信息

class AnchorsGenerator(nn.Module):
    """Generate anchor boxes for every feature map of a batch of images.

    Zero-centred anchor templates are built from ``sizes`` and
    ``aspect_ratios`` (one group per feature map), then shifted to every
    grid position of each feature map and expressed in input-image
    coordinates as ``(xmin, ymin, xmax, ymax)``.

    Arguments:
        sizes: anchor scales. Either a flat sequence of numbers (each number
            becomes its own one-element group) or a sequence of
            lists/tuples, one group per feature map.
        aspect_ratios: height/width ratios. Either a flat sequence (reused
            for every size group) or a sequence of lists/tuples, one group
            per feature map.
    """

    __annotations__ = {
        "cell_anchors": Optional[List[torch.Tensor]],
        "_cache": Dict[str, List[torch.Tensor]],
    }

    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):
        super(AnchorsGenerator, self).__init__()

        # Normalise flat inputs into one group per feature map.
        if not isinstance(sizes[0], (list, tuple)):
            # TODO change this
            sizes = tuple((s,) for s in sizes)
        if not isinstance(aspect_ratios[0], (list, tuple)):
            aspect_ratios = (aspect_ratios,) * len(sizes)

        assert len(sizes) == len(aspect_ratios)

        self.sizes = sizes
        self.aspect_ratios = aspect_ratios
        # Anchor templates, built lazily by set_cell_anchors().
        self.cell_anchors = None
        # Maps str(grid_sizes) + str(strides) to the computed anchors.
        self._cache = {}

    def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device("cpu")):
        # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor
        """Build the zero-centred anchor templates for one feature map.

        Arguments:
            scales: sqrt(anchor_area)
            aspect_ratios: h/w ratios
            dtype: dtype of the returned tensor (float32 by default)
            device: device of the returned tensor (cpu by default)

        Returns:
            Tensor of shape [len(aspect_ratios) * len(scales), 4] holding
            rounded (xmin, ymin, xmax, ymax) offsets relative to (0, 0).
        """
        scales = torch.as_tensor(scales, dtype=dtype, device=device)
        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)

        # With ratio = h / w, h scales by sqrt(ratio) and w by 1 / sqrt(ratio).
        h_ratios = torch.sqrt(aspect_ratios)
        w_ratios = 1.0 / h_ratios

        # [r1, r2, r3]' * [s1, s2, s3]
        # number of elements is len(ratios) * len(scales)
        ws = (w_ratios[:, None] * scales[None, :]).view(-1)
        hs = (h_ratios[:, None] * scales[None, :]).view(-1)

        # left-top, right-bottom coordinates relative to the anchor centre (0, 0)
        base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2

        return base_anchors.round()

    def set_cell_anchors(self, dtype, device):
        # type: (torch.dtype, torch.device) -> None
        """Create the anchor templates unless they already live on ``device``."""
        if self.cell_anchors is not None:
            cell_anchors = self.cell_anchors
            assert cell_anchors is not None
            # suppose that all anchors have the same device
            # which is a valid assumption in the current state of the codebase
            if cell_anchors[0].device == device:
                return

        # One zero-centred template tensor per (sizes, aspect_ratios) group.
        cell_anchors = [
            self.generate_anchors(sizes, aspect_ratios, dtype, device)
            for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios)
        ]
        self.cell_anchors = cell_anchors

    def num_anchors_per_location(self):
        """Return, per feature map, the number of anchors at each grid position."""
        return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]

    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
    def grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """Map the anchor templates onto every grid position of each feature map.

        Requires set_cell_anchors() to have been called first.

        Args:
            grid_sizes: (height, width) of each feature map
            strides: (stride_height, stride_width) per feature map, i.e. how
                far one feature-map step moves in the input image

        Returns:
            One tensor of shape [grid_height * grid_width * num_templates, 4]
            per feature map, in input-image coordinates.
        """
        anchors = []
        cell_anchors = self.cell_anchors
        assert cell_anchors is not None

        # Walk the feature maps together with their stride and templates.
        for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):
            grid_height, grid_width = size
            stride_height, stride_width = stride
            device = base_anchors.device

            # For output anchor, compute [x_center, y_center, x_center, y_center]
            # shape: [grid_width], x coordinates (columns) in the input image
            shifts_x = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width
            # shape: [grid_height], y coordinates (rows) in the input image
            shifts_y = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height

            # Rows are passed first, so both outputs have shape
            # [grid_height, grid_width] and flatten row by row.
            shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
            shift_x = shift_x.reshape(-1)
            shift_y = shift_y.reshape(-1)

            # Offsets for (xmin, ymin, xmax, ymax); shape [grid_width * grid_height, 4]
            shifts = torch.stack([shift_x, shift_y, shift_x, shift_y], dim=1)

            # For every (base anchor, output anchor) pair,
            # offset each zero-centered base anchor by the center of the output anchor.
            # Broadcasting: [positions, 1, 4] + [1, templates, 4]
            shifts_anchor = shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)
            anchors.append(shifts_anchor.reshape(-1, 4))

        return anchors  # List[Tensor(all_num_anchors, 4)]

    def cached_grid_anchors(self, grid_sizes, strides):
        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]
        """Return grid_anchors(grid_sizes, strides), memoised in self._cache."""
        key = str(grid_sizes) + str(strides)
        # self._cache is a plain dict keyed by the string built above
        if key in self._cache:
            return self._cache[key]
        anchors = self.grid_anchors(grid_sizes, strides)
        self._cache[key] = anchors
        return anchors

    def forward(self, image_list, feature_maps):
        # type: (ImageList, List[Tensor]) -> List[Tensor]
        """Compute the anchors of every image in a batch.

        Arguments:
            image_list: object exposing ``tensors`` (the batched images; the
                last two dimensions are read as height and width) and
                ``image_sizes`` (one entry per image in the batch).
            feature_maps: feature maps whose last two dimensions are read as
                (height, width).

        Returns:
            A list with one tensor per image; each tensor concatenates the
            anchors of all feature maps and has shape [num_anchors, 4].
        """
        # (height, width) of every feature map
        grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]

        # height and width of the batched input images
        image_size = image_list.tensors.shape[-2:]

        # dtype and device are taken from the first feature map
        dtype, device = feature_maps[0].dtype, feature_maps[0].device

        # one step in feature map equals n pixels of stride in the origin image
        # (integer floor division, stored as int64 tensors)
        strides = [
            [
                torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device),
                torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device),
            ]
            for g in grid_sizes
        ]

        # Build the anchor templates from sizes and aspect_ratios.
        self.set_cell_anchors(dtype, device)

        # Compute (or read from the cache) the anchors mapped onto the input
        # image: one tensor per feature map. These are not the templates.
        anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)

        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])
        # Every image in the batch receives the same per-feature-map anchors;
        # the individual image sizes are not used.
        for i, (image_height, image_width) in enumerate(image_list.image_sizes):
            anchors_in_image = []
            for anchors_per_feature_map in anchors_over_all_feature_maps:
                anchors_in_image.append(anchors_per_feature_map)
            anchors.append(anchors_in_image)

        # Concatenate the anchors of all feature maps for each image.
        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]

        # Clear the cache in case that memory leaks.
        self._cache.clear()
        return anchors