@@ -104,7 +104,46 @@ class MaskRCNN(FasterRCNN):
104104
105105 Example::
106106
107- >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
107+ >>> import torchvision
108+ >>> from torchvision.models.detection import MaskRCNN
109+ >>> from torchvision.models.detection.rpn import AnchorGenerator
110+ >>>
111+ >>> # load a pre-trained model for classification and return
112+ >>> # only the features
113+ >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
114+ >>> # MaskRCNN needs to know the number of
115+ >>> # output channels in a backbone. For mobilenet_v2, it's 1280
116+ >>> # so we need to add it here
117+ >>> backbone.out_channels = 1280
118+ >>>
119+ >>> # let's make the RPN generate 5 x 3 anchors per spatial
120+ >>> # location, with 5 different sizes and 3 different aspect
121+ >>> # ratios. We have a Tuple[Tuple[int]] because each feature
122+ >>> # map could potentially have different sizes and
123+ >>> # aspect ratios
124+ >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
125+ >>> aspect_ratios=((0.5, 1.0, 2.0),))
126+ >>>
127+ >>> # let's define which feature maps we will
128+ >>> # use to perform the region of interest cropping, as well as
129+ >>> # the size of the crop after rescaling.
130+ >>> # if your backbone returns a Tensor, featmap_names is expected to
131+ >>> # be [0]. More generally, the backbone should return an
132+ >>> # OrderedDict[Tensor], and in featmap_names you can choose which
133+ >>> # feature maps to use.
134+ >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
135+ >>> output_size=7,
136+ >>> sampling_ratio=2)
137+ >>>
138+ >>> mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
139+ >>> output_size=14,
140+ >>> sampling_ratio=2)
141+ >>> # put the pieces together inside a MaskRCNN model
142+ >>> model = MaskRCNN(backbone,
143+ >>> num_classes=2,
144+ >>> rpn_anchor_generator=anchor_generator,
145+ >>> box_roi_pool=roi_pooler,
146+ >>> mask_roi_pool=mask_roi_pooler)
108147 >>> model.eval()
109148 >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
110149 >>> predictions = model(x)
@@ -149,8 +188,10 @@ def __init__(self, backbone, num_classes=None,
149188 mask_head = MaskRCNNHeads (out_channels , mask_layers , mask_dilation )
150189
151190 if mask_predictor is None :
152- mask_dim_reduced = 256 # == mask_layers[-1]
153- mask_predictor = MaskRCNNPredictor (out_channels , mask_dim_reduced , num_classes )
191+ mask_predictor_in_channels = 256 # == mask_layers[-1]
192+ mask_dim_reduced = 256
193+ mask_predictor = MaskRCNNPredictor (mask_predictor_in_channels ,
194+ mask_dim_reduced , num_classes )
154195
155196 super (MaskRCNN , self ).__init__ (
156197 backbone , num_classes ,
0 commit comments