Skip to main content
Open In Colab

Region Proposal Network (RPN)

Notebook 3 of 6 in the Faster R-CNN from-scratch series. The RPN is the key innovation of Faster R-CNN: a small network that slides over FPN feature maps and proposes object-containing regions using anchor boxes.
import sys, os, pathlib
# Locate frcnn_common.py — works whether run via papermill or interactively.
_search_roots = [
    pathlib.Path.cwd().parent,  # interactive: cwd is the notebook dir
    pathlib.Path.cwd() / 'notebooks' / 'scene-understanding' / 'object-detection' / 'faster-rcnn' / 'pytorch',  # papermill: cwd is repo root
]
# First candidate directory actually containing the module wins.
_found = next((_r for _r in _search_roots if (_r / 'frcnn_common.py').exists()), None)
if _found is not None:
    sys.path.insert(0, str(_found))

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from typing import List, Tuple

# Shared building blocks for the whole Faster R-CNN notebook series;
# frcnn_common.py was put on sys.path by the locator cell above.
from frcnn_common import (
    AnchorGenerator, RPNHead, RegionProposalNetwork,
    decode_boxes, box_iou, encode_boxes,
    Bottleneck, ResNet50, FPN,
    IMG_SIZE, DEVICE,
)

# Report which compute device frcnn_common selected.
print(f"Device: {DEVICE}")
/workspaces/eng-ai-agents/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
Device: cuda
# AnchorGenerator is imported from frcnn_common — inspect its structure:
gen = AnchorGenerator()
# Report the three hyper-parameter tuples that define the anchor set.
for _label, _value in (
    ('Anchor sizes', gen.anchor_sizes),
    ('Aspect ratios', gen.aspect_ratios),
    ('Strides', gen.strides),
):
    print(f"{_label}: {_value}")
Anchor sizes: (32, 64, 128, 256, 512)
Aspect ratios: (0.5, 1.0, 2.0)
Strides: (4, 8, 16, 32, 64)
# RPNHead is imported from frcnn_common — inspect its structure:
# One shared 3x3 conv, then two 1x1 heads: k objectness logits and 4k box
# deltas per spatial location (here k=3 anchors per location → 3 and 12 channels).
head = RPNHead(in_ch=256, k=3)
print(head)
RPNHead(
  (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (cls): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))
  (box): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))
)

Decoding anchor adjustments

The RPN predicts four deltas per anchor—center shifts $(d_x, d_y)$ and log-scale factors $(d_w, d_h)$. decode_boxes() follows the Faster R-CNN transform: compute each anchor’s width/height/center, clamp the scale deltas to avoid exploding exponentials, recover the proposal center $(p_x, p_y)$ and size $(p_w, p_h)$, then convert back to corner form [x1, y1, x2, y2]. The resulting boxes are ready for clipping, min-size filtering, and NMS.
# decode_boxes and RegionProposalNetwork are imported from frcnn_common.
# Inspect decode_boxes:
help(decode_boxes)

# Smoke test: random feature maps shaped like the five FPN levels for an
# 800x800 input, halving spatially per level.
rpn = RegionProposalNetwork(head, gen)
feat_maps = [torch.randn(1, 256, side, side) for side in (200, 100, 50, 25, 13)]
proposals, _ = rpn(feat_maps, (800, 800))
print(f"Proposals per image: {[len(p) for p in proposals]}")
Help on function decode_boxes in module frcnn_common:

decode_boxes(anchors, deltas)
    Inverse of encode_boxes with additional clamping for numerical stability.
Proposals per image: [1000]
# Inspection: anchor grid at P3 (stride 8) — sample every 8th cell
os.makedirs('images', exist_ok=True)  # savefig raises if the target dir is missing
stride, fh, fw = 8, 100, 100
# Anchor centres sit at the middle of each feature cell: (index + 0.5) * stride.
# Build all centres up front so a single plot call replaces O(cells) calls.
centres_x = [(c + 0.5) * stride for r in range(0, fh, 8) for c in range(0, fw, 8)]
centres_y = [(r + 0.5) * stride for r in range(0, fh, 8) for c in range(0, fw, 8)]
fig, ax = plt.subplots(figsize=(8, 8))
ax.set_xlim(0, 800); ax.set_ylim(800, 0)  # y inverted: image coordinates
ax.set_facecolor('#1a1a2e')
ax.set_title('Anchor centres at P3 (stride 8), every 8th cell')
ax.plot(centres_x, centres_y, 'c.', markersize=2)
plt.tight_layout()
plt.savefig('images/anchor_grid.png', dpi=100, bbox_inches='tight')
plt.show()
print(f"P3 total locations: {fh * fw:,}  |  total anchors: {fh * fw * 3:,}")
Output from cell 5
P3 total locations: 10,000  |  total anchors: 30,000
# Inspection: objectness score distribution (random weights)
# Re-run head on feat_maps to get scores
os.makedirs('images', exist_ok=True)  # savefig raises if the target dir is missing
cls_logits_list, bbox_preds_list = head(feat_maps)
# Flatten every pyramid level's logits into one vector and squash to [0, 1].
# .cpu() is a no-op here but keeps this cell correct if head is moved to GPU.
all_scores_flat = torch.cat([
    c.permute(0, 2, 3, 1).reshape(-1) for c in cls_logits_list
]).sigmoid().detach().cpu().numpy()

fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(all_scores_flat, bins=80, color='steelblue', alpha=0.8, edgecolor='none')
ax.set_xlabel('Objectness score'); ax.set_ylabel('Anchor count')
ax.set_title('Objectness score distribution (random-weight RPN)')
plt.tight_layout()
plt.savefig('images/objectness_dist.png', dpi=100, bbox_inches='tight')
plt.show()
Output from cell 6
# Inspection: top-50 proposals before NMS on a blank canvas
os.makedirs('images', exist_ok=True)  # savefig raises if the target dir is missing
anchors_all = gen(feat_maps, (800, 800))
all_deltas_flat = torch.cat([
    b.permute(0, 2, 3, 1).reshape(-1, 4) for b in bbox_preds_list
]).detach()
# from_numpy shares the numpy buffer instead of copying it the way
# torch.tensor(...) does (which also warns on newer torch versions).
all_scores_1d = torch.from_numpy(all_scores_flat)

# topk avoids sorting all ~267k scores just to keep the best 50.
top50_idx = all_scores_1d.topk(50).indices
top50_props = decode_boxes(anchors_all[top50_idx], all_deltas_flat[top50_idx])
top50_props = top50_props.clamp(0, 800)  # clip to the 800x800 canvas

fig, ax = plt.subplots(figsize=(8, 8))
ax.set_xlim(0, 800); ax.set_ylim(800, 0)  # y inverted: image coordinates
ax.set_facecolor('#1a1a2e')
ax.set_title('Top-50 proposals (before NMS, random weights)')
for x1, y1, x2, y2 in top50_props.tolist():
    ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                   linewidth=1, edgecolor='cyan',
                                   facecolor='none', alpha=0.5))
plt.tight_layout()
plt.savefig('images/top50_proposals.png', dpi=100, bbox_inches='tight')
plt.show()
Output from cell 7
Key references: (Ren et al., 2015; Tan & Le, 2019; Tian et al., 2019; Redmon et al., 2015; He et al., 2017)

References

  • He, K., Gkioxari, G., Dollár, P., Girshick, R. (2017). Mask R-CNN.
  • Redmon, J., Divvala, S., Girshick, R., Farhadi, A. (2015). You only look once: Unified, real-time object detection.
  • Ren, S., He, K., Girshick, R., Sun, J. (2015). Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks.
  • Tan, M., Le, Q. (2019). EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks.
  • Tian, Z., Shen, C., Chen, H., He, T. (2019). FCOS: Fully Convolutional One-Stage Object Detection.