Skip to main content

Rerun Lakehouse Visualizer - VisDrone Video Demo

Interactive drone-view video visualization from the VisDrone 2019 VID dataset stored in the Auraison lakehouse (s3://landing/visdrone/VID/), powered by the Rerun SDK.

What this demo shows:
  • Streaming a VisDrone video sequence (JPEG frames) from MinIO S3
  • Parsing per-frame bounding box annotations (x,y,w,h,score,category)
  • Logging frames + bbox overlays to Rerun with a frame timeline
  • Saving a portable .rrd recording and serving it in the Rerun web viewer
VisDrone categories: ignored(0), pedestrian(1), people(2), bicycle(3), car(4), van(5), truck(6), tricycle(7), awning-tricycle(8), bus(9), motor(10), others(11)

Prerequisites:
cd data-plane
# Ensure visdrone has been migrated to landing/visdrone/
# uv run python scripts/migrate_datasets.py --only visdrone
import io, os, sys, zipfile
# Put the parent of the current working directory on the import path.
# Joining with '' leaves a trailing path separator on the inserted entry.
_cwd_parent = os.path.dirname(os.path.abspath('.'))
sys.path.insert(0, os.path.join(_cwd_parent, ''))

# MinIO / S3 connection settings; each one can be overridden via environment.
S3_ENDPOINT = os.environ.get('MINIO_ENDPOINT', 'http://localhost:29000')
S3_KEY = os.environ.get('MINIO_ACCESS_KEY', 'minio')
S3_SECRET = os.environ.get('MINIO_SECRET_KEY', 'minio123')

# Dataset selection.
BUCKET = 'landing'
# Defaults to the val archive. There is no automatic fallback: export
# VISDRONE_ZIP to point at another archive (e.g. test-dev) if val is missing.
ZIP_KEY = os.environ.get('VISDRONE_ZIP', 'visdrone/VID/VisDrone2019-VID-val.zip')
SEQ_NAME = 'uav0000013_00000_v'  # requested sequence; adjust to any sequence in the archive
MAX_FRAMES = 60  # upper bound on the number of frames to visualize
RRD_PATH = '/tmp/visdrone_demo.rrd'  # output path of the Rerun recording

# Echo the effective configuration, one setting per line.
print(
    f'MinIO endpoint : {S3_ENDPOINT}',
    f'Zip key        : s3://{BUCKET}/{ZIP_KEY}',
    f'Sequence       : {SEQ_NAME}',
    f'Max frames     : {MAX_FRAMES}',
    f'RRD output     : {RRD_PATH}',
    sep='\n',
)
MinIO endpoint : http://192.168.1.26:9000
Zip key        : s3://landing/visdrone/VID/VisDrone2019-VID-test-dev.zip
Sequence       : uav0000013_00000_v
Max frames     : 60
RRD output     : /tmp/visdrone_demo.rrd

Step 1 - Download and Inspect the VID Val Zip

The VisDrone VID val split (1.5 GiB) contains 7 video sequences:
VisDrone2019-VID-val/
  sequences/<seq_name>/       -- JPEG frames: 0000001.jpg, 0000002.jpg, ...
  annotations/<seq_name>.txt  -- CSV: frame,id,x,y,w,h,score,category,trunc,occ
import boto3
from botocore.config import Config

# S3 client pointed at the MinIO endpoint, using SigV4 request signing.
s3 = boto3.client(
    's3',
    endpoint_url=S3_ENDPOINT,
    aws_access_key_id=S3_KEY,
    aws_secret_access_key=S3_SECRET,
    config=Config(signature_version='s3v4'),
)

print(f'Downloading s3://{BUCKET}/{ZIP_KEY} ...')
obj = s3.get_object(Bucket=BUCKET, Key=ZIP_KEY)
# NOTE: the whole archive is buffered in memory so that zipfile can seek in it.
zip_bytes = io.BytesIO(obj['Body'].read())
size_mb = zip_bytes.getbuffer().nbytes / 1024 / 1024
print(f'Downloaded {size_mb:.0f} MiB')

zf = zipfile.ZipFile(zip_bytes)
all_names = zf.namelist()

import re
# A sequence is any directory matching sequences/uav*/ that holds numbered JPEGs.
seq_pattern = re.compile(r'sequences/(uav[^/]+)/\d+\.jpg$')
sequences = sorted({
    m.group(1) for name in all_names
    if (m := seq_pattern.search(name))
})
# Label the count with the archive actually downloaded: ZIP_KEY can be
# overridden through VISDRONE_ZIP, so it is not necessarily the val split.
print(f'Sequences in {os.path.basename(ZIP_KEY)}: {len(sequences)}')
print('First 5:', sequences[:5])
Downloading s3://landing/visdrone/VID/VisDrone2019-VID-test-dev.zip ...
Downloaded 2187 MiB
Sequences in val set: 17
First 5: ['uav0000009_03358_v', 'uav0000073_00600_v', 'uav0000073_04464_v', 'uav0000077_00720_v', 'uav0000088_00290_v']
# An archive without any sequence cannot be visualized; fail with a clear
# message instead of an IndexError from sequences[0].
if not sequences:
    raise ValueError(f'No video sequences found in s3://{BUCKET}/{ZIP_KEY}')

# Fall back to the first available sequence when the requested one is absent.
if SEQ_NAME not in sequences:
    SEQ_NAME = sequences[0]
    print(f'SEQ_NAME not found -- using: {SEQ_NAME}')
else:
    print(f'Using sequence: {SEQ_NAME}')

# Detect base directory from zip (e.g. VisDrone2019-VID-val or VID-test-dev).
# Candidates are sorted so the choice is deterministic if several match.
base_dirs = {n.split('/')[0] for n in all_names if '/' in n}
BASE_DIR = next((d for d in sorted(base_dirs) if 'VID' in d), None)
if BASE_DIR is None:
    raise ValueError(
        f'No top-level directory containing "VID" in s3://{BUCKET}/{ZIP_KEY}; '
        f'found: {sorted(base_dirs)}'
    )
print(f'Zip base directory: {BASE_DIR}')

# Frame files sort chronologically by name; keep at most MAX_FRAMES of them.
frame_prefix = f'{BASE_DIR}/sequences/{SEQ_NAME}/'
frame_names = sorted(
    n for n in all_names
    if n.startswith(frame_prefix) and n.endswith('.jpg')
)[:MAX_FRAMES]

# Per-sequence annotation file that sits next to the sequences/ directory.
ann_path = f'{BASE_DIR}/annotations/{SEQ_NAME}.txt'

print(f'Frames selected  : {len(frame_names)}')
print(f'Annotation file  : {ann_path}')
if frame_names:
    print(f'Sample frame     : {frame_names[0]}')
SEQ_NAME not found -- using: uav0000009_03358_v
Zip base directory: VisDrone2019-VID-test-dev
Frames selected  : 60
Annotation file  : VisDrone2019-VID-test-dev/annotations/uav0000009_03358_v.txt
Sample frame     : VisDrone2019-VID-test-dev/sequences/uav0000009_03358_v/0000001.jpg

Step 2 - Parse Annotations

VisDrone annotation format (per-sequence CSV):
frame_index, target_id, x, y, w, h, score, category, truncation, occlusion
  • x, y, w, h in pixel coordinates (top-left origin)
  • category - 1=pedestrian, 4=car, 5=van, 6=truck, 9=bus, 10=motor, …
import collections

# Category names indexed by the VisDrone category id (0..11).
CATEGORY_NAMES = [
    'ignored', 'pedestrian', 'people', 'bicycle', 'car', 'van',
    'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor', 'others'
]

ann_data = zf.read(ann_path).decode('utf-8')
# frame index -> list of (x, y, w, h, category) tuples
ann_by_frame = collections.defaultdict(list)

for line in ann_data.strip().splitlines():
    parts = line.strip().split(',')
    # Rows need at least the first 8 columns (through category).
    if len(parts) < 8:
        continue
    frame_idx = int(parts[0])
    x, y, w, h = (int(v) for v in parts[2:6])
    cat = int(parts[7])
    # Skip ignored regions (category 0) and any id outside CATEGORY_NAMES:
    # an id above the table would raise IndexError at lookup time, and a
    # negative id would silently pick a name from the end of the list.
    if not (0 < cat < len(CATEGORY_NAMES)):
        continue
    # Degenerate boxes cannot be drawn.
    if w <= 0 or h <= 0:
        continue
    ann_by_frame[frame_idx].append((x, y, w, h, cat))

# Counts cover every annotated frame of the sequence, not only the frames
# selected for visualization.
total_boxes = sum(len(v) for v in ann_by_frame.values())
print(f'Annotation rows : {total_boxes} bounding boxes across {len(ann_by_frame)} frames')

cat_counts = collections.Counter()
for boxes in ann_by_frame.values():
    for *_, cat in boxes:
        cat_counts[CATEGORY_NAMES[cat]] += 1
print('Category distribution:')
for name, count in cat_counts.most_common():
    print(f'  {name:<18} {count:5d}')
Annotation rows : 12531 bounding boxes across 219 frames
Category distribution:
  car                 8201
  pedestrian          3933
  people               216
  motor                106
  van                   75

Step 3 - Log Video Frames + Bounding Boxes to Rerun

Each frame is logged at its frame index on the frame timeline.
  • rr.EncodedImage for JPEG frames (no decode overhead)
  • rr.Boxes2D with per-box category labels and colours
import numpy as np
import rerun as rr
import rerun.blueprint as rrb

# RGB box colour per VisDrone category id; ids missing here get a grey
# default at lookup time.
CATEGORY_COLORS = {
    1: (100, 149, 237),   # pedestrian
    2: (65, 105, 225),    # people
    3: (255, 215, 0),     # bicycle
    4: (220, 20, 60),     # car
    5: (50, 205, 50),     # van
    6: (255, 140, 0),     # truck
    7: (138, 43, 226),    # tricycle
    8: (0, 206, 209),     # awning-tricycle
    9: (255, 69, 0),      # bus
    10: (255, 20, 147),   # motor
    11: (169, 169, 169),  # others
}

# Start a recording without spawning a viewer and direct it to RRD_PATH.
rr.init('auraison/visdrone', spawn=False)
rr.save(RRD_PATH)

# Single 2D view rooted at /drone, titled with the sequence name.
blueprint = rrb.Spatial2DView(origin='/drone', name=f'VisDrone VID -- {SEQ_NAME}')
rr.send_blueprint(blueprint)

print(f'Logging {len(frame_names)} frames ...')
for frame_path in frame_names:
    # The frame number is the file name without its extension (0000001.jpg -> 1).
    fname = os.path.basename(frame_path)
    frame_idx = int(fname.replace('.jpg', ''))
    rr.set_time('frame', sequence=frame_idx)

    # The JPEG bytes are handed over as-is, without decoding them here.
    jpeg = rr.EncodedImage(contents=zf.read(frame_path), media_type='image/jpeg')
    rr.log('/drone/image', jpeg)

    boxes = ann_by_frame.get(frame_idx, [])
    if not boxes:
        # No annotations for this frame: clear the boxes entity.
        rr.log('/drone/boxes', rr.Clear(recursive=False))
    else:
        corners, extents, colors, labels = [], [], [], []
        for bx, by, bw, bh, cat_id in boxes:
            corners.append([bx, by])
            extents.append([bw, bh])
            colors.append(CATEGORY_COLORS.get(cat_id, (200, 200, 200)))
            labels.append(CATEGORY_NAMES[cat_id])
        overlay = rr.Boxes2D(
            mins=np.array(corners, dtype=np.float32),
            sizes=np.array(extents, dtype=np.float32),
            colors=colors,
            labels=labels,
        )
        rr.log('/drone/boxes', overlay)

    # Progress line every tenth frame index.
    if not frame_idx % 10:
        print(f'  frame {frame_idx:5d} -- {len(boxes):2d} boxes')

print(f'\nRecording saved: {RRD_PATH}')
rrd_size_mb = os.stat(RRD_PATH).st_size / 1024 / 1024
print(f'File size: {rrd_size_mb:.1f} MiB')
zf.close()
Logging 60 frames ...
  frame    10 -- 17 boxes
  frame    20 -- 14 boxes
  frame    30 -- 20 boxes
  frame    40 -- 20 boxes
  frame    50 -- 21 boxes
  frame    60 -- 22 boxes

Recording saved: /tmp/visdrone_demo.rrd
File size: 20.6 MiB

Step 4 - View in Rerun Web Viewer

Open the saved recording in the Rerun web viewer. Scrub the frame timeline to step through drone video with colour-coded bounding box overlays.
# Native viewer
rerun /tmp/visdrone_demo.rrd

# Web viewer (gRPC stream)
uv run python scripts/rerun_demo.py --count 20  # Flickr8k variant

Mintlify Embed

To embed in the aegean.ai docs site, upload visdrone_demo.rrd to a public URL and use:
<iframe
  src="https://app.rerun.io/version/0.29.2/index.html?url=PUBLIC_RRD_URL"
  width="100%"
  height="640px"
  style="border: none; border-radius: 8px;"
/>
# Derive the split name from the archive's base directory
# (e.g. VisDrone2019-VID-test-dev -> test-dev) so the summary reports the
# archive that was actually read; fall back to the full directory name.
split_name = BASE_DIR.partition('VID-')[2] or BASE_DIR

print('=== VisDrone Rerun Demo -- Summary ===')
print(f'Dataset    : VisDrone 2019 VID {split_name}')
print(f'Sequence   : {SEQ_NAME}')
print(f'Frames     : {len(frame_names)}')
# total_boxes counts the whole annotation file, which can span more frames
# than the ones logged above, so the line states its scope explicitly.
print(f'Boxes      : {total_boxes} annotated in full sequence ({len(ann_by_frame)} frames)')
print(f'Recording  : {RRD_PATH}  ({rrd_size_mb:.1f} MiB)')
print()
print('Entity tree:')
print('  /drone/image  -> rr.EncodedImage (JPEG)')
print('  /drone/boxes  -> rr.Boxes2D (x,y,w,h + category labels + colours)')
print()
print('Timeline: frame (sequence)')
print('Top categories:')
# Like the box count, the ranking covers the full sequence annotations.
for name, count in cat_counts.most_common(5):
    print(f'  {name:<18} {count:5d}')
=== VisDrone Rerun Demo -- Summary ===
Dataset    : VisDrone 2019 VID val
Sequence   : uav0000009_03358_v
Frames     : 60
Boxes      : 12531 total detections
Recording  : /tmp/visdrone_demo.rrd  (20.6 MiB)

Entity tree:
  /drone/image  -> rr.EncodedImage (JPEG)
  /drone/boxes  -> rr.Boxes2D (x,y,w,h + category labels + colours)

Timeline: frame (sequence)
Top categories:
  car                 8201
  pedestrian          3933
  people               216
  motor                106
  van                   75