| Field | Value |
| --- | --- |
| Implementation Name | `BaseDigitalTwinEnv` |
| Type | API Doc |
| Domain | Sim2Real |
| Source File | `mani_skill/envs/tasks/digital_twins/base_env.py` (L16-180) |
| Date | 2026-02-15 |
| Repository | Haosulab/ManiSkill |
## Overview

The `BaseDigitalTwinEnv` class is the base environment for creating digital twin simulation environments that visually replicate real-world workspaces. It extends `BaseEnv` with greenscreen compositing functionality, allowing rendered simulation images to have their backgrounds replaced with real-world photographs for more realistic visual observations.
## Description

This class implements the greenscreen pipeline described in the SIMPLER framework. It manages overlay images, segmentation-based masking, and pixel-level compositing. Task-specific digital twin environments inherit from this class and configure which cameras have overlay images and which objects should remain rendered from simulation (not greenscreened).

The greenscreen process operates at the observation level: during `_get_obs_sensor_data`, the class intercepts the rendered RGB and segmentation images, identifies which pixels belong to foreground objects (the robot and manipulated objects), and replaces all other pixels with the real-world overlay image.
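This masking step can be illustrated outside the environment with a minimal NumPy sketch (the real implementation operates on batched torch tensors; the function name and segmentation ids here are illustrative only):

```python
import numpy as np

def composite_background(rgb, seg, overlay, keep_ids):
    """Illustrative 'background'-mode compositing: pixels whose segmentation
    id is in keep_ids stay rendered; every other pixel takes the overlay."""
    foreground = np.isin(seg, keep_ids)  # (H, W) bool mask of kept pixels
    return np.where(foreground[..., None], rgb, overlay).astype(np.uint8)

# 2x2 toy image: segmentation id 7 marks the robot, 0 is background
rgb = np.full((2, 2, 3), 10, dtype=np.uint8)       # simulated render
overlay = np.full((2, 2, 3), 200, dtype=np.uint8)  # real-world photo
seg = np.array([[7, 0],
                [0, 7]])
out = composite_background(rgb, seg, overlay, keep_ids=[7])
# robot pixels keep the render value 10; background pixels become 200
```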
## Usage

```python
from mani_skill.envs.tasks.digital_twins.base_env import BaseDigitalTwinEnv


class GraspCubeDigitalTwin(BaseDigitalTwinEnv):
    def __init__(self, **kwargs):
        self.rgb_overlay_paths = {
            "base_camera": "assets/real_workspace.png"
        }
        super().__init__(**kwargs)

    def _load_scene(self, options: dict):
        self.cube = self._build_cube()
        self.remove_object_from_greenscreen(self.robot)
        self.remove_object_from_greenscreen(self.cube)
```
## Code Reference

### Class Definition and Properties

```python
class BaseDigitalTwinEnv(BaseEnv):
    """Base Environment class for easily setting up evaluation digital twins
    for real2sim and sim2real. Based on the SIMPLER framework."""

    rgb_overlay_paths: dict[str, str] = None
    """dict mapping camera name to the file path of the greenscreening image"""

    _rgb_overlay_images: dict[str, torch.Tensor] = dict()
    """dict mapping camera name to the image torch tensor"""

    rgb_overlay_mode: str = "background"
    """RGB overlay mode: 'background' (default), 'debug' (50/50 blend), or 'none'"""

    _objects_to_remove_from_greenscreen: list[Union[Actor, Link]] = []
    """list of articulations/actors/links that should not be greenscreened"""

    _segmentation_ids_to_keep: torch.Tensor = None
    """segmentation ids referencing objects that should not be greenscreened"""
```
### Class Properties

| Property | Type | Description |
| --- | --- | --- |
| `rgb_overlay_paths` | `dict[str, str]` | Maps camera names to file paths of overlay images. Set to `None` to disable greenscreening. |
| `rgb_overlay_mode` | `str` | Controls compositing mode: `"background"` replaces non-foreground pixels; `"debug"` blends at 50% opacity; `"none"` disables compositing. |
| `_objects_to_remove_from_greenscreen` | `list` | Internal list of objects whose pixels should be preserved (not replaced by overlay). |
| `_segmentation_ids_to_keep` | `torch.Tensor` | Computed segmentation IDs for foreground objects after scene reconfiguration. |
### Constructor (L55-67)

```python
def __init__(self, **kwargs):
    if self.rgb_overlay_paths is not None:
        for camera_name, path in self.rgb_overlay_paths.items():
            if not os.path.exists(path):
                raise FileNotFoundError(f"rgb_overlay_path {path} is not found.")
            self._rgb_overlay_images[camera_name] = cv2.cvtColor(
                cv2.imread(path), cv2.COLOR_BGR2RGB
            )
    else:
        self._rgb_overlay_images = None
    super().__init__(**kwargs)
```
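OpenCV loads images in BGR channel order, which is why the constructor converts with `cv2.COLOR_BGR2RGB` before caching each overlay. For a three-channel image that conversion is equivalent to reversing the last axis, which can be sketched without OpenCV installed:

```python
import numpy as np

# a single pure-blue pixel as OpenCV would load it (BGR channel order)
bgr = np.array([[[255, 0, 0]]], dtype=np.uint8)

# cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) reverses the channel axis
rgb = bgr[..., ::-1]
# in RGB order, blue now occupies the last channel: [0, 0, 255]
```

Skipping this conversion would leave overlay backgrounds with swapped red and blue channels in the observations.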
### Key Methods

```python
def remove_object_from_greenscreen(self, object: Union[Articulation, Actor, Link]):
    """Exclude an actor/articulation/link from the greenscreen process.
    Call this in _load_scene for the robot and all manipulated objects."""

def _after_reconfigure(self, options: dict):
    """After scene reconfiguration, compute segmentation IDs for foreground
    objects and resize overlay images to match camera resolution."""

def _green_sceen_rgb(self, rgb, segmentation, overlay_img):
    """Perform pixel-level compositing:
    - 'background' mode: keep foreground pixels, replace the rest with overlay
    - 'debug' mode: blend render and overlay at 50/50 opacity
    Returns the composited RGB tensor."""

def _get_obs_sensor_data(self, apply_texture_transforms=True):
    """Override of the BaseEnv method. Gets sensor data, then applies
    greenscreening to each camera that has an overlay image configured."""
```
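The overlay resize that `_after_reconfigure` performs can be approximated with a plain nearest-neighbour index map (an assumption for illustration: the actual implementation resizes with OpenCV, whose default interpolation differs):

```python
import numpy as np

def resize_nearest(img, out_h, out_w):
    """Nearest-neighbour resize via integer index mapping; a stand-in for
    resizing the overlay image to each camera's (height, width)."""
    h, w = img.shape[:2]
    rows = np.arange(out_h) * h // out_h  # source row for each output row
    cols = np.arange(out_w) * w // out_w  # source column for each output column
    return img[rows][:, cols]

overlay = np.zeros((480, 640, 3), dtype=np.uint8)  # e.g. a 640x480 photo
resized = resize_nearest(overlay, 128, 128)
# resized.shape == (128, 128, 3), matching a hypothetical 128x128 camera
```

The point is only that the overlay must match the camera resolution exactly, since compositing is a per-pixel operation.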
### Greenscreen Compositing Logic (L123-147)

```python
def _green_sceen_rgb(self, rgb, segmentation, overlay_img):
    actor_seg = segmentation[..., 0]
    mask = torch.ones_like(actor_seg, device=actor_seg.device, dtype=torch.bool)
    if self.rgb_overlay_mode == "background":
        mask[torch.isin(actor_seg, self._segmentation_ids_to_keep)] = 0
    mask = mask[..., None]
    if "debug" not in self.rgb_overlay_mode:
        rgb = rgb * (~mask) + overlay_img * mask
    else:
        rgb = rgb * 0.5 + overlay_img * 0.5
    rgb = rgb.to(torch.uint8)
    return rgb
```
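In `"debug"` mode every pixel is a 50/50 blend of render and overlay, so any misalignment between the simulated scene and the real photograph shows up as visible ghosting. The blend arithmetic, with the cast to `uint8` performed after blending as in the method above:

```python
import numpy as np

render = np.full((1, 1, 3), 100, dtype=np.float32)   # simulated pixel
overlay = np.full((1, 1, 3), 200, dtype=np.float32)  # real-photo pixel

# 50/50 blend, then cast down to uint8 as the method does
blended = (render * 0.5 + overlay * 0.5).astype(np.uint8)
# each channel becomes (100 + 200) / 2 = 150
```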
### Observation Override (L149-180)

```python
def _get_obs_sensor_data(self, apply_texture_transforms=True):
    obs = super()._get_obs_sensor_data(apply_texture_transforms)
    if self.rgb_overlay_mode == "none":
        return obs
    if (
        self.obs_mode_struct.visual.rgb
        and self.obs_mode_struct.visual.segmentation
        and self.rgb_overlay_paths is not None
    ):
        for camera_name in self._rgb_overlay_images.keys():
            assert "segmentation" in obs[camera_name].keys()
            overlay_img = self._rgb_overlay_images[camera_name]
            green_screened_rgb = self._green_sceen_rgb(
                obs[camera_name]["rgb"],
                obs[camera_name]["segmentation"],
                overlay_img,
            )
            obs[camera_name]["rgb"] = green_screened_rgb
    return obs
```
## I/O Contract

| Direction | Data | Format |
| --- | --- | --- |
| Input (config) | Overlay image paths | `dict[str, str]` mapping camera names to PNG/JPG file paths |
| Input (scene) | Objects to exclude from greenscreen | `Actor`, `Link`, or `Articulation` instances via `remove_object_from_greenscreen` |
| Output (obs) | Greenscreened RGB images | `torch.Tensor` of shape (B, H, W, 3), uint8; same shape as standard RGB output |
| Requirement | Observation mode must include RGB and segmentation | `obs_mode` must enable both `rgb` and `segmentation` visual channels |
## Overlay Mode Behavior

| Mode | Background Pixels | Foreground Pixels |
| --- | --- | --- |
| `"background"` | Replaced with overlay image | Kept from simulation render |
| `"debug"` | 50% overlay + 50% render | 50% overlay + 50% render |
| `"none"` | Kept from simulation render | Kept from simulation render |
## External Dependencies

| Package | Purpose |
| --- | --- |
| `cv2` (OpenCV) | Loading overlay images, color space conversion, resizing |
| `torch` | Tensor operations for segmentation masking and compositing |
| `mani_skill.envs.sapien_env.BaseEnv` | Parent class providing environment infrastructure |
| `mani_skill.utils.structs` | `Actor`, `Articulation`, `Link` types for object identification |
## Usage Examples

```python
import gymnasium as gym

# Use an existing digital twin environment
env = gym.make(
    "GraspCubeSO100Digital-v1",
    obs_mode="rgb+segmentation",
    control_mode="pd_joint_pos",
    num_envs=16,
    sim_backend="gpu",
)
obs, info = env.reset()
# obs contains greenscreened RGB observations


# Create a custom digital twin
class CustomDigitalTwin(BaseDigitalTwinEnv):
    rgb_overlay_paths = {"overhead_cam": "workspace_photo.png"}
    rgb_overlay_mode = "background"  # or "debug" for visualization

    def _load_scene(self, options):
        self.table = self._build_table()
        self.target = self._build_target_object()
        self.remove_object_from_greenscreen(self.robot)
        self.remove_object_from_greenscreen(self.target)


# Debug mode for verifying alignment
class DebugTwin(BaseDigitalTwinEnv):
    rgb_overlay_mode = "debug"  # 50/50 blend to inspect alignment
    rgb_overlay_paths = {"base_camera": "workspace.png"}
```
## Related Pages