google-deepmind · copybara-service · Apr 8, 2025 · Mar 15, 2025 · Mar 16, 2025 · Mar 17, 2025
diff --git a/python/mujoco/simulate.cc b/python/mujoco/simulate.cc
@@ -91,6 +91,7 @@ class SimulateWrapper {
 
   void Destroy() {
     if (simulate_) {
+      ClearImages();
       delete simulate_;
       simulate_ = nullptr;
       destroyed_.store(1);
@@ -148,6 +149,59 @@ class SimulateWrapper {
 
   void ClearFigures() { simulate_->user_figures_.clear(); }
 
+  // Replaces the overlay texts shown by the viewer.
+  //
+  // Each entry of `texts` is a [font, gridpos, text1, text2] tuple.
+  void SetTexts(
+      const std::vector<std::tuple<int, int, std::string, std::string>>& texts) {
+    // Copy into a private vector first, then swap it in, so the list held by
+    // the viewer is replaced in a single step (prevents text flickering).
+    std::vector<std::tuple<int, int, std::string, std::string>> user_texts(
+        texts);
+    simulate_->user_texts_.swap(user_texts);
+  }
+
+  // Removes every overlay text currently stored in user_texts_.
+  void ClearTexts() {
+    simulate_->user_texts_.clear();
+  }
+
+  // Replaces the set of user image overlays drawn by the viewer.
+  //
+  // Each entry is a (viewport, image) pair. `image` must be a C-contiguous
+  // uint8 array of shape (H, W, 3) whose height and width equal the
+  // viewport's: the renderer is handed only the viewport to size its read of
+  // the stored buffer. The pixels are copied, so the overlay does not depend
+  // on the lifetime of the Python array.
+  //
+  // The arrays are held by value (pybind11::array is a reference-counted
+  // handle), not by reference, so they cannot refer to conversion temporaries.
+  //
+  // Throws std::invalid_argument if any image fails validation; in that case
+  // the previously installed images are left untouched.
+  //
+  // NOTE(review): user_images_ is also read by Simulate::Render; confirm that
+  // callers serialize this call against rendering.
+  void SetImages(
+      const std::vector<std::tuple<mjrRect, pybind11::array>>&
+          viewports_images) {
+    // Pass 1: validate everything before touching the current overlays, so a
+    // bad entry cannot leave the viewer with a partially updated image list.
+    for (const auto& [viewport, image] : viewports_images) {
+      if (image.ndim() != 3) {
+        throw std::invalid_argument("image must have 3 dimensions (H, W, C)");
+      }
+      if (image.shape(2) != 3) {
+        throw std::invalid_argument("image must have 3 channels");
+      }
+      // Checks the dtype itself; an item-size test alone would also accept
+      // int8 and bool arrays.
+      if (!pybind11::isinstance<pybind11::array_t<unsigned char>>(image)) {
+        throw std::invalid_argument("image must be uint8 format");
+      }
+      // The copy below is a flat memcpy, which is only valid for C order.
+      if (!(image.flags() & pybind11::array::c_style)) {
+        throw std::invalid_argument("image must be C-contiguous");
+      }
+      if (image.shape(0) != viewport.height ||
+          image.shape(1) != viewport.width) {
+        throw std::invalid_argument(
+            "image height and width must match the viewport");
+      }
+    }
+
+    // Pass 2: free the old buffers and install the new ones. Reserving first
+    // means emplace_back cannot reallocate (and so cannot throw) once a pixel
+    // buffer has been allocated, which would otherwise leak it.
+    ClearImages();
+    simulate_->user_images_.reserve(viewports_images.size());
+    for (const auto& [viewport, image] : viewports_images) {
+      const size_t size = static_cast<size_t>(image.nbytes());
+
+      // Make a copy of the image data to prevent flickering
+      unsigned char* image_copy = new unsigned char[size];
+      std::memcpy(image_copy, image.data(), size);
+
+      simulate_->user_images_.emplace_back(viewport, image_copy);
+    }
+  }
+
+  // Releases every pixel buffer referenced by user_images_ and empties the
+  // list. The buffers are allocated with new[] in SetImages, so each one is
+  // freed with delete[] before the raw pointers are dropped.
+  void ClearImages() {
+    auto& images = simulate_->user_images_;
+    for (auto& entry : images) {
+      delete[] std::get<1>(entry);
+    }
+    images.clear();
+  }
+
  private:
   mujoco::Simulate* simulate_;
   std::atomic_int destroyed_ = 0;
@@ -249,6 +303,12 @@ PYBIND11_MODULE(_simulate, pymodule) {
       .def("set_figures", &SimulateWrapper::SetFigures,
            py::arg("viewports_figures"))
       .def("clear_figures", &SimulateWrapper::ClearFigures)
+      .def("set_texts", &SimulateWrapper::SetTexts,
+           py::arg("overlay_texts"))
+      .def("clear_texts", &SimulateWrapper::ClearTexts)
+      .def("set_images", &SimulateWrapper::SetImages,
+           py::arg("viewports_images"))
+      .def("clear_images", &SimulateWrapper::ClearImages)
       .def_property_readonly("m", &SimulateWrapper::GetModel)
       .def_property_readonly("d", &SimulateWrapper::GetData)
       .def_property_readonly("viewport", &SimulateWrapper::GetViewport)

diff --git a/python/mujoco/viewer.py b/python/mujoco/viewer.py
@@ -23,7 +23,7 @@
 import sys
 import threading
 import time
-from typing import Callable, Optional, Tuple, Union
+from typing import Callable, List, Optional, Tuple, Union
 import weakref
 
 import glfw
@@ -115,16 +115,96 @@ def viewport(self):
       return sim.viewport
     return None
 
-  def set_figures(self, viewports_figures):
+  def set_figures(
+      self, viewports_figures: Union[Tuple[mujoco.MjrRect, mujoco.MjvFigure],
+                                   List[Tuple[mujoco.MjrRect, mujoco.MjvFigure]]]
+  ):
+    """Overlays figures on the viewer.
+
+    Does nothing when `self._sim()` returns None.
+
+    Args:
+      viewports_figures: A single (viewport, figure) tuple, or a list of such
+        tuples. `viewport` is the rectangle defining the position and size of
+        the figure; `figure` is the MjvFigure holding the data to display. A
+        single tuple is wrapped into a one-element list before being passed on.
+    """
     sim = self._sim()
     if sim is not None:
+      # A bare tuple is shorthand for a one-element list.
+      if isinstance(viewports_figures, tuple):
+        viewports_figures = [viewports_figures]
       sim.set_figures(viewports_figures)
 
   def clear_figures(self):
     sim = self._sim()
     if sim is not None:
       sim.clear_figures()
 
+  def set_texts(self, texts: Union[Tuple[Optional[int], Optional[int], Optional[str], Optional[str]], 
+                                            List[Tuple[Optional[int], Optional[int], Optional[str], Optional[str]]]]):
+    """Overlay text on the viewer.
+
+    Args:
+      texts: Single tuple or list of tuples of (font, gridpos, text1, text2)
+        font: Font style from mujoco.mjtFontScale
+        gridpos: Position of text box from mujoco.mjtGridPos
+        text1: Left text column, defaults to empty string if None
+        text2: Right text column, defaults to empty string if None
+    """
+    sim = self._sim()
+    if sim is not None:
+      # Convert single tuple to list if needed
+      if isinstance(texts, tuple):
+        texts = [texts]
+
+      # Convert None values to empty strings
+      default_font = mujoco.mjtFontScale.mjFONTSCALE_150
+      default_gridpos = mujoco.mjtGridPos.mjGRID_TOPLEFT
+      processed_texts = [(
+                        default_font if font is None else font, 
+                        default_gridpos if gridpos is None else gridpos, 
+                         "" if text1 is None else text1,
+                         "" if text2 is None else text2)
+                        for font, gridpos, text1, text2 in texts]
+
+      sim.set_texts(processed_texts)
+
+  def clear_texts(self):
+    sim = self._sim()
+    if sim is not None:
+      sim.clear_texts()
+
+  def set_images(
+      self, viewports_images: Union[Tuple[mujoco.MjrRect, np.ndarray],
+                                  List[Tuple[mujoco.MjrRect, np.ndarray]]]
+  ):
+    """Overlay images on the viewer.
+
+    Args:
+      viewports_images: Single tuple or list of tuples of (viewport, image)
+        viewport: Rectangle defining position and size of the image
+        image: RGB image with shape (height, width, 3)
+    """
+    sim = self._sim()
+    if sim is not None:
+      # Convert single tuple to list if needed
+      if isinstance(viewports_images, tuple):
+        viewports_images = [viewports_images]
+
+      processed_images = []
+      for viewport, image in viewports_images:
+        targ_shape = (viewport.height, viewport.width)
+        # Check if image is already the correct shape
+        if image.shape[:2] != targ_shape:
+          raise ValueError(f"Image shape {image.shape[:2]} does not match target shape {targ_shape}")
+        flipped = np.flip(image, axis=0)
+        contiguous = np.ascontiguousarray(flipped)
+        processed_images.append((viewport, contiguous))
+      sim.set_images(processed_images)
+
+  def clear_images(self):
+    sim = self._sim()
+    if sim is not None:
+      sim.clear_images()
+
   def close(self):
     sim = self._sim()
     if sim is not None:

diff --git a/simulate/simulate.cc b/simulate/simulate.cc
@@ -546,6 +546,14 @@ void ShowFigure(mj::Simulate* sim, mjrRect viewport, mjvFigure* fig){
   mjr_figure(viewport, fig, &sim->platform_ui->mjr_context());
 }
 
+// Draws a text overlay (left column `text1`, right column `text2`) at grid
+// position `gridpos` of `viewport`, using font scale `font`. The strings are
+// taken by const reference so rendering does not copy them on every frame.
+void ShowOverlayText(mj::Simulate* sim, mjrRect viewport, int font, int gridpos,
+                     const std::string& text1, const std::string& text2) {
+  mjr_overlay(font, gridpos, viewport, text1.c_str(), text2.c_str(),
+              &sim->platform_ui->mjr_context());
+}
+
+// Draws the RGB pixel buffer `image` into `viewport`; no depth buffer is
+// supplied (the second argument to mjr_drawPixels is nullptr).
+void ShowImage(mj::Simulate* sim, mjrRect viewport,
+               const unsigned char* image) {
+  auto& context = sim->platform_ui->mjr_context();
+  mjr_drawPixels(image, nullptr, viewport, &context);
+}
+
 // load state from history buffer
 static void LoadScrubState(mj::Simulate* sim) {
   // get index into circular buffer
@@ -2597,6 +2605,16 @@ void Simulate::Render() {
     ShowFigure(this, viewport, &figure);
   }
 
+  // overlay text
+  for (auto& [font, gridpos, text1, text2] : this->user_texts_) {
+    ShowOverlayText(this, rect, font, gridpos, text1, text2);
+  }
+
+  // user images
+  for (auto& [viewport, image] : this->user_images_) {
+    ShowImage(this, viewport, image);
+  }
+
   // finalize
   this->platform_ui->SwapBuffers();
 }

diff --git a/simulate/simulate.h b/simulate/simulate.h
@@ -249,10 +249,12 @@ class Simulate {
   mjvFigure figsize = {};
   mjvFigure figsensor = {};
 
-  // additional user-defined visualization geoms (used in passive mode)
+  // additional user-defined visualization
   mjvScene* user_scn = nullptr;
   mjtByte user_scn_flags_prev_[mjNRNDFLAG];
   std::vector<std::pair<mjrRect, mjvFigure>> user_figures_;
+  std::vector<std::tuple<int, int, std::string, std::string>> user_texts_;
+  std::vector<std::tuple<mjrRect, unsigned char*>> user_images_;
 
   // OpenGL rendering and UI
   int refresh_rate = 60;