Fix some examples and plotting (with Qt)

kngwyu · kngwyu · commit 8e1780aedc6f · 2021-04-05T11:57:48.000+09:00
diff --git a/examples/bernoulli_gridworld.py b/examples/bernoulli_gridworld.py
@@ -8,7 +8,9 @@
 def select_domain(map_, noise, episode_cap, **kwargs):
     map_ = BernoulliGridWorld.default_map(map_ + ".txt")
     return BernoulliGridWorld(
-        map_, random_start=True, noise=noise, episode_cap=episode_cap,
+        map_,
+        noise=noise,
+        episode_cap=episode_cap,
     )
 
 
diff --git a/examples/deepsea.py b/examples/deepsea.py
@@ -22,5 +22,6 @@ def select_domain(size, noise, **kwargs):
             click.Option(["--epsilon-min"], type=float, default=None),
             click.Option(["--beta"], type=float, default=0.05),
             click.Option(["--show-reward"], is_flag=True),
+            click.Option(["--vi-threshold"], type=float, default=0.001),
         ],
     )
diff --git a/examples/fr_gridworld.py b/examples/fr_gridworld.py
@@ -9,7 +9,6 @@ def select_domain(map_, noise, step_penalty, episode_cap, **kwargs):
     map_ = FixedRewardGridWorld.default_map(map_ + ".txt")
     return FixedRewardGridWorld(
         map_,
-        random_start=True,
         noise=noise,
         step_penalty=step_penalty,
         episode_cap=episode_cap,
diff --git a/examples/gridworld.py b/examples/gridworld.py
@@ -9,7 +9,7 @@ def select_domain(map_, noise, **kwargs):
     random_goal = "RandomGoal" in map_
     map_ = GridWorld.default_map(map_ + ".txt")
     return GridWorld(
-        map_, random_start=True, random_goal=random_goal, noise=noise, episode_cap=20
+        map_, random_goal=random_goal, noise=noise, episode_cap=20
     )
 
 
diff --git a/examples/mdp-solvers/fr_gridworld.py b/examples/mdp-solvers/fr_gridworld.py
@@ -12,9 +12,7 @@
 
 def select_domain(map_, step_penalty, **kwargs):
     map_ = FixedRewardGridWorld.default_map(map_ + ".txt")
-    return FixedRewardGridWorld(
-        map_, random_start=True, noise=0.1, step_penalty=step_penalty
-    )
+    return FixedRewardGridWorld(map_, noise=0.1, step_penalty=step_penalty)
 
 
 def select_agent(name, domain, seed, threshold, **kwargs):
diff --git a/examples/mdp-solvers/gridworld.py b/examples/mdp-solvers/gridworld.py
@@ -12,7 +12,7 @@
 
 def select_domain(map_="4x5", **kwargs):
     map_ = GridWorld.default_map(map_ + ".txt")
-    return GridWorld(map_, random_start=True, noise=0.1)
+    return GridWorld(map_, noise=0.1)
 
 
 def select_agent(name, domain, seed, **kwargs):
diff --git a/rlpy/domains/grid_world.py b/rlpy/domains/grid_world.py
@@ -245,6 +245,7 @@ def show_domain(self, a=0, s=None, legend=False, noticks=False):
         self.agent_fig.remove()
         self.agent_fig = self._agent_fig(s)
         self.domain_fig.canvas.draw()
+        self.domain_fig.show()
         if JUPYTER_MODE:
             if self.domain_display is None:
                 self.domain_display = display(self.domain_fig, display_id=True)  # noqa
@@ -567,6 +568,8 @@ def _init_value_vis(self):
         self.vf_fig.show()
 
     def show_learning(self, representation):
+        import matplotlib as mpl
+
         if self.vf_ax is None:
             self._init_value_vis()
         self._reset_texts(self.vf_texts)
diff --git a/rlpy/domains/pinball.py b/rlpy/domains/pinball.py
@@ -108,7 +108,7 @@ def __init__(
         self.screen_width = screen_width
         self.screen_height = screen_height
 
-    def show_domain(self, _a=None):
+    def show_domain(self, a=None):
         if self.screen is None:
             tk_window = Tk()
             tk_window.title("RLPy Pinball")
@@ -117,7 +117,10 @@ def show_domain(self, _a=None):
             self.screen.configure(background="LightGray")
             self.screen.pack()
             self.environment_view = PinballView(
-                self.screen, width, height, self.environment,
+                self.screen,
+                width,
+                height,
+                self.environment,
             )
         self.environment_view.blit()
         self.screen.pack()
@@ -209,7 +212,11 @@ def __init__(
         for i in range(nrows * ncols):
             ax = self.fig.add_subplot(nrows, ncols, i + 1)
             img = ax.imshow(
-                dummy_data, cmap=cmap, interpolation="nearest", vmin=vmin, vmax=vmax,
+                dummy_data,
+                cmap=cmap,
+                interpolation="nearest",
+                vmin=vmin,
+                vmax=vmax,
             )
             cbar = ax.figure.colorbar(img, ax=ax)
             cbar.ax.set_ylabel("", rotation=-90, va="bottom")
@@ -234,7 +241,7 @@ def draw(self):
 
 class BallModel:
 
-    """ This class maintains the state of the ball
+    """This class maintains the state of the ball
     in the pinball domain. It takes care of moving
     it according to the current velocity and drag coefficient.
 
@@ -269,7 +276,7 @@ def step(self):
 
 class PinballObstacle:
 
-    """ This class represents a single polygon obstacle in the
+    """This class represents a single polygon obstacle in the
     pinball domain and detects when a :class:`BallModel` hits it.
 
     When a collision is detected, it also provides a way to
@@ -291,7 +298,7 @@ def __init__(self, points):
         self._intercept = None
 
     def collision(self, ball):
-        """ Determines if the ball hits this obstacle
+        """Determines if the ball hits this obstacle
         :param ball: An instance of :class:`BallModel`
         :type ball: :class:`BallModel`
         """
@@ -371,7 +378,7 @@ def _select_edge(self, intersect1, intersect2, ball):
         return intersect2
 
     def _angle(self, v1, v2):
-        """ Compute the angle difference between two vectors
+        """Compute the angle difference between two vectors
         :param v1: The x,y coordinates of the vector
         :type: v1: list
         :param v2: The x,y coordinates of the vector
@@ -424,7 +431,11 @@ class PinballTarget:
     """
 
     def __init__(
-        self, target_pos, target_rad, target_color="red", target_reward_scale=1.0,
+        self,
+        target_pos,
+        target_rad,
+        target_color="red",
+        target_reward_scale=1.0,
     ):
         if isinstance(target_pos[0], list):
             self.num_goals = len(target_pos)
@@ -481,8 +492,7 @@ class _DoubleCollision:
         pass
 
     def __init__(self, config_file, random_state):
-        """ Reads a configuration file for Pinball and draw the domain to screen
-        """
+        """Reads a configuration file for Pinball and draw the domain to screen"""
 
         self.random_state = random_state
         self.action_effects = {
@@ -522,7 +532,7 @@ def sample_start(self):
         return self.start_positions[idx].copy()
 
     def get_state(self):
-        """ Access the current 4-dimensional state vector.
+        """Access the current 4-dimensional state vector.
         :returns: a list containing the x position, y position, xdot, ydot
         :rtype: np.ndarray
         """
@@ -542,7 +552,7 @@ def _detect_collision(self):
             return PinballModel._Collision(dxdy)
 
     def take_action(self, action):
-        """ Take a step in the environment
+        """Take a step in the environment
 
         :param action: The action to apply over the ball
         :type action: int
diff --git a/rlpy/tools/plotting.py b/rlpy/tools/plotting.py
@@ -4,7 +4,6 @@
 import matplotlib as mpl
 from matplotlib import cm, colors, lines, rc  # noqa
 from matplotlib import pylab as pl
-from matplotlib import pyplot as plt
 from matplotlib import patches as mpatches  # noqa
 from matplotlib import path as mpath  # noqa
 import numpy as np
@@ -27,6 +26,8 @@ def jupyter_mode(mode=True):
 
 
 def nogui_mode():
+    from matplotlib import pyplot as plt
+
     mpl.use("agg")
     plt.ioff()
 
@@ -38,9 +39,13 @@ def _stub(*args, **kwargs):
 
 # Try GUI backend first
 try:
-    mpl.use("tkAgg")
+    mpl.use("TkAgg")
+    from matplotlib import pyplot as plt
+
     plt.ion()
 except ImportError:
+    from matplotlib import pyplot as plt
+
     nogui_mode()
 
 

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,9 @@`
`8`	`8`	`def select_domain(map_, noise, episode_cap, **kwargs):`
`9`	`9`	`map_ = BernoulliGridWorld.default_map(map_ + ".txt")`
`10`	`10`	`return BernoulliGridWorld(`
`11`		`- map_, random_start=True, noise=noise, episode_cap=episode_cap,`
	`11`	`+ map_,`
	`12`	`+ noise=noise,`
	`13`	`+ episode_cap=episode_cap,`
`12`	`14`	`)`
`13`	`15`
`14`	`16`
Original file line number	Diff line number	Diff line change
`@@ -22,5 +22,6 @@ def select_domain(size, noise, **kwargs):`
`22`	`22`	`click.Option(["--epsilon-min"], type=float, default=None),`
`23`	`23`	`click.Option(["--beta"], type=float, default=0.05),`
`24`	`24`	`click.Option(["--show-reward"], is_flag=True),`
	`25`	`+ click.Option(["--vi-threshold"], type=float, default=0.001),`
`25`	`26`	`],`
`26`	`27`	`)`
Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,7 @@ def select_domain(map_, noise, **kwargs):`
`9`	`9`	`random_goal = "RandomGoal" in map_`
`10`	`10`	`map_ = GridWorld.default_map(map_ + ".txt")`
`11`	`11`	`return GridWorld(`
`12`		`- map_, random_start=True, random_goal=random_goal, noise=noise, episode_cap=20`
	`12`	`+ map_, random_goal=random_goal, noise=noise, episode_cap=20`
`13`	`13`	`)`
`14`	`14`
`15`	`15`