kngwyu
diff --git a/‎.gitignore
+2 b/‎.gitignore
+2
diff --git a/‎examples/pinball.py
+10-3 b/‎examples/pinball.py
+10-3
diff --git a/‎rlpy/domains/PinballConfigs/pinball_box.cfg
-10 b/‎rlpy/domains/PinballConfigs/pinball_box.cfg
-10
diff --git a/‎rlpy/domains/PinballConfigs/pinball_box.json
+13 b/‎rlpy/domains/PinballConfigs/pinball_box.json
+13
diff --git a/‎rlpy/domains/PinballConfigs/pinball_empty.cfg
-8 b/‎rlpy/domains/PinballConfigs/pinball_empty.cfg
-8
diff --git a/‎rlpy/domains/PinballConfigs/pinball_empty.json
+12 b/‎rlpy/domains/PinballConfigs/pinball_empty.json
+12
diff --git a/‎rlpy/domains/PinballConfigs/pinball_hard_single.cfg
-22 b/‎rlpy/domains/PinballConfigs/pinball_hard_single.cfg
-22
diff --git a/‎rlpy/domains/PinballConfigs/pinball_hard_single.json
+25 b/‎rlpy/domains/PinballConfigs/pinball_hard_single.json
+25
diff --git a/‎rlpy/domains/PinballConfigs/pinball_medium.cfg
-15 b/‎rlpy/domains/PinballConfigs/pinball_medium.cfg
-15
diff --git a/‎rlpy/domains/PinballConfigs/pinball_medium.json
+18 b/‎rlpy/domains/PinballConfigs/pinball_medium.json
+18
diff --git a/‎rlpy/domains/PinballConfigs/pinball_simple_single.cfg
-15 b/‎rlpy/domains/PinballConfigs/pinball_simple_single.cfg
-15
diff --git a/‎rlpy/domains/PinballConfigs/pinball_simple_single.json
+18 b/‎rlpy/domains/PinballConfigs/pinball_simple_single.json
+18
diff --git a/‎rlpy/domains/pinball.py
+36-54 b/‎rlpy/domains/pinball.py
+36-54
@@ -17,7 +17,9 @@ Config.py
 dist
 *.egg-info
 *.bak
+Untitled.ipynb
 .eggs
+/**/.ipynb_checkpoints
 /**/Results
 /**/Result
 MANIFEST
@@ -1,14 +1,18 @@
+import click
 from rlpy.domains import Pinball
 from rlpy.tools.cli import run_experiment
 
 import methods
 
 
-def select_domain(noise=0.1):
-    return Pinball(noise=noise)
+def select_domain(cfg, noise=0.1):
+    if not cfg.startswith("pinball_"):
+        cfg = "pinball_" + cfg
+    cfg = Pinball.default_cfg(cfg if cfg.endswith(".json") else cfg + ".json")
+    return Pinball(noise=noise, config_file=cfg)
 
 
-def select_agent(name, domain, max_steps, seed):
+def select_agent(name, domain, max_steps, seed, **kwargs):
     if name is None or name == "fourier-q":
         return methods.fourier_q(domain, order=5)
     elif name == "fourier-sarsa":
@@ -36,4 +40,7 @@ def select_agent(name, domain, max_steps, seed):
         default_max_steps=100000,
         default_num_policy_checks=30,
         default_checks_per_policy=1,
+        other_options=[
+            click.Option(["--cfg"], type=str, default="pinball_simple_single")
+        ],
     )
@@ -0,0 +1,13 @@
+{
+  "ball_rad": 0.02,
+  "obstacles": [
+    [[0.0, 0.0], [0.0, 0.01], [1.0, 0.01], [1.0, 0.0]],
+    [[0.0, 0.0], [0.01, 0.0], [0.01, 1.0], [0.0, 1.0]],
+    [[0.0, 1.0], [0.0, 0.99], [1.0, 0.99], [1.0, 1.0]],
+    [[1.0, 1.0], [0.99, 1.0], [0.99, 0.0], [1.0, 0.0]],
+    [[0.45, 0.45], [0.55, 0.45], [0.55, 0.55], [0.45, 0.55]]
+  ],
+  "start_pos": [[0.2, 0.9]],
+  "target_pos": [0.9, 0.2],
+  "target_rad": 0.04
+}
@@ -0,0 +1,12 @@
+{
+  "ball_rad": 0.02,
+  "obstacles": [
+    [[0.0, 0.0], [0.0, 0.01], [1.0, 0.01], [1.0, 0.0]],
+    [[0.0, 0.0], [0.01, 0.0], [0.01, 1.0], [0.0, 1.0]],
+    [[0.0, 1.0], [0.0, 0.99], [1.0, 0.99], [1.0, 1.0]],
+    [[1.0, 1.0], [0.99, 1.0], [0.99, 0.0], [1.0, 0.0]]
+  ],
+  "start_pos": [[0.2, 0.9]],
+  "target_pos": [0.9, 0.2],
+  "target_rad": 0.04
+}
@@ -0,0 +1,25 @@
+{
+  "ball_rad": 0.015,
+  "obstacles": [
+    [[0.0, 0.0], [0.0, 0.01], [1.0, 0.01], [1.0, 0.0]],
+    [[0.0, 0.0], [0.01, 0.0], [0.01, 1.0], [0.0, 1.0]],
+    [[0.0, 1.0], [0.0, 0.99], [1.0, 0.99], [1.0, 1.0]],
+    [[1.0, 1.0], [0.99, 1.0], [0.99, 0.0], [1.0, 0.0]],
+    [[0.034, 0.852], [0.106, 0.708], [0.33199999999999996, 0.674], [0.17599999999999996, 0.618], [0.028, 0.718]],
+    [[0.15, 0.7559999999999999], [0.142, 0.93], [0.232, 0.894], [0.238, 0.99], [0.498, 0.722]],
+    [[0.8079999999999999, 0.91], [0.904, 0.784], [0.7799999999999999, 0.572], [0.942, 0.562], [0.952, 0.82], [0.874, 0.934]],
+    [[0.768, 0.814], [0.692, 0.548], [0.594, 0.47], [0.606, 0.804], [0.648, 0.626]],
+    [[0.22799999999999998, 0.5760000000000001], [0.39, 0.322], [0.3400000000000001, 0.31400000000000006], [0.184, 0.456]],
+    [[0.09, 0.228], [0.242, 0.076], [0.106, 0.03], [0.022, 0.178]],
+    [[0.11, 0.278], [0.24600000000000002, 0.262], [0.108, 0.454], [0.16, 0.566], [0.064, 0.626], [0.016, 0.438]],
+    [[0.772, 0.1], [0.71, 0.20599999999999996], [0.77, 0.322], [0.894, 0.09600000000000002], [0.8039999999999999, 0.17600000000000002]],
+    [[0.698, 0.476], [0.984, 0.27199999999999996], [0.908, 0.512]],
+    [[0.45, 0.39199999999999996], [0.614, 0.25799999999999995], [0.7340000000000001, 0.438]],
+    [[0.476, 0.868], [0.552, 0.8119999999999999], [0.62, 0.902], [0.626, 0.972], [0.49, 0.958]],
+    [[0.61, 0.014000000000000002], [0.58, 0.094], [0.774, 0.05000000000000001], [0.63, 0.054000000000000006]],
+    [[0.33399999999999996, 0.014], [0.27799999999999997, 0.03799999999999998], [0.368, 0.254], [0.7, 0.20000000000000004], [0.764, 0.108], [0.526, 0.158]],
+    [[0.294, 0.584], [0.478, 0.626], [0.482, 0.574], [0.324, 0.434], [0.35, 0.39], [0.572, 0.52], [0.588, 0.722], [0.456, 0.668]]],
+  "start_pos": [[0.055, 0.95]],
+  "target_pos": [0.5, 0.06],
+  "target_rad": 0.04
+}
@@ -0,0 +1,18 @@
+{
+  "ball_rad": 0.02,
+  "obstacles": [
+    [[0.0, 0.0], [0.0, 0.01], [1.0, 0.01], [1.0, 0.0]],
+    [[0.0, 0.0], [0.01, 0.0], [0.01, 1.0], [0.0, 1.0]],
+    [[0.0, 1.0], [0.0, 0.99], [1.0, 0.99], [1.0, 1.0]],
+    [[1.0, 1.0], [0.99, 1.0], [0.99, 0.0], [1.0, 0.0]],
+    [[0.09, 0.228], [0.242, 0.076], [0.106, 0.03], [0.022, 0.178]],
+    [[0.33399999999999996, 0.014], [0.27799999999999997, 0.03799999999999998], [0.368, 0.254], [0.7, 0.20000000000000004], [0.764, 0.108], [0.526, 0.158]],
+    [[0.034, 0.852], [0.106, 0.708], [0.33199999999999996, 0.674], [0.17599999999999996, 0.618], [0.028, 0.718]],
+    [[0.45, 0.39199999999999996], [0.614, 0.25799999999999995], [0.7340000000000001, 0.438]],
+    [[0.33399999999999996, 0.014], [0.27799999999999997, 0.03799999999999998], [0.368, 0.254], [0.7, 0.20000000000000004], [0.764, 0.108], [0.526, 0.158]],
+    [[0.294, 0.584], [0.478, 0.626], [0.482, 0.574], [0.324, 0.434], [0.35, 0.39], [0.572, 0.52], [0.588, 0.722], [0.456, 0.668]]
+  ],
+  "start_pos": [[0.2, 0.9]],
+  "target_pos": [0.9, 0.2],
+  "target_rad": 0.04
+}
@@ -0,0 +1,18 @@
+{
+  "ball_rad": 0.02,
+  "obstacles": [
+    [[0.0, 0.0], [0.0, 0.01], [1.0, 0.01], [1.0, 0.0]],
+    [[0.0, 0.0], [0.01, 0.0], [0.01, 1.0], [0.0, 1.0]],
+    [[0.0, 1.0], [0.0, 0.99], [1.0, 0.99], [1.0, 1.0]],
+    [[1.0, 1.0], [0.99, 1.0], [0.99, 0.0], [1.0, 0.0]],
+    [[0.35, 0.4], [0.45, 0.55], [0.43, 0.65], [0.3, 0.7], [0.45, 0.7], [0.5, 0.6], [0.45, 0.35]],
+    [[0.2, 0.6], [0.25, 0.55], [0.15, 0.5], [0.15, 0.45], [0.2, 0.3], [0.12, 0.27], [0.075, 0.35], [0.09, 0.55]],
+    [[0.3, 0.8], [0.6, 0.75], [0.8, 0.8], [0.8, 0.9], [0.6, 0.85], [0.3, 0.9]],
+    [[0.8, 0.7], [0.975, 0.65], [0.75, 0.5], [0.9, 0.3], [0.7, 0.35], [0.63, 0.65]],
+    [[0.6, 0.25], [0.3, 0.07], [0.15, 0.175], [0.15, 0.2], [0.3, 0.175], [0.6, 0.3]],
+    [[0.75, 0.025], [0.8, 0.24], [0.725, 0.27], [0.7, 0.025]]
+  ],
+  "start_pos": [[0.2, 0.9]],
+  "target_pos": [0.9, 0.2],
+  "target_rad": 0.04
+}
@@ -4,7 +4,7 @@
 import numpy as np
 from itertools import tee
 import itertools
-import os
+from pathlib import Path
 
 try:
     from tkinter import Tk, Canvas
@@ -57,24 +57,24 @@ class Pinball(Domain):
     """
 
     #: default location of config files shipped with rlpy
-    default_config_dir = os.path.join(__rlpy_location__, "domains", "PinballConfigs")
+    DEFAULT_CONFIG_DIR = Path(__rlpy_location__).joinpath("domains/PinballConfigs")
+
+    @classmethod
+    def default_cfg(cls, name="pinball_simple_single.json"):
+        return cls.DEFAULT_CONFIG_DIR.joinpath(name)
 
     def __init__(
         self,
         noise=0.1,
         episode_cap=1000,
-        configuration=os.path.join(default_config_dir, "pinball_simple_single.cfg"),
+        config_file=DEFAULT_CONFIG_DIR.joinpath("pinball_simple_single.json"),
     ):
         """
-        configuration:
-            location of the configuration file
-        episode_cap:
-            maximum length of an episode
-        noise:
-            with probability noise, a uniformly random action is executed
+        :param config_file: Location of the configuration file.
+        :param episode_cap: Maximum length of an episode
+        :param noise: With probability noise, a uniformly random action is executed
         """
         self.NOISE = noise
-        self.configuration = configuration
         self.screen = None
         self.actions = [
             PinballModel.ACC_X,
@@ -91,14 +91,12 @@ def __init__(
             continuous_dims=[4],
             episode_cap=episode_cap,
         )
-        self.environment = PinballModel(
-            self.configuration, random_state=self.random_state
-        )
+        self.environment = PinballModel(config_file, random_state=self.random_state)
 
     def show_domain(self, a):
         if self.screen is None:
             master = Tk()
-            master.title("RLPY Pinball")
+            master.title("RLPy Pinball")
             self.screen = Canvas(master, width=500.0, height=500.0)
             self.screen.configure(background="LightGray")
             self.screen.pack()
@@ -149,7 +147,7 @@ def is_terminal(self):
         return self.environment.episode_ended()
 
 
-class BallModel(object):
+class BallModel:
 
     """ This class maintains the state of the ball
     in the pinball domain. It takes care of moving
@@ -202,7 +200,7 @@ def _clip(self, val, low=-2, high=2):
         return val
 
 
-class PinballObstacle(object):
+class PinballObstacle:
 
     """ This class represents a single polygon obstacle in the
     pinball domain and detects when a :class:`BallModel` hits it.
@@ -216,7 +214,7 @@ def __init__(self, points):
         :param points: A list of points defining the polygon
         :type points: list of lists
         """
-        self.points = points
+        self.points = np.array(points)
         self.min_x = min(self.points, key=lambda pt: pt[0])[0]
         self.max_x = max(self.points, key=lambda pt: pt[0])[0]
         self.min_y = min(self.points, key=lambda pt: pt[1])[1]
@@ -375,7 +373,7 @@ def _intercept_edge(self, pt_pair, ball):
             return False
 
 
-class PinballModel(object):
+class PinballModel:
 
     """ This class is a self-contained model of the pinball
     domain for reinforcement learning.
@@ -395,13 +393,8 @@ class PinballModel(object):
     THRUST_PENALTY = -5
     END_EPISODE = 10000
 
-    def __init__(self, configuration, random_state=np.random.RandomState()):
+    def __init__(self, config_file, random_state):
         """ Read a configuration file for Pinball and draw the domain to screen
-
-    :param configuration: a configuration file containing the polygons,
-        source(s) and target location.
-    :type configuration: str
-
         """
 
         self.random_state = random_state
@@ -412,33 +405,23 @@ def __init__(self, configuration, random_state=np.random.RandomState()):
             self.DEC_Y: (0, -1),
             self.ACC_NONE: (0, 0),
         }
+        import json
 
         # Set up the environment according to the configuration
-        self.obstacles = []
-        self.target_pos = []
-        self.target_rad = 0.01
-
-        ball_rad = 0.01
-        start_pos = []
-        with open(configuration) as fp:
-            for line in fp.readlines():
-                tokens = line.strip().split()
-                if not len(tokens):
-                    continue
-                elif tokens[0] == "polygon":
-                    self.obstacles.append(
-                        PinballObstacle(list(zip(*[iter(map(float, tokens[1:]))] * 2)))
-                    )
-                elif tokens[0] == "target":
-                    self.target_pos = [float(tokens[1]), float(tokens[2])]
-                    self.target_rad = float(tokens[3])
-                elif tokens[0] == "start":
-                    start_pos = list(zip(*[iter(map(float, tokens[1:]))] * 2))
-                elif tokens[0] == "ball":
-                    ball_rad = float(tokens[1])
+        with Path(config_file).open() as f:  # accepts str as well as Path
+            config = json.load(f)
+        try:
+            self.obstacles = list(map(PinballObstacle, config["obstacles"]))
+            self.target_pos = config["target_pos"]
+            self.target_rad = config["target_rad"]
+            start_pos = config["start_pos"]
+            ball_rad = config["ball_rad"]
+        except KeyError as e:
+            raise KeyError(f"Pinball config doesn't have a key: {e}") from e
+
         self.start_pos = start_pos[0]
-        a = self.random_state.randint(len(start_pos))
-        self.ball = BallModel(list(start_pos[a]), ball_rad)
+        start_idx = self.random_state.randint(len(start_pos))
+        self.ball = BallModel(list(start_pos[start_idx]), ball_rad)
 
     def get_state(self):
         """ Access the current 4-dimensional state vector
@@ -520,7 +503,7 @@ def _check_bounds(self):
             self.ball.position[1] = 0.05
 
 
-class PinballView(object):
+class PinballView:
 
     """ This class displays a :class:`PinballModel`
 
@@ -592,14 +575,13 @@ def blit(self):
 
 def run_pinballview(width, height, configuration):
     """
-
-        Changed from original Pierre-Luc Bacon implementation to reflect
-        the visualization changes in the PinballView Class.
-
+    Changed from original Pierre-Luc Bacon implementation to reflect
+    the visualization changes in the PinballView Class.
     """
+
     width, height = float(width), float(height)
     master = Tk()
-    master.title("RLPY Pinball")
+    master.title("RLPy Pinball")
     screen = Canvas(master, width=500.0, height=500.0)
     screen.configure(background="LightGray")
     screen.pack()