diff --git a/CartPole/__init__.py b/CartPole/__init__.py
index 302ffdf5..071a36b8 100644
--- a/CartPole/__init__.py
+++ b/CartPole/__init__.py
@@ -10,9 +10,12 @@
 import csv
 # Import module to interact with OS
 import os
+import sys
 import traceback
 # Import module to get a current time and date used to name the files containing the history of simulations
 from datetime import datetime
+from typing import Optional
+
 # To detect the latest csv file
 
 import numpy as np
@@ -22,9 +25,9 @@
 from Control_Toolkit.others.globals_and_utils import (
     get_available_controller_names, get_available_optimizer_names, get_controller_name, get_optimizer_name, import_controller_by_name)
 from others.globals_and_utils import MockSpace, create_rng, load_config
-from others.p_globals import (P_GLOBALS, J_fric, L, m_cart, M_fric, TrackHalfLength,
-                              controlBias, controlDisturbance, export_globals,
-                              g, k, m_pole, u_max, v_max)
+from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength,
+                              controlBias, controlDisturbance, g, k, m_pole, u_max, v_max, cart_bounce_factor, NaturalPeriod,
+                              export_globals)
 # Interpolate function to create smooth random track
 from scipy.interpolate import BPoly, interp1d
 # Run range() automatically adding progress bar in terminal
@@ -40,6 +43,9 @@
 from CartPole.state_utilities import (ANGLE_COS_IDX, ANGLE_IDX, ANGLE_SIN_IDX,
                                       ANGLED_IDX, POSITION_IDX, POSITIOND_IDX)
 
+from Control_Toolkit.others.get_logger import get_logger
+log = get_logger(__name__)
+
 # region Imported modules
 
 try:
@@ -143,7 +149,7 @@ def __init__(self, initial_state=s0, path_to_experiment_recordings=None):
         # region Variables controlling operation of the program - should not be modified directly
         self.save_flag = False  # Signalizes that the current time step should be saved
         self.csv_filepath = None  # Where to save the experiment history.
-        self.controller = None  # Placeholder for the currently used controller function
+        self.controller:template_controller = Optional[None]  # Placeholder for the currently used controller function
         self.controller_name = ''  # Placeholder for the currently used controller name
         self.optimizer_name = ''  # Placeholder for the currently used optimizer name
         self.controller_idx = None  # Placeholder for the currently used controller index
@@ -199,7 +205,7 @@ def __init__(self, initial_state=s0, path_to_experiment_recordings=None):
         self.slider_max = 1.0
         self.slider_value = 0.0
 
-        self.show_hanging_pole = False
+        self.show_hanging_pole = True
 
         self.physical_to_graphics = None
         self.graphics_to_physical = None
@@ -446,6 +452,7 @@ def cartpole_integration(self):
 
     def edge_bounce(self):
         # Elastic collision at edges
+        # TODO should be semielastic
         self.s[ANGLE_IDX], self.s[ANGLED_IDX], self.s[POSITION_IDX], self.s[POSITIOND_IDX] = edge_bounce_numba(
             self.s[ANGLE_IDX],
             np.cos(self.s[ANGLE_IDX]),
@@ -460,6 +467,10 @@ def edge_bounce(self):
     # This function should be called for the first time to calculate 0th time step
     # Otherwise it goes out of sync with saving
     def Update_Q(self):
+        """ Determine the dimensionless [-1,1] value of the motor power Q
+        This function should be called for the first time to calculate 0th time step
+        Otherwise it goes out of sync with saving,
+    """
         # Calculate time steps from last update
         # The counter should be initialized at max-1 to start with a control input update
         self.dt_controller_steps_counter += 1
@@ -471,7 +482,7 @@ def Update_Q(self):
                 # in this case slider corresponds already to the power of the motor
                 self.Q = self.slider_value
             else:  # in this case slider gives a target position, lqr regulator
-                self.Q = self.controller.step(self.s_with_noise_and_latency, self.time, {"target_position": self.target_position, "target_equilibrium": self.target_equilibrium})
+                self.Q = self.controller.step(self.s_with_noise_and_latency, self.time, updated_attributes= {"target_position": self.target_position, "target_equilibrium": self.target_equilibrium})
 
             self.dt_controller_steps_counter = 0
 
@@ -488,6 +499,11 @@ def update_parameters(self):
 
     # This method saves the dictionary keeping the history of simulation to a .csv file
     def save_history_csv(self, csv_name=None, mode='init', length_of_experiment='unknown'):
+        """ Saves history of cartpole state and control
+        :param csv_name: the filename base, .csv is appended if it is not there. self.path_to_experiment_recordings is prepended for path
+        :param mode: the mode for saving, default is 'init' which makes the timestamped folder etc TODO what are these modes???????
+        :param length_of_experiment: the duration of this experiment in seconds, written to header of CSV
+        """
 
         if mode == 'init':
 
@@ -550,8 +566,9 @@ def save_history_csv(self, csv_name=None, mode='init', length_of_experiment='unk
 
                 writer.writerow(['#'])
                 writer.writerow(['# Parameters:'])
-                for k in P_GLOBALS.__dict__:
-                    writer.writerow(['# ' + k + ': ' + str(getattr(P_GLOBALS, k))])
+                c = load_config("config.yml")
+                for k,v in c.items():
+                    writer.writerow(['# ' + k + ': ' + str(v)])
                 writer.writerow(['#'])
 
                 writer.writerow(['# Data:'])
@@ -900,8 +917,9 @@ def set_controller(self, controller_name=None, controller_idx=None):
                 )
 
             else:
+                log.debug(f'configuring controller "{self.controller}"')
                 self.controller.configure()
-            
+
                 
         # Set the maximal allowed value of the slider - relevant only for GUI
         if self.controller_name == 'manual-stabilization':
@@ -932,8 +950,10 @@ def set_cartpole_state_at_t0(self, reset_mode=1, s=None, target_position=None, r
             pass
 
         # reset global variables
-        global k, m_cart, m_pole, g, J_fric, M_fric, L, v_max, u_max, controlDisturbance, controlBias, TrackHalfLength
-        k[...], m_cart[...], m_pole[...], g[...], J_fric[...], M_fric[...], L[...], v_max[...], u_max[...], controlDisturbance[...], controlBias[...], TrackHalfLength[...] = export_globals()
+        global k, m_cart, m_pole, g, J_fric, M_fric, L, v_max, u_max, controlDisturbance, controlBias, TrackHalfLength, cart_bounce_factor, NaturalPeriod
+        # TODO why is ellipis object used here? https://stackoverflow.com/questions/772124/what-does-the-ellipsis-object-do
+        # these outputs of export_globals are numpy scalar arrays, i.e. each constant is a np.array with a single element
+        k[...], m_cart[...], m_pole[...], g[...], J_fric[...], M_fric[...], L[...], v_max[...], u_max[...], controlDisturbance[...], controlBias[...], TrackHalfLength[...], cart_bounce_factor[...], NaturalPeriod[...] = export_globals()
 
         self.time = 0.0
         if reset_mode == 0:  # Don't change it
@@ -1297,3 +1317,17 @@ def animationManage(i):
         return anim
 
     # endregion
+
+
+def is_physical_cartpole_running_and_control_enabled():
+    """ super hack to determine if we are running physical cartpole and control is turned on"""
+    if 'DriverFunctions' in sys.modules:  # if this module exists in sys.modules, we can deduce that physical-cartpole is running
+        try:
+            physical_cartpole_instance = sys.modules[
+                'DriverFunctions'].PhysicalCartPoleDriver.PhysicalCartPoleDriver.PhysicalCartPoleDriverInstance
+            if getattr(physical_cartpole_instance, 'controlEnabled') == True:
+                log.debug(f'physical cartpole present and control enabled')
+                return True
+        except Exception as e:
+            log.warning(f'Could not determine if control is enabled: {e}')
+            return False
diff --git a/CartPole/cartpole_model.py b/CartPole/cartpole_model.py
index f35b8358..505b2151 100644
--- a/CartPole/cartpole_model.py
+++ b/CartPole/cartpole_model.py
@@ -3,7 +3,7 @@
 import numpy as np
 from others.globals_and_utils import create_rng, load_config
 from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength,
-                              controlBias, controlDisturbance, g, k, m_pole, u_max, v_max)
+                              controlBias, controlDisturbance, g, k, m_pole, u_max, v_max,cart_bounce_factor)
 
 from CartPole.state_utilities import (ANGLE_COS_IDX, ANGLE_IDX, ANGLE_SIN_IDX,
                                       ANGLED_IDX, POSITION_IDX, POSITIOND_IDX,
@@ -30,8 +30,8 @@
 Should be the same up to the angle-direction-convention and notation changes.
 
 The convention:
-Pole upright position defines 0 angle
-Cart movement to the right is positive
+Pole upright position defines 0 angle, units of angle is in radians
+Cart movement to the right is positive, units are meters
 Clockwise angle rotation is defined as negative
 
 Required angle convention for CartPole GUI: CLOCK-NEG
@@ -146,12 +146,24 @@ def cartpole_ode(s: np.ndarray, u: float,
 
     return angleDD, positionDD
 
-def edge_bounce(angle, angle_cos, angleD, position, positionD, t_step, L=L):
+def edge_bounce(angle, angle_cos, angleD, position, positionD, t_step, L=L, cart_bounce_factor=cart_bounce_factor):
+    """ Models bounce at edge of cart track. Very simple complete elastic bounce currently.
+
+    :param angle:
+    :param angle_cos:
+    :param angleD:
+    :param position:
+    :param positionD:
+    :param t_step: the timestep in seconds
+    :param L: the pole length
+
+    :returns: angle, angleD, position, positionD
+    """
     if position >= TrackHalfLength or -position >= TrackHalfLength:  # Without abs to compile with tensorflow
-        angleD -= 2 * (positionD * angle_cos) / L
-        angle += angleD * t_step
-        positionD = -positionD
-        position += positionD * t_step
+        angleD -= 2 * (positionD * angle_cos) / L # TODO why this formula???
+        # angle += angleD * t_step # update angle according to new derivative of angle
+        positionD = -cart_bounce_factor*positionD # perfect bounce
+        # position += positionD * t_step # step back the amount of bounce
     return angle, angleD, position, positionD
 
 
diff --git a/CartPole/cartpole_model_tf.py b/CartPole/cartpole_model_tf.py
index 4a416d03..58ba6161 100644
--- a/CartPole/cartpole_model_tf.py
+++ b/CartPole/cartpole_model_tf.py
@@ -3,7 +3,7 @@
 import numpy as np
 import tensorflow as tf
 from others.globals_and_utils import create_rng, load_config
-from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength,
+from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength, cart_bounce_factor,
                               controlBias, controlDisturbance, g, k, m_pole, u_max,
                               v_max)
 from SI_Toolkit.Functions.TF.Compile import CompileTF
@@ -26,6 +26,7 @@
 controlDisturbance = tf.convert_to_tensor(controlDisturbance)
 controlBias = tf.convert_to_tensor(controlBias)
 TrackHalfLength = tf.convert_to_tensor(TrackHalfLength)
+cart_bounce_factor = tf.convert_to_tensor(cart_bounce_factor)
 
 
 rng = create_rng(__name__, config["cartpole"]["seed"])
@@ -71,7 +72,10 @@ def _cartpole_ode(ca, sa, angleD, positionD, u,
     Calculates current values of second derivative of angle and position
     from current value of angle and position, and their first derivatives
 
-    :param angle, angleD, position, positionD: Essential state information of cart
+    :param angle, angleD, position, positionD:
+          Pole angle in radians. 0 means pole is upright. Clockwise angle rotation is defined as negative.
+          Cart position is in meters, 0 at middle of track, positive to rightwards.
+          Essential state information of cart
     :param u: Force applied on cart in unnormalized range
 
     :returns: angular acceleration, horizontal acceleration
@@ -135,19 +139,46 @@ def cartpole_ode(s: np.ndarray, u: float,
     )
     return angleDD, positionDD
 
-def edge_bounce(angle, angle_cos, angleD, position, positionD, t_step, L=L):
-    if position >= TrackHalfLength or -position >= TrackHalfLength:  # Without abs to compile with tensorflow
-        angleD -= 2 * (positionD * angle_cos) / L
-        angle += angleD * t_step
-        positionD = -positionD
-        position += positionD * t_step
+# @tf.function
+def edge_bounce(angle, angle_cos, angleD, position, positionD, t_step, L=L, cart_bounce_factor=cart_bounce_factor):
+    """ Models bounce at edge of cart track. Very simple elastic bounce currently.
+
+    :param angle: Pole angle in radians. 0 means pole is upright. Clockwise angle rotation is defined as negative.
+    :param angle_cos:
+    :param angleD: rad/s. Positive means CCW rotation
+    :param position: meters, 0 at middle of table, positive rightwards
+    :param positionD: m/w, positive rightwards
+    :param t_step: the timestep in seconds
+    :param L: the pole length in meters
+    :param cart_bounce_factor: fraction of cart speed after bounce from edge
+
+    :returns: angle, angleD, position, positionD
+    """
+
+    i=tf.greater_equal(tf.abs(position),TrackHalfLength) # find those rollouts that go past edge of track
+
+    # for those that do, update the swing according to this dynamics
+    angleD = tf.where(i,angleD-2 * (positionD * angle_cos) / L, angleD) # TODO why this formula???
+    # don't update angle since the euler step will already do it
+    # angle = angle+angleD * t_step # update angle according to new derivative of angle
+    # and the cart velocity is reversed with some absorption
+    positionD = tf.where(i,-cart_bounce_factor*positionD,positionD) # imperfect bounce
+    # don't update position since Euler step will do it
+    # position = position+positionD * t_step  # step back the amount of bounce
+
+    # following is old serial code
+    # if position >= TrackHalfLength or -position >= TrackHalfLength:  # Without abs to compile with tensorflow
+    #     angleD -= 2 * (positionD * angle_cos) / L # TODO why this formula???
+    #     angle += angleD * t_step # update angle according to new derivative of angle
+    #     positionD = -cart_bounce_factor*positionD # perfect bounce
+    #     position += positionD * t_step # step back the amount of bounce
     return angle, angleD, position, positionD
 
 
 def edge_bounce_wrapper(angle, angle_cos, angleD, position, positionD, t_step, L=L):
     for i in range(position.size):
         angle[i], angleD[i], position[i], positionD[i] = edge_bounce(angle[i], angle_cos[i], angleD[i], position[i], positionD[i],
-                                                                     t_step, L)
+                                                                     t_step, L) # see cartpole_tf.py; note we no longer need the edge_bounce wrapper because edge bounce compiles as tf code
     return angle, angleD, position, positionD
 
 
diff --git a/CartPole/cartpole_tf.py b/CartPole/cartpole_tf.py
index e2ebab84..818c6a5e 100644
--- a/CartPole/cartpole_tf.py
+++ b/CartPole/cartpole_tf.py
@@ -1,6 +1,6 @@
 import tensorflow as tf
 from others.globals_and_utils import create_rng, load_config
-from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength,
+from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength,cart_bounce_factor,
                               controlBias, controlDisturbance, g, k, m_pole, u_max,
                               v_max)
 from SI_Toolkit.Functions.TF.Compile import CompileTF
@@ -25,11 +25,12 @@
 controlDisturbance = tf.convert_to_tensor(controlDisturbance)
 controlBias = tf.convert_to_tensor(controlBias)
 TrackHalfLength = tf.convert_to_tensor(TrackHalfLength)
+cart_bounce_factor = tf.convert_to_tensor(cart_bounce_factor)
 
 rng = create_rng(__name__, config["cartpole"]["seed"])
 
 ###
-# FIXME: Currently tf predictor is not modeling edge bounce!
+# TODO: Currently tf predictor is not modeling edge bounce!
 ###
 
 
@@ -56,7 +57,7 @@ def wrap_angle_rad(sin, cos):
 
 
 @CompileTF
-def edge_bounce_wrapper(angle, angle_cos, angleD, position, positionD, t_step, L=L):
+def edge_bounce_wrapper(angle, angle_cos, angleD, position, positionD, t_step, L=L, cart_bounce_factor=cart_bounce_factor):
     angle_bounced = tf.TensorArray(tf.float32, size=tf.size(angle), dynamic_size=False)
     angleD_bounced = tf.TensorArray(tf.float32, size=tf.size(angleD), dynamic_size=False)
     position_bounced = tf.TensorArray(tf.float32, size=tf.size(position), dynamic_size=False)
@@ -65,7 +66,7 @@ def edge_bounce_wrapper(angle, angle_cos, angleD, position, positionD, t_step, L
     for i in tf.range(tf.size(position)):
         angle_i, angleD_i, position_i, positionD_i = edge_bounce_tf(angle[i], angle_cos[i], angleD[i], position[i],
                                                                     positionD[i],
-                                                                    t_step, L)
+                                                                    t_step, L, cart_bounce_factor=cart_bounce_factor)
         angle_bounced = angle_bounced.write(i, angle_i)
         angleD_bounced = angleD_bounced.write(i, angleD_i)
         position_bounced = position_bounced.write(i, position_i)
@@ -127,10 +128,12 @@ def _cartpole_fine_integration_tf(angle, angleD,
                                                                      positionDD, t_step, )
 
         # The edge bounce calculation seems to be too much for a GPU to tackle
-        # angle_cos = tf.cos(angle)
-        # angle, angleD, position, positionD = edge_bounce_wrapper(angle, angle_cos, angleD, position, positionD, t_step, L)
-
+        # TODO it is currently commented out in master branch
         angle_cos = tf.cos(angle)
+        angle, angleD, position, positionD = edge_bounce(angle, angle_cos, angleD, position, positionD, t_step, L, cart_bounce_factor)
+        # # note we no longer need the edge_bounce wrapper because edge_bounce compiles as tf code
+
+        # angle_cos = tf.cos(angle)
         angle_sin = tf.sin(angle)
 
         angle = wrap_angle_rad(angle_sin, angle_cos)
@@ -138,8 +141,10 @@ def _cartpole_fine_integration_tf(angle, angleD,
     return angle, angleD, position, positionD, angle_cos, angle_sin
 
 
+@CompileTF
 def cartpole_fine_integration_tf(s, u, t_step, intermediate_steps,
                                  k=k, m_cart=m_cart, m_pole=m_pole, g=g, J_fric=J_fric, M_fric=M_fric, L=L):
+    #print('test 5')
     """
     Calculates current values of second derivative of angle and position
     from current value of angle and position, and their first derivatives
diff --git a/CartPole/load.py b/CartPole/load.py
index 9db54677..bff031a6 100644
--- a/CartPole/load.py
+++ b/CartPole/load.py
@@ -3,6 +3,8 @@
 
 import os
 import glob
+from typing import Optional
+
 import pandas as pd
 
 def get_full_paths_to_csvs(default_locations='', csv_names=None):
@@ -87,7 +89,12 @@ def get_full_paths_to_csvs(default_locations='', csv_names=None):
 
 
 # load csv file with experiment recording (e.g. for replay)
-def load_csv_recording(file_path):
+def load_csv_recording(file_path:str)->pd.DataFrame:
+    """ Loads the recording CSV file
+    :param file_path: path to CSV including full filename with suffix
+
+    :returns: False if file not found or pd.DataFrame if found
+    """
     if isinstance(file_path, list):
         file_path = file_path[0]
 
diff --git a/CartPole/state_utilities.py b/CartPole/state_utilities.py
index 6afbee6e..ffc95f0e 100644
--- a/CartPole/state_utilities.py
+++ b/CartPole/state_utilities.py
@@ -6,6 +6,9 @@
     ["angle", "angleD", "angle_cos", "angle_sin", "position", "positionD",]
 )
 
+NUM_STATES:int=len(STATE_VARIABLES)
+
+# dict[state_name, index], e.g. STATE_INDICES['angle']=0
 STATE_INDICES = {x: np.where(STATE_VARIABLES == x)[0][0] for x in STATE_VARIABLES}
 
 CONTROL_INPUTS = np.sort(["Q"])
@@ -13,12 +16,12 @@
 CONTROL_INDICES = {x: np.where(CONTROL_INPUTS == x)[0][0] for x in CONTROL_INPUTS}
 
 """Define indices of values in state statically"""
-ANGLE_IDX = STATE_INDICES["angle"].item()
-ANGLED_IDX = STATE_INDICES["angleD"].item()
-POSITION_IDX = STATE_INDICES["position"].item()
-POSITIOND_IDX = STATE_INDICES["positionD"].item()
-ANGLE_COS_IDX = STATE_INDICES["angle_cos"].item()
-ANGLE_SIN_IDX = STATE_INDICES["angle_sin"].item()
+ANGLE_IDX = STATE_INDICES["angle"].item() # Pole angle in radians. 0 means pole is upright. Clockwise angle rotation is defined as negative.
+ANGLED_IDX = STATE_INDICES["angleD"].item() # Angular velocity of pole in rad/s, CCW is positive.
+POSITION_IDX = STATE_INDICES["position"].item() # Horizontal position of pole in meters.
+POSITIOND_IDX = STATE_INDICES["positionD"].item() # Horizontal velocity of pole in m/s. Cart movement to the right is positive.
+ANGLE_COS_IDX = STATE_INDICES["angle_cos"].item() # cos of angle, vertical means 1 and -1 means hanging down
+ANGLE_SIN_IDX = STATE_INDICES["angle_sin"].item() # sin of angle, 0 upright and hanging, +1 for leftwards, -1 for rightwards
 
 
 def create_cartpole_state(state: dict = {}, dtype=None) -> np.ndarray:
@@ -28,18 +31,22 @@ def create_cartpole_state(state: dict = {}, dtype=None) -> np.ndarray:
     Input parameters are passed as a dict with the following possible keys. Other keys are ignored.
     Unset key-value pairs are initialized to 0.
 
-    :param angle: Pole angle. 0 means pole is upright. Clockwise angle rotation is defined as negative.
-    :param angleD: Angular velocity of pole.
-    :param position: Horizontal position of pole.
-    :param positionD: Horizontal velocity of pole. Cart movement to the right is positive.
+    :param angle: Pole angle in radians. 0 means pole is upright. Clockwise angle rotation is defined as negative.
+    :param angleD: Angular velocity of pole in rad/s, CCW is positive.
+    :param position: Horizontal position of pole in meters.
+    :param positionD: Horizontal velocity of pole in m/s. Cart movement to the right is positive.
 
     :returns: A numpy.ndarray with values filled in order set by STATE_VARIABLES
     """
+    initial_pole_angle=0 # set to zero to start upright, np.pi to hang down
+
+    state['angle']=initial_pole_angle
+
     state["angle_cos"] = (
-        np.cos(state["angle"]) if "angle" in state.keys() else np.cos(0.0)
+        np.cos(state["angle"]) if "angle" in state.keys() else np.cos(initial_pole_angle)
     )
     state["angle_sin"] = (
-        np.sin(state["angle"]) if "angle" in state.keys() else np.sin(0.0)
+        np.sin(state["angle"]) if "angle" in state.keys() else np.sin(initial_pole_angle)
     )
 
     if dtype is None:
@@ -48,6 +55,7 @@ def create_cartpole_state(state: dict = {}, dtype=None) -> np.ndarray:
     s = np.zeros_like(STATE_VARIABLES, dtype=np.float32)
     for i, v in enumerate(STATE_VARIABLES):
         s[i] = state.get(v) if v in state.keys() else s[i]
+
     return s
 
 
diff --git a/Control_Toolkit b/Control_Toolkit
index 09a0a720..807c46de 160000
--- a/Control_Toolkit
+++ b/Control_Toolkit
@@ -1 +1 @@
-Subproject commit 09a0a7204d8f52656e8d056625b45ee398d243c8
+Subproject commit 807c46de404c542c4a074b99645be8e7c8397be0
diff --git a/Control_Toolkit_ASF/Controllers/controller_do_mpc.py b/Control_Toolkit_ASF/Controllers/controller_do_mpc.py
index 1d2f7d75..c2431fa9 100644
--- a/Control_Toolkit_ASF/Controllers/controller_do_mpc.py
+++ b/Control_Toolkit_ASF/Controllers/controller_do_mpc.py
@@ -9,7 +9,7 @@
 from CartPole.cartpole_model import Q2u, cartpole_ode_namespace
 from CartPole.state_utilities import cartpole_state_vector_to_namespace
 from Control_Toolkit.Controllers import template_controller
-from others.globals_and_utils import create_rng
+from others.globals_and_utils import create_rng, update_attributes
 from SI_Toolkit.computation_library import NumpyLibrary, TensorType
 
 
@@ -136,16 +136,16 @@ def tvp_fun(self, t_ind):
         return self.tvp_template
 
 
-    def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
-        self.update_attributes(updated_attributes)
+    def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
+        update_attributes(updated_attributes,self)
 
-        s = cartpole_state_vector_to_namespace(s)
+        state = cartpole_state_vector_to_namespace(state)
 
-        self.x0['s.position'] = s.position
-        self.x0['s.positionD'] = s.positionD
+        self.x0['s.position'] = state.position
+        self.x0['s.positionD'] = state.positionD
 
-        self.x0['s.angle'] = s.angle
-        self.x0['s.angleD'] = s.angleD
+        self.x0['s.angle'] = state.angle
+        self.x0['s.angleD'] = state.angleD
 
         self.tvp_template['_tvp', :, 'target_position'] = self.target_position
 
diff --git a/Control_Toolkit_ASF/Controllers/controller_do_mpc_discrete.py b/Control_Toolkit_ASF/Controllers/controller_do_mpc_discrete.py
index 353792fa..dbf13dde 100644
--- a/Control_Toolkit_ASF/Controllers/controller_do_mpc_discrete.py
+++ b/Control_Toolkit_ASF/Controllers/controller_do_mpc_discrete.py
@@ -9,6 +9,7 @@
 from CartPole.state_utilities import cartpole_state_vector_to_namespace
 from Control_Toolkit.Controllers import template_controller
 from SI_Toolkit.computation_library import NumpyLibrary, TensorType
+from others.globals_and_utils import update_attributes
 
 
 def mpc_next_state(s, u, dt):
@@ -145,16 +146,16 @@ def configure(self):
     def tvp_fun(self, t_ind):
         return self.tvp_template
 
-    def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
-        self.update_attributes(updated_attributes)
+    def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
+        update_attributes(updated_attributes,self)
 
-        s = cartpole_state_vector_to_namespace(s)
+        state = cartpole_state_vector_to_namespace(state)
 
-        self.x0['s.position'] = s.position
-        self.x0['s.positionD'] = s.positionD
+        self.x0['s.position'] = state.position
+        self.x0['s.positionD'] = state.positionD
 
-        self.x0['s.angle'] = s.angle
-        self.x0['s.angleD'] = s.angleD
+        self.x0['s.angle'] = state.angle
+        self.x0['s.angleD'] = state.angleD
 
         self.tvp_template['_tvp', :, 'target_position'] = self.target_position
 
diff --git a/Control_Toolkit_ASF/Controllers/controller_energy.py b/Control_Toolkit_ASF/Controllers/controller_energy.py
new file mode 100644
index 00000000..cbf15e6d
--- /dev/null
+++ b/Control_Toolkit_ASF/Controllers/controller_energy.py
@@ -0,0 +1,182 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu May 11 18:07:58 2023
+
+@author: Shreyan Banerjee
+"""
+
+"""
+This is a linear-quadratic regulator
+It assumes that the input relation is u = Q*u_max (no fancy motor model) !
+"""
+
+from SI_Toolkit.computation_library import NumpyLibrary, TensorType
+import numpy as np
+import scipy
+import yaml
+import os
+
+from CartPole.cartpole_jacobian import cartpole_jacobian
+from CartPole.cartpole_model import s0, u_max
+from CartPole.state_utilities import (ANGLE_IDX, ANGLED_IDX, POSITION_IDX,
+                                      POSITIOND_IDX)
+from Control_Toolkit.Controllers import template_controller
+from others.globals_and_utils import create_rng
+
+config = yaml.load(open("config.yml", "r"), Loader=yaml.FullLoader)
+actuator_noise = config["cartpole"]["actuator_noise"]
+
+
+class controller_energy(template_controller):
+    _computation_library = NumpyLibrary
+    
+    def configure(self):
+        # From https://github.com/markwmuller/controlpy/blob/master/controlpy/synthesis.py#L8
+        """Solve the continuous time LQR controller for a continuous time system.
+        A and B are system matrices, describing the systems dynamics:
+         dx/dt = A x + B u
+        The controller minimizes the infinite horizon quadratic cost function:
+         cost = integral (x.T*Q*x + u.T*R*u) dt
+        where Q is a positive semidefinite matrix, and R is positive definite matrix.
+        Returns K, X, eigVals:
+        Returns gain the optimal gain K, the solution matrix X, and the closed loop system eigenvalues.
+        The optimal input is then computed as:
+         input: u = -K*x
+        """
+        self.p_Q = actuator_noise
+        # ref Bertsekas, p.151
+        self.theta = 0
+        self.thetaprev = 0
+        self.thetadot = 0
+        self.thetadotprev = 0
+        self.U = 0
+        self.K = 0
+        self.deltaU = 0
+
+        seed = self.config_controller["seed"]
+        self.rng = create_rng(self.__class__.__name__, seed if seed==None else seed*2)
+
+        # Calculate Jacobian around equilibrium
+        # Set point around which the Jacobian should be linearized
+        # It can be here either pole up (all zeros) or pole down
+        s = s0
+        s[POSITION_IDX] = 0.0
+        s[POSITIOND_IDX] = 0.0
+        s[ANGLE_IDX] = 0.0
+        s[ANGLED_IDX] = 0.0
+        u = 0.0
+        
+        
+
+        jacobian = cartpole_jacobian(s, u)
+        A = jacobian[:, :-1]
+        B = np.reshape(jacobian[:, -1], newshape=(4, 1)) * u_max
+
+        # Cost matrices for LQR controller
+        self.Q = np.diag(self.config_controller["Q"]) # How much to punish x, v, theta, omega
+        self.R = self.config_controller["R"]  # How much to punish Q
+
+        # first, try to solve the ricatti equation
+        X = scipy.linalg.solve_continuous_are(A, B, self.Q, self.R)
+
+        # compute the LQR gain
+        if np.array(self.R).ndim == 0:
+            Ri = 1.0 / self.R
+        else:
+            Ri = np.linalg.inv(self.R)
+
+        K = np.dot(Ri, (np.dot(B.T, X)))
+
+        eigVals = np.linalg.eigvals(A - np.dot(B, K))
+
+        self.K = K
+        self.X = X
+        self.eigVals = eigVals
+
+    def controller_reset(self):
+        #self.optimizer.optimizer_reset()
+        #self.cartpole_trajectory_generator.reset()
+        pass
+
+    def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
+        self.update_attributes(updated_attributes)
+        
+        state = np.array(
+            [[s[POSITION_IDX] - self.variable_parameters.target_position], [s[POSITIOND_IDX]], [s[ANGLE_IDX]], [s[ANGLED_IDX]]])
+
+        #Q = np.dot(-self.K, state).item()
+        
+        self.theta = s[ANGLE_IDX]
+        self.thetadot = s[ANGLED_IDX]
+        
+        m = 1
+        l = 1
+        mc = 1
+        g = 9.8
+        
+        #Modify
+        self.K = 0.5 * m*l*l * self.thetadot**2
+        self.U = 0.5 * m*g*l*np.cos(self.theta)
+        E = self.K+self.U
+        
+        
+        u = ((m*g*abs(np.sin(self.theta))/9.8)*2)*self.thetadot/(abs(self.thetadot+0.01))
+        #u = (m*g*abs(np.sin(theta)) - m*l*thetaddot)*2/g
+        #c1+=1
+        self.deltaU = m*g*l*(1-np.cos(self.theta))
+        # if thetadot*theta>0:
+        #     pausefactor = -K + deltaU
+        #     #c = 1/(1+np.exp(pausefactor*10))
+        #c1+=1
+        # if c1%500==0:
+        #     done = True
+        
+        if (np.cos(self.theta)-np.cos(self.thetaprev))<0:
+            if (self.K+self.deltaU - m*g*l)>=0:
+                u=0
+        else:
+            if (self.K-self.deltaU)>=0:
+                u=0
+        
+        u= (2*u/l)*(m/mc)
+        u = u * (np.cos(self.theta)/abs(np.cos(self.theta)))
+        u*=6
+        #print(theta)
+        # if abs(theta)>0.9:
+        #     print("Failed")
+        # theta sign reversal for stabilization problem
+        if abs(self.theta)<0.5:# and self.theta*self.thetadot<0:
+            # if abs(self.thetadot)<0.05 and abs(theta)>0.3:
+            #     u = 
+            u = m*g*l*np.sin(self.theta)*1
+            u= (2*u/l)*(m/mc)
+            u = -u#*(np.cos(theta)/abs(np.cos(theta)))
+            u*=1
+            #print(u)
+        
+        #print(u)
+        # if c1 <5:
+        #     u = 1
+        self.thetaprev = self.theta
+        self.thetadotprev = self.thetadot
+        self.Kprev = self.K
+        # time_step = env.step([u])
+        # observation = time_step.observation
+        # position = T.Tensor(time_step.observation['position'])
+        # velocity = T.Tensor(time_step.observation['velocity'])
+        # observation = T.cat((position,velocity),0).reshape(len(position)+len(velocity))
+        # costheta = position[1].item()
+        # sintheta = position[2].item()
+        # theta = math.atan2(sintheta,costheta)
+        # thetadot = velocity[1].item()
+        #thetadotprev = thetadot
+        #thetaprev = theta
+        
+
+        # Mod ends
+        Q = u
+        #Q *= (1 + self.p_Q * float(self.rng.uniform(self.action_low, self.action_high)))
+
+        # Clip Q
+        Q = np.clip(Q, -1.0, 1.0, dtype=np.float32)
+        return Q
\ No newline at end of file
diff --git a/Control_Toolkit_ASF/Controllers/controller_lqr.py b/Control_Toolkit_ASF/Controllers/controller_lqr.py
index a4010aee..78e4239b 100644
--- a/Control_Toolkit_ASF/Controllers/controller_lqr.py
+++ b/Control_Toolkit_ASF/Controllers/controller_lqr.py
@@ -14,7 +14,7 @@
 from CartPole.state_utilities import (ANGLE_IDX, ANGLED_IDX, POSITION_IDX,
                                       POSITIOND_IDX)
 from Control_Toolkit.Controllers import template_controller
-from others.globals_and_utils import create_rng
+from others.globals_and_utils import create_rng, update_attributes
 
 config = yaml.load(open("config.yml", "r"), Loader=yaml.FullLoader)
 actuator_noise = config["cartpole"]["actuator_noise"]
@@ -81,11 +81,11 @@ def configure(self):
         self.X = X
         self.eigVals = eigVals
 
-    def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
-        self.update_attributes(updated_attributes)
+    def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
+        update_attributes(updated_attributes,self)
         
         state = np.array(
-            [[s[POSITION_IDX] - self.target_position], [s[POSITIOND_IDX]], [s[ANGLE_IDX]], [s[ANGLED_IDX]]])
+            [[state[POSITION_IDX] - self.target_position], [state[POSITIOND_IDX]], [state[ANGLE_IDX]], [state[ANGLED_IDX]]])
 
         Q = np.dot(-self.K, state).item()
 
diff --git a/Control_Toolkit_ASF/Controllers/controller_mppi_cartpole.py b/Control_Toolkit_ASF/Controllers/controller_mppi_cartpole.py
index 21bd2c9f..8bf9a2f9 100644
--- a/Control_Toolkit_ASF/Controllers/controller_mppi_cartpole.py
+++ b/Control_Toolkit_ASF/Controllers/controller_mppi_cartpole.py
@@ -26,6 +26,8 @@
 from matplotlib.widgets import Slider
 from numba import jit
 from numpy.random import SFC64, Generator
+
+from others.globals_and_utils import update_attributes
 from others.p_globals import TrackHalfLength
 from scipy.interpolate import interp1d
 from SI_Toolkit.Predictors.predictor_ODE import predictor_ODE
@@ -42,14 +44,14 @@
 
 """Timestep and sampling settings"""
 mpc_horizon = config_mppi_cartpole["mpc_horizon"]
-num_rollouts = config_mppi_cartpole["num_rollouts"]
+batch_size = config_mppi_cartpole["batch_size"]
 update_every = config_mppi_cartpole["update_every"]
 predictor_specification = config_mppi_cartpole["predictor_specification"]
 dt = config_data_gen["dt"]["control"]
 
 """Define Predictor"""
 predictor = PredictorWrapper()
-predictor.configure(batch_size=num_rollouts, horizon=mpc_horizon, dt=dt, predictor_specification=predictor_specification)
+predictor.configure(batch_size=batch_size, horizon=mpc_horizon, dt=dt, predictor_specification=predictor_specification)
 
 if predictor.predictor_config['predictor_type'] == 'neural':
     MODEL_NAME = predictor.predictor_config['model_name']
@@ -89,7 +91,6 @@
 LBD = config_mppi_cartpole["LBD"]
 NU = config_mppi_cartpole["NU"]
 SQRTRHODTINV = config_mppi_cartpole["SQRTRHOINV"] * (1 / np.sqrt(dt))
-GAMMA = config_mppi_cartpole["GAMMA"]
 SAMPLING_TYPE = config_mppi_cartpole["SAMPLING_TYPE"]
 
 
@@ -178,7 +179,7 @@ def trajectory_rollouts(
     :type S_tile_k: np.ndarray
     :param u: Vector of nominal inputs computed in previous iteration
     :type u: np.ndarray
-    :param delta_u: Array containing all input perturbation samples. Shape (num_rollouts x horizon_steps)
+    :param delta_u: Array containing all input perturbation samples. Shape (batch_size x horizon_steps)
     :type delta_u: np.ndarray
     :param u_prev: Array with nominal inputs from previous iteration. Used to compute cost of control change
     :type u_prev: np.ndarray
@@ -187,7 +188,7 @@ def trajectory_rollouts(
 
     :return: S_tilde_k - Array filled with a cost for each rollout trajectory
     """
-    initial_state = np.tile(s, (num_rollouts, 1))
+    initial_state = np.tile(s, (batch_size, 1))
 
     s_horizon = predictor.predict(initial_state, (u + delta_u)[..., np.newaxis])[:, :, : len(STATE_INDICES)]
 
@@ -220,7 +221,7 @@ def trajectory_rollouts(
         # (1 x mpc_horizon)
         LOGS.get("states").append(
             np.copy(s_horizon[:, :-1, :])
-        )  # num_rollouts x mpc_horizon x STATE_VARIABLES
+        )  # batch_size x mpc_horizon x STATE_VARIABLES
 
     return S_tilde_k
 
@@ -328,9 +329,9 @@ def update_inputs(u: np.ndarray, S: np.ndarray, delta_u: np.ndarray):
 
     :param u: Sampling mean / warm started control inputs of size (,mpc_horizon)
     :type u: np.ndarray
-    :param S: Cost array of size (num_rollouts)
+    :param S: Cost array of size (batch_size)
     :type S: np.ndarray
-    :param delta_u: The input perturbations that had been used, shape (num_rollouts x mpc_horizon)
+    :param delta_u: The input perturbations that had been used, shape (batch_size x mpc_horizon)
     :type delta_u: np.ndarray
     """
     u += reward_weighted_average(S, delta_u)
@@ -369,8 +370,8 @@ def configure(self):
         self.s_horizon = np.zeros((), dtype=np.float32)
         self.u = np.zeros((mpc_horizon), dtype=np.float32)
         self.u_prev = np.zeros_like(self.u, dtype=np.float32)
-        self.delta_u = np.zeros((num_rollouts, mpc_horizon), dtype=np.float32)
-        self.S_tilde_k = np.zeros((num_rollouts), dtype=np.float32)
+        self.delta_u = np.zeros((batch_size, mpc_horizon), dtype=np.float32)
+        self.S_tilde_k = np.zeros((batch_size), dtype=np.float32)
 
         self.wash_out_len = WASH_OUT_LEN
         self.warm_up_countdown = self.wash_out_len
@@ -404,7 +405,7 @@ def initialize_perturbations(
             - "interpolated" - Sample a new independent perturbation every 10th MPC horizon step. Interpolate in between the samples
             - "iid" - Sample independent and identically distributed samples of a Gaussian distribution
         :type sampling_type: str, optional
-        :return: Independent perturbation samples of shape (num_rollouts x horizon_steps)
+        :return: Independent perturbation samples of shape (batch_size x horizon_steps)
         :rtype: np.ndarray
         """
         """
@@ -413,23 +414,23 @@ def initialize_perturbations(
         If random_walk is true, each row represents a 1D random walk with Gaussian steps.
         """
         if sampling_type == "random_walk":
-            delta_u = np.empty((num_rollouts, mpc_horizon), dtype=np.float32)
+            delta_u = np.empty((batch_size, mpc_horizon), dtype=np.float32)
             delta_u[:, 0] = stdev * self.rng_mppi.standard_normal(
-                size=(num_rollouts,), dtype=np.float32
+                size=(batch_size,), dtype=np.float32
             )
             for i in range(1, mpc_horizon):
                 delta_u[:, i] = delta_u[:, i - 1] + stdev * self.rng_mppi.standard_normal(
-                    size=(num_rollouts,), dtype=np.float32
+                    size=(batch_size,), dtype=np.float32
                 )
         elif sampling_type == "uniform":
-            delta_u = np.empty((num_rollouts, mpc_horizon), dtype=np.float32)
+            delta_u = np.empty((batch_size, mpc_horizon), dtype=np.float32)
             for i in range(0, mpc_horizon):
                 delta_u[:, i] = self.rng_mppi.uniform(
-                    low=-1.0, high=1.0, size=(num_rollouts,)
+                    low=-1.0, high=1.0, size=(batch_size,)
                 ).astype(np.float32)
         elif sampling_type == "repeated":
             delta_u = np.tile(
-                stdev * self.rng_mppi.standard_normal(size=(num_rollouts, 1), dtype=np.float32),
+                stdev * self.rng_mppi.standard_normal(size=(batch_size, 1), dtype=np.float32),
                 (1, mpc_horizon),
             )
         elif sampling_type == "interpolated":
@@ -438,33 +439,33 @@ def initialize_perturbations(
             t = np.arange(start=0, stop=range_stop, step=step)
             t_interp = np.arange(start=0, stop=range_stop, step=1)
             t_interp = np.delete(t_interp, t)
-            delta_u = np.zeros(shape=(num_rollouts, range_stop), dtype=np.float32)
+            delta_u = np.zeros(shape=(batch_size, range_stop), dtype=np.float32)
             delta_u[:, t] = stdev * self.rng_mppi.standard_normal(
-                size=(num_rollouts, t.size), dtype=np.float32
+                size=(batch_size, t.size), dtype=np.float32
             )
             f = interp1d(t, delta_u[:, t])
             delta_u[:, t_interp] = f(t_interp)
             delta_u = delta_u[:, :mpc_horizon]
         else:
             delta_u = stdev * self.rng_mppi.standard_normal(
-                size=(num_rollouts, mpc_horizon), dtype=np.float32
+                size=(batch_size, mpc_horizon), dtype=np.float32
             )
 
         return delta_u
 
-    def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
+    def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
         """Perform controller step
 
-        :param s: State passed to controller after system has evolved for one step
-        :type s: np.ndarray
+        :param state: State passed to controller after system has evolved for one step
+        :type state: np.ndarray
         :param time: Time in seconds that has passed in the current experiment, defaults to None
         :type time: float, optional
         :return: A normed control value in the range [-1.0, 1.0]
         :rtype: np.float32
         """
-        self.update_attributes(updated_attributes)
+        update_attributes(updated_attributes,self)
 
-        self.s = s
+        self.s = state
 
         self.iteration += 1
 
@@ -510,8 +511,8 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy
                     # This is a lot of unnecessary calculation, but a stateful RNN in TF has frozen batch size
                     # FIXME: Problaby you can reduce it!
 
-                    rollout_trajectory = predictor.predict(np.tile(self.s, (num_rollouts, 1)),
-                        np.tile(self.u[np.newaxis, :, np.newaxis], (num_rollouts, 1, 1))
+                    rollout_trajectory = predictor.predict(np.tile(self.s, (batch_size, 1)),
+                        np.tile(self.u[np.newaxis, :, np.newaxis], (batch_size, 1, 1))
                     )[0, ...]
                 LOGS.get("nominal_rollouts").append(np.copy(rollout_trajectory[:-1, :]))
 
@@ -527,8 +528,8 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy
 
         ):
             self.warm_up_countdown -= 1
-            if abs(s[ANGLE_IDX]) < np.pi/10.0:  # Stabilize during warm_up with auxiliary controller if initial angle small
-                Q = self.auxiliary_controller.step(s, self.target_position)
+            if abs(state[ANGLE_IDX]) < np.pi/10.0:  # Stabilize during warm_up with auxiliary controller if initial angle small
+                Q = self.auxiliary_controller.step(state, self.target_position)
             else:
                 Q = self.rng_mppi_rnn.uniform(-1, 1)  # Apply random input to let RNN "feel" the system behaviour
         else:
@@ -564,7 +565,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy
         # self.u = zeros_like(self.u)
 
         # Prepare predictor for next timestep
-        Q_update = np.tile(Q, (num_rollouts, 1, 1))
+        Q_update = np.tile(Q, (batch_size, 1, 1))
         predictor.update(Q_update, self.s)
 
         return Q  # normed control input in the range [-1,1]
@@ -586,7 +587,7 @@ def controller_report(self):
             ### Plot the average state cost per iteration
             ctglgs = np.stack(
                 LOGS.get("cost_to_go"), axis=0
-            )  # ITERATIONS x num_rollouts
+            )  # ITERATIONS x batch_size
             NUM_ITERATIONS = np.shape(ctglgs)[0]
             time_axis = update_every * dt * np.arange(start=0, stop=np.shape(ctglgs)[0])
             plt.figure(num=2, figsize=(16, 9))
@@ -698,7 +699,7 @@ def draw_rollouts(
                     )
 
             # Prepare data
-            # shape(slgs) = ITERATIONS x num_rollouts x mpc_horizon x STATE_VARIABLES
+            # shape(slgs) = ITERATIONS x batch_size x mpc_horizon x STATE_VARIABLES
             slgs = np.stack(LOGS.get("states"), axis=0)
             wrap_angle_rad_inplace(slgs[:, :, :, ANGLE_IDX])
             # shape(iplgs) = ITERATIONS x mpc_horizon
@@ -735,7 +736,7 @@ def draw_rollouts(
             )
 
             # Normalize cost to go to use as opacity in plot
-            # shape(ctglgs) = ITERATIONS x num_rollouts
+            # shape(ctglgs) = ITERATIONS x batch_size
             ctglgs = np.divide(ctglgs.T, np.max(np.abs(ctglgs), axis=1)).T
 
             # This function updates the plot when a new iteration is selected
diff --git a/Control_Toolkit_ASF/Controllers/controller_secloc.py b/Control_Toolkit_ASF/Controllers/controller_secloc.py
index fc03108a..b8fab586 100644
--- a/Control_Toolkit_ASF/Controllers/controller_secloc.py
+++ b/Control_Toolkit_ASF/Controllers/controller_secloc.py
@@ -36,12 +36,12 @@ def configure(self):
         self.interpolation = interp1d([-self.motor_map,self.motor_map], [1,-1])
         self.step_idx = 0
 
-    def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
+    def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}):
         self.update_attributes(updated_attributes)
         # Read the cartpole state s = ["angle", "angleD", "angle_cos", "angle_sin", "position", "positionD"]
         # angle: pole UP -> 0, then +/-pi
         print(f"***** Step #{self.step_idx} *****")
-        pole_angle = s[0]
+        pole_angle = state[0]
         print(f"Current pole angle value: {pole_angle} radians. left/right --> +/- pi. Pole UP --> angle=0")
         print("Update the potentiometer event based sensor using the cartpole sate")
         self.potentiometer.update(signal_in= pole_angle)
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/__init__.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/__init__.py
new file mode 100644
index 00000000..b28b04f6
--- /dev/null
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/__init__.py
@@ -0,0 +1,3 @@
+
+
+
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-satisfaction.csv b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-satisfaction.csv
new file mode 100644
index 00000000..f2782173
--- /dev/null
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-satisfaction.csv
@@ -0,0 +1,26 @@
+# cartpole dancer dance sequence for song satisfaction rolling stones
+# it has 135bpm with offset of 40ms according to BeatEdit
+# which means the beat frequency is 2.25Hz and so all frequencies should be multiple or integer fraction of 2.25Hz, e.g. 1.125, .5625
+# policies can be balance1, spin2, shimmy3, cartonly4, cartwheel5
+# self.cartwheel_cycles=self.cartpole_dancer.amp
+endtime, cartpos, policy, option, freq, amp, freq2, amp2
+3,0, balance1,1,,,,
+00:7.3,0, cartonly4,.5,1.125,0.1,,
+00:14.6,0, cartonly4,.5,2.25,0.1,,
+00:18.3,0, toandfro6, ,,2,,
+00:21.3,0, toandfro6, ,,-2,,
+00:24.5,0, cartwheel5, ,,2,,
+00:27.9,0, cartwheel5, ,,-2,,
+00:35.5,-0.05, spin2, -1,,,,
+00:42.2,0.05, spin2, +1,,,,
+00:50.0,0, shimmy3,,.5625,0.07,2.25,0.08
+00:53.5,0, shimmy3,,1.125,0.04,2.25,0.08
+00:57.0,0, shimmy3,,2.25,0.03,2.25,0.08
+01:00.4,0, cartwheel5, ,,1,,
+01:04.5,0, cartwheel5, ,,-1,,
+01:07.3,0, cartwheel5, ,,2,,
+01:09.8,0, cartwheel5, ,,-2,,
+01:15.1,-0.05, spin2, -1,,,,
+01:19.0,0.05, spin2, +1,,,,
+01:22.2,0, shimmy3,,2.25,0.03,2.25,0.08
+01:26.1,0, balance1,-1,,,,
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-sookie-sookie.csv b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-sookie-sookie.csv
new file mode 100644
index 00000000..1a9329e6
--- /dev/null
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-sookie-sookie.csv
@@ -0,0 +1,12 @@
+# cartpole dancer dance sequence for song sookie-sookie grant green
+# it has 126bpm with offset of 420ms according to BeatEdit
+# which means the beat frequency is 2.1Hz and so all frequencies should be multiple or integer fraction of 2.1Hz, e.g. 1.05, .525 etc
+# policies can be balance1, spin2, shimmy3, cartonly4
+endtime, cartpos, policy, option, freq, amp, freq2, amp2
+5.4,  0, balance1, up,,,,
+12, -.05, shimmy3,up, 1.05, .04, 1.05, .04
+29 , +.05, shimmy3,up, 2.1, .02, 2.1, .02
+33, .05, spin2, cw, 5,,,
+59 , -.05, spin2, ccw, 5,,,
+166, 0, shimmy3,up, 2.1, .03, 2.1, .1
+
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance.csv b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance.csv
new file mode 100644
index 00000000..5b80eb1e
--- /dev/null
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance.csv
@@ -0,0 +1,17 @@
+# cartpole dancer dance sequence
+# policies can be balance1, spin2, shimmy3, cartonly4
+duration, cartpos, policy, option, freq, amp, freq2, amp2
+4,  0, balance1, up,,,,
+6, .1, spin2, cw, 5,,,
+#1,  0, balance1, up,,,,
+6, -.1,  spin2,ccw, 5,,,
+3, -.1, balance1, up,,,,
+6, 0, shimmy3,up, 2, .05, 1, .1
+1,  0, balance1, up,,,,
+8, 0, shimmy3,up, 1, .001, 1, .1
+1.5, .15, balance1, up,,,,
+1.5, -.15, balance1, up,,,,
+#2, 0, balance1, up,,,,
+# 4, 0, cartonly4,.9, 2, .1,,
+3, 0, balance1, down,,,,
+
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer.py
new file mode 100644
index 00000000..cdd83451
--- /dev/null
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer.py
@@ -0,0 +1,278 @@
+import csv
+from pathlib import Path
+from typing import Dict
+
+from CartPole import is_physical_cartpole_running_and_control_enabled
+from GUI import CartPoleMainWindow
+from others.globals_and_utils import get_logger
+
+# SONG='others/Media/01 Sookie_ Sookie.mp3'
+# CSV='Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-sookie-sookie.csv'
+# SONG="others/Media/Rolling_Stones_Satisfaction.mp3"
+# CSV='Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-satisfaction.csv'
+# SONG_PLAYBACK_RATE=0.55 # rate to match song speed compared to real time simulation rate
+
+import os
+try:
+    import vlc
+except Exception as e:
+    raise Exception(f'{e}: pip install python-vlc and install VLC (https://www.videolan.org/vlc/) to your system. Y'
+                    f'ou may need to restart your python IDE. See also https://stackoverflow.com/questions/59014318/filenotfounderror-could-not-find-module-libvlc-dll ;'
+                    f'you might need 64 bit version of VLC since default installs 32-bit version')
+import sys
+
+log = get_logger(__name__)
+try:
+    import winsound
+except:
+    log.warning('winsound is not available - will not play beeps when dance step changes')
+    pass
+
+UPDATE_INTERVAL=10 # update interval for status text and song rate update
+
+class cartpole_dancer:
+    """ Reads steps from CSV file for cartpole dancer
+    """
+
+    def __init__(self):
+        """ Constructs the dance reader, opening the CSV file
+
+        """
+        self.last_status_string=None
+        """ The last status line shown in GUI for simulation and printed out during runtime in physical-cartpole"""
+        self.started = False
+        self.song_file_name:str = None
+        self.song_player:vlc.MediaPlayer = None
+        self.row_iterator = None
+        self.reader = None
+        self.csvfile = None
+        csv.register_dialect('cartpole-dancer', skipinitialspace=True)
+
+        self.ENDTIME = 'endtime'
+        self.POLICY = 'policy'
+        self.OPTION = 'option'
+        self.CARTPOS = 'cartpos'
+        self.FREQ = 'freq'
+        self.AMP = 'amp'
+        self.FREQ2 = 'freq2'
+        self.AMP2 = 'amp2'
+
+        self.first_step_run=False  # used to avoid starting playing music when dance is configured in config_cost_functions but simulation not running
+        self.state:str='running' # set by signals emitted by CartPoleMainWindow, set to running initially so that we read the first row of CSV to compute trajectory for init
+        if CartPoleMainWindow.CartPoleMainWindowInstance and CartPoleMainWindow.CartPoleMainWindowInstance.CartPoleSimulationStateSignal:
+            CartPoleMainWindow.CartPoleMainWindowInstance.CartPoleSimulationStateSignal.connect(self.process_signal)
+
+        self.csv_file_name = None
+        self.csv_file_path = None
+        self.mtime = None
+        self.reset_fields()
+        self.paused=False
+        self.time_step_started = float(0)
+        self.time_dance_started = float(0)
+
+        self.cartpole_trajectory_generator = None # set in start()
+        self.iteration_counter=0 # to reduce some updates
+
+    def reload_csv_and_song(self):
+        physical_cartpole_path='CartPoleSimulation' # for checking for media in CartPoleSimulation when running from physical-cartpole
+        self.csv_file_name = bytes.decode(self.cartpole_trajectory_generator.cost_function.dance_csv_file.numpy())
+        if not os.path.exists(self.csv_file_name):
+            self.csv_file_name=os.path.join(physical_cartpole_path,self.csv_file_name)
+            if not os.path.exists(self.csv_file_name):
+                raise FileNotFoundError(f'CSV file for dance {self.cartpole_trajectory_generator.cost_function.dance_song_file} not found, cannot play music')
+        self.csv_file_path = Path(self.csv_file_name)
+        self.mtime = self.csv_file_path.stat().st_mtime
+
+        self.song_file_name=bytes.decode(self.cartpole_trajectory_generator.cost_function.dance_song_file.numpy())
+        if not os.path.exists(self.song_file_name):
+            self.song_file_name=os.path.join(physical_cartpole_path,self.song_file_name)
+            if not os.path.exists(self.song_file_name):
+                raise FileNotFoundError(f'mp3/wave file for dance {self.song_file_name} not found, cannot play music')
+        if self.song_player:
+            self.song_player.release()
+        try:
+            self.song_player:vlc.MediaPlayer = vlc.MediaPlayer(self.song_file_name)
+            log.debug(f'loaded CSV {self.csv_file_name} and song {self.song_file_name}')
+        except AttributeError as e:
+            log.error(f'cannot open VLC song_player: {e}')
+
+        if self.song_player:
+            if self.song_player.set_rate(self.cartpole_trajectory_generator.cost_function.dance_song_playback_rate)==-1:
+                log.warning('could not set playback rate for song')
+            em = self.song_player.event_manager()
+            em.event_attach(vlc.EventType.MediaPlayerEndReached, self.start) # restart dance when song restarts
+        self.mtime = self.csv_file_path.stat().st_mtime
+        if self.csvfile:
+            self.csvfile.close()
+        self.csvfile = open(self.csv_file_name, 'r')
+        self.reader = csv.DictReader(filter(lambda row: row[0] != '#', self.csvfile),
+                                     dialect='cartpole-dancer')  # https://stackoverflow.com/questions/14158868/python-skip-comment-lines-marked-with-in-csv-dictreader
+        self.row_iterator = self.reader.__iter__()
+
+    def reset_fields(self) -> None:
+        self.time_dance_started = float(0)
+        self.time_step_started = float(0)
+        self.current_row = None
+        self.started = False
+        self.endtime = None
+        self.option = None
+        self.policy = None
+        self.policy_number = None
+        self.cartpos = None
+        self.freq = None
+        self.amp = None
+        self.freq2 = None
+        self.amp2 = None
+
+    def start(self, time: float=0.0, cartpole_trajectory_generator=None) -> None:
+        """ Starts the dance now at time in seconds
+
+        :param time: the current simulation time in seconds, 0 by default
+        :param cartpole_trajectory_generator: the trajectory generator, if not None it sets it for use by cartpole_dancer use
+        """
+        if cartpole_trajectory_generator:
+            self.cartpole_trajectory_generator=cartpole_trajectory_generator
+        self.reset_fields()
+        self.reload_csv_and_song()
+        self.time_step_started = float(time)
+        self.time_dance_started = float(time)
+        if ((CartPoleMainWindow.CartPoleMainWindowInstance) and CartPoleMainWindow.CartPoleGuiSimulationState=='running')\
+                or is_physical_cartpole_running_and_control_enabled():
+                if self.song_player:
+                    log.debug(f'starting playing song {self.song_file_name}')
+                    self.song_player.play() # only play if simulator running or running physical cartpole
+        self.started = True
+        self.paused=False
+
+    def stop(self)->None:
+        if self.song_player:
+            self.song_player.stop()
+        self.started=False
+
+    def pause(self)->None:
+        self.paused=True
+        if self.song_player:
+            self.song_player.pause()
+
+    def process_signal(self, signal:str):
+        """ Process signal from CartPoleMainWindow
+
+        :param signal: the string state, 'running', 'paused', 'stopped'
+        """
+        log.debug(f'got signal "{signal}"')
+        self.state=signal
+        if self.state=='paused':
+            self.pause()
+        elif self.state=='stopped':
+            self.stop()
+        elif self.state=='running':
+            if self.started:
+                self.start(0) # only start (song) if dance has been started, don't play if e.g. policy is balance
+
+    def step(self, time: float)-> Dict:
+        """ Does next time step, reading the csv for next step if the current one's duration has timed out
+
+        :param time: the current simulation time in seconds
+
+        :returns: the current step row of cartpole_dance.csv as dict of column name -> value
+        """
+
+        if self.state=='running':
+            if not self.started:
+                log.warning('cartpole_dancer must be start()ed before calling step() - calling start now')
+                self.start(time)
+            if self.endtime is None or time >=  self.time_dance_started+self.endtime:
+                self._read_next_step(time)
+        if self.iteration_counter % UPDATE_INTERVAL== 0:
+            # setting rate dynamically produces very choppy sound; we must ensure that we run fast enough for real time by optimizing code....
+            # if not CartPoleMainWindow.CartPoleMainWindowInstance.speedup_measured is None:
+            #     rate = CartPoleMainWindow.CartPoleMainWindowInstance.speedup_measured
+            #     self.song_player.set_rate(rate)  # TODO check effect on performance in vlc
+            CartPoleMainWindow.set_status_text(self.format_step(time))
+        self.iteration_counter+=1
+        return self.current_row
+
+    def _read_next_step(self, time: float) ->None:
+        """ Reads next step from CSV file, reloading file from start if it has been modified since it was last read.
+        Plays a beep when the step changes.
+
+        :param time: the current time in seconds
+
+        :returns: None
+        """
+        try:
+            # check if file modified, if so, restart whole dance
+            mtime = self.csv_file_path.stat().st_mtime
+            if mtime>self.mtime:
+                log.warning('cartpole_dance.csv modified, restarting dance')
+                self.start(time)
+
+            self.time_step_started=time
+            self.current_row = self.reader.__next__()
+            log.info(f"At t={time:.1f}s new dance step is '{self.current_row['policy']}' ending at t={self.current_row['endtime']}")
+            if 'winsound' in sys.modules:
+                try:
+                    winsound.Beep(1000,300) # beep
+                except ValueError as e:
+                    log.warning(f'could not play beep, caught {e}')
+
+        except StopIteration:
+            log.info(f'restarting dance at time={time}s')
+            self.start(time)
+            self.step(time)
+            return
+        try:
+            self.endtime = hms_to_seconds(self.current_row[self.ENDTIME])
+            self.policy = self.current_row[self.POLICY][:-1] # the dqnce step 'policy" column has entries like balance1, spin2, etc
+            self.policy_number=int(self.current_row[self.POLICY][-1]) # todo not sufficient for XLA / TF compiled code to see this variable change
+            opt_str=self.current_row[self.OPTION]
+            try:
+                self.option=int(opt_str)
+            except ValueError:
+                self.option = self.current_row[self.OPTION]
+            self.cartpos = float(self.current_row[self.CARTPOS])
+            self.freq = self.convert_float(self.FREQ)
+            self.amp = self.convert_float(self.AMP)
+            self.freq2 = self.convert_float(self.FREQ2)
+            self.amp2 = self.convert_float(self.AMP2)
+        except ValueError as e:
+            log.warning(f'could not convert value in row {self.current_row}: {e}')
+
+    def convert_float(self, key):
+        if self.current_row[key] and self.current_row[key]!='':
+            return float(self.current_row[key])
+        else:
+            return None
+
+    def format_step(self, time:float) -> str:
+        """ Formats the current step for display in GUI main window status line. Sets self.last_status_string for use by other classes.
+
+        :param time: the current time in seconds
+
+        :returns: the step string
+        """
+        duration=self.endtime-self.time_step_started
+        timestr=f't={(time):.1f}/{self.endtime:.1f}s (dur={duration:.1f}s)'
+        s=None
+        if not self.freq is  None and not self.freq2 is None and not self.amp is None and not self.amp2 is None:
+            s= f'Dance: {self.policy}/{self.option} {timestr} pos={self.cartpos:.1f}m f0/f1={self.freq:.1f}/{self.freq2:.1f}Hz a0/a1={self.amp:.2f}/{self.amp2:.2f}m'
+        elif not self.freq  is None and not self.amp is None:
+            s= f'Dance: {self.policy}/{self.option} {timestr} pos={self.cartpos:.1f}m freq={self.freq:.1f}Hz amp={self.amp:.2f}m'
+        elif not self.freq is None:
+            s= f'Dance: {self.policy}/{self.option} {timestr} pos={self.cartpos:.1f}m freq={self.freq:.1f}Hz'
+        else:
+            s= f'Dance: {self.policy}/{self.option} {timestr} pos={self.cartpos:.1f}m'
+        self.last_status_string=s
+        return s
+
+
+def hms_to_seconds(timestr:str)->float:
+    """Get seconds from time.
+
+    :param timestr: hh:mm:ss.xxx string or mm:s.xxx or simply s.xxx where xxx is the fraction of seconds
+    :returns: time in float seconds
+    """
+    seconds= 0.
+    for part in timestr.split(':'):
+        seconds= seconds*60. + float(part)
+    return seconds
\ No newline at end of file
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer_cost.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer_cost.py
new file mode 100644
index 00000000..3997ebda
--- /dev/null
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer_cost.py
@@ -0,0 +1,210 @@
+import numpy as np
+
+from CartPole import state_utilities
+from Control_Toolkit.Controllers import template_controller
+from SI_Toolkit.computation_library import TensorType, ComputationLibrary
+from Control_Toolkit.Cost_Functions import cost_function_base
+
+from CartPole.cartpole_model import TrackHalfLength, L, m_pole
+from CartPole.state_utilities import NUM_STATES
+
+import tensorflow as tf
+
+from Control_Toolkit.others.get_logger import get_logger
+log = get_logger(__name__)
+
+
+
+# (config, _) = load_or_reload_config_if_modified(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"),
+#                                                        every=1)
+# 
+# weights=config.CartPole.cartpole_dancer_cost
+# log.info(f'starting MPC cartpole trajectory cost weights={weights}')  # only runs once
+
+
+class cartpole_dancer_cost(cost_function_base):
+    """ Computes the rollout costs for cartpole dancer.
+    The state trajectories are computed from the current timestep according to desired 'step" type.
+    The costs are weighted according to config_cost_functions.yml values for various state components, e.g. cart position, pole swing velocity, etc.
+
+    """
+
+    def __init__(self, controller: template_controller, ComputationLib: "type[ComputationLibrary]",
+                 config: dict = None) -> None:
+        """ makes a new cost function
+
+        :param controller: the controller
+        :param ComputationLib: the library, e.g. python, tensorflow
+        :param config: the dict of configuration for this cost function.  The caller can modify the config to change behavior during runtime.
+
+         """
+        super().__init__(controller, ComputationLib)
+        self.new_target_trajectory = None
+        dist_metric = 'mse'
+        if dist_metric == 'rmse':
+            self.dist = lambda x: self.lib.sqrt(self.lib.pow(x, 2))
+        elif dist_metric == 'mse':
+            self.dist = lambda x: self.lib.pow(x, 2)
+        elif dist_metric == 'abs':
+            self.dist = lambda x: self.lib.abs(x)
+        else:
+            raise Exception(f'unknown distance metric for cost "{dist_metric}"')
+
+    def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType, time: float = None):
+        """
+        Computes costs of all stages, i.e., costs of all rollouts for each timestep.
+
+        :param states: Tensor of states [rollout, timestep, state].
+            states[:,0,:] are num_rollouts copies of current state.
+            rollout dimension is the batch dimension of the rollouts, e.g. 1000 parallel rollouts.
+            timestep dimension are the timesteps of each rollout, e.g. 50 timesteps.
+            state is the state vector, defined by the STATE_INDICES in [state_utilities.py](../CartPole/state_utilities.py)
+        :param inputs: control input, e.g. cart acceleration
+        :param previous_input: previous control input, can be used to compute cost of changing control input
+        :param time: the time in seconds, used for generating target trajectory now
+
+        :returns: Tensor of [rollout, timestep] over all rollouts and horizon timesteps
+        """
+
+        control_change_cost = 0  # compute the control change cost
+
+        # position=states[:, :, POSITION_IDX]
+        # positionD=states[:, :, POSITIOND_IDX]
+        # angle=states[:, :, ANGLE_IDX]
+        # angleD=states[:,:,ANGLED_IDX]
+
+        gui_target_equilibrium = self.target_equilibrium  # GUI switch +1 or -1 to make pole target up or down position, set by previous call that update_attributes this field
+
+        input_shape = self.lib.shape(states)
+        num_rollouts = input_shape[0]
+        mpc_horizon = input_shape[1]
+        num_states=input_shape[2]
+
+        # allocate a zero stage costs tensor of dimension [num_rollouts, horizon], using the input states tensor that has the correct dimension
+        # squeeze is to remove the single state dimension to go from 3d to 2d tensor
+        # this 2d stage_costs tensor is immutable and is replaced every time modify it
+        stage_costs = tf.squeeze(tf.zeros_like(states[:,:,0]))
+
+
+
+        # dance step policies are: dance0 (follow csv file)   balance1 spin2 shimmy3 cartonly4
+
+        # states are "angle", "angleD", "angle_cos", "angle_sin", "position", "positionD"
+        # stage_costs = self.lib.zeros((num_rollouts, num_timesteps))
+        target_trajectory = tf.transpose(
+            self.target_trajectory)  # [horizon,num_states] array tensor computed by cartpole_trajectory_generator.py
+        broadcasted_target_trajectory = tf.broadcast_to(target_trajectory,
+                                                        [num_rollouts, mpc_horizon, NUM_STATES])
+        cost_weights = self.effective_traj_cost_weights
+
+        traj_dist = self.dist(
+            states - broadcasted_target_trajectory)  # difference for each rollout of predicted to target trajectory, has dim [num_rollouts, horizon, num_states]
+        stage_costs = tf.multiply(traj_dist, cost_weights)  # multiply states dim by cost_weights
+        stage_costs = self.stage_cost_factor * tf.reduce_sum(stage_costs,
+                                                             axis=2)  # sum costs across state dimension to leave costs as [num_rollouts, mpc_horizon]
+        stage_costs = tf.reshape(stage_costs, [num_rollouts, mpc_horizon])
+
+        if self.lib.equal(self.policy_number, 2): # spin
+            # The cost tensor we will return.  we allocate one less than num_timesteps to be all zeros because
+            # we concatenate the terminal cost in this branch
+            # stage_costs = self.lib.zeros((num_rollouts, num_timesteps - 1))
+            # stage_costs = tf.zeros_like(states)
+            # stage_costs=stage_costs[:,-1]
+
+            # spin is special cost aimed to first get energy into pole, then make it spin in correct direction.
+            # It is totally based on terminal state at end of rollout.
+            # Spin cost is based on total pole energy plus cart position plus rotation speed in cw or ccw direction.
+            # When the pole has insufficient energy for a spin, we follow objective to maximize the total pole energy.
+            # If the pole has sufficient energy for full rotations, then we follow objective to just maximize the signed kinetic energy in the cw or ccw direction.
+            upright_pole_energy=self.pole_energy_potential(0) # energy of pole upright compared with hanging down (factor of 2), means pole can swing around all the way if total energy is larger than this
+            current_pole_angle=states[0,0,state_utilities.ANGLE_IDX] # rad
+            current_pole_angleD=states[0,0,state_utilities.ANGLED_IDX] # rad/s
+
+            # compute the total pole energy, which is sum of kinetic and potential energy of pole
+            current_pole_potential_energy=self.pole_energy_potential(current_pole_angle)
+            current_pole_kinetic_energy=self.pole_energy_kinetic(current_pole_angleD)
+            current_pole_total_energy=current_pole_potential_energy+current_pole_kinetic_energy
+
+            # now find the final state at end of horizon and try to get it to either have more energy or spinning faster in particular direction
+            # each of following is [num_rollouts] vector of angleD, cos_angle, and cart position
+            angleD_states = states[:, -1, state_utilities.ANGLED_IDX]
+            cosangle_states = states[:, -1, state_utilities.ANGLE_COS_IDX]
+            pos_states = states[:, -1, state_utilities.POSITION_IDX]
+
+            gui_target_position = self.target_position  # GUI slider position
+            pos_cost =  self.dist(pos_states - gui_target_position) # num_rollouts vector
+
+            # compute the cost of spinning, the more energy, the lower the cost
+            spin_cost_energy = -(self.pole_energy_kinetic(angleD_states) + self.pole_energy_potential(cosangle_states))
+
+            if current_pole_total_energy>self.upright_pole_energy_multiple_to_count_spin_direction_cost * upright_pole_energy:
+                # pole is already spinning, multiply the spin energy by the actual direction of spinning and desired spin direction
+                actual_spin_dirs=self.lib.sign(angleD_states)
+                desired_spin_dir= -float(self.spin_dir) * gui_target_equilibrium # minus sign to get correct cost sign for cw/ccw rotation
+                spin_cost_energy=spin_cost_energy*desired_spin_dir*actual_spin_dirs
+
+            # compute the cart position cost as distance from desired cart position
+            # total stage cost is sum of energy of pole spin, direction of spin, and cart position
+            # the returned stage_costs is a tensor [num_rollouts, horizon] that has concatenated the terminal costs at end of horizon to the zero stage costs along the horizon
+            term_costs=self.spin_energy_weight *spin_cost_energy + self.cart_pos_weight * pos_cost
+            term_costs=tf.expand_dims(term_costs,-1) # make it, e.g. with num_rollouts=700, from [700] to [700,1] tensor
+            stage_costs =  self.lib.concat([stage_costs[:,:-1],term_costs],1) # now concatenate the terminal costs (all we count for spin) to the zero stage costs replacing the last horizon step
+            stage_costs = tf.reshape(stage_costs, [num_rollouts, mpc_horizon])
+
+        if previous_input is not None:
+            control_change_cost = self.control_cost_change_weight * self._control_change_rate_cost(inputs, previous_input)
+        control_cost = self.control_cost_weight * self._CC_cost(inputs)  # compute the cart acceleration control cost
+
+        terminal_traj_edge_barrier_cost = self.track_edge_barrier_cost * self.track_edge_barrier(states[:, :, state_utilities.POSITION_IDX])
+
+        stage_costs = stage_costs + terminal_traj_edge_barrier_cost + control_cost + control_change_cost   # result has dimension [num_rollouts, horizon]
+
+        return stage_costs
+
+    # final stage cost
+    def get_terminal_cost(self, terminal_states: TensorType):
+        """ returns 0; terminal cost and barrier cost computed in get_stage_cost()"""
+        return self.lib.zeros_like(terminal_states)
+
+    # cost for distance from cart track edge
+    def track_edge_barrier(self, position):
+        """Soft constraint: Do not crash into border; returns a 1 for any rollout cart position state that exceeds the track edge boundary"""
+        return self.lib.cast(
+            self.lib.abs(position) > self.track_length_fraction * TrackHalfLength, self.lib.float32
+        )
+
+    # actuation cost
+    def _CC_cost(self, u):
+        return self.lib.sum(u ** 2, 2)
+
+    # cost of changing control too fast
+    def _control_change_rate_cost(self, u, u_prev):
+        """Compute penalty of control jerk, i.e. difference to previous control input
+
+        :poram u: the current control input (cart acceleration in dimensionless scale)
+        :param u_prev: the previous timesteps control input
+
+        :returns: the cost of changing control
+        """
+        u_prev_vec = self.lib.concat(
+            (self.lib.ones((u.shape[0], 1, u.shape[2])) * u_prev, u[:, :-1, :]), 1
+        )
+        return self.lib.sum((u - u_prev_vec) ** 2, 2)
+
+    def pole_energy_potential(self, cosangle):
+        """Compute potential energy of pole
+
+        :param cosangle: the cosine of pole angle (1 when upright)
+
+        :returns: the pole potential energy in Joules, it is zero when pole is hanging down
+        """
+        return ((1+cosangle)/2) * m_pole * (L/2) * 9.8  # hopefully this is height of COM potential gravitational energy
+
+    def pole_energy_kinetic(self, angleD):
+        """Compute kinetic energy of pole
+
+         :param angleD: time derivative of pole angle in rad/s
+
+         :returns: the kinetic energy in Joules
+         """
+        return (1. / 6.) * m_pole * L * L * (angleD ** 2)
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_trajectory_generator.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_trajectory_generator.py
new file mode 100644
index 00000000..b8ae30a5
--- /dev/null
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_trajectory_generator.py
@@ -0,0 +1,438 @@
+# import time
+# from pydoc import text
+from typing import List, Optional, Union
+
+import numpy as np
+# import scipy
+# from matplotlib.pyplot import pause
+from scipy.signal import sawtooth, square
+
+from CartPole import state_utilities
+from CartPole.state_utilities import NUM_STATES, ANGLED_IDX, ANGLE_IDX, ANGLE_SIN_IDX, ANGLE_COS_IDX, POSITION_IDX, POSITIOND_IDX
+from Control_Toolkit.Controllers import template_controller
+from Control_Toolkit.Cost_Functions import cost_function_base
+from Control_Toolkit_ASF.Cost_Functions.CartPole.cartpole_dancer import cartpole_dancer
+from GUI import gui_default_params, CartPoleMainWindow
+from SI_Toolkit.computation_library import TensorType
+import tensorflow as tf
+
+from Control_Toolkit.others.get_logger import get_logger
+from others.globals_and_utils import update_attributes
+from others.p_globals import CARTPOLE_PHYSICAL_CONSTANTS
+
+log = get_logger(__name__)
+
+def to_numpy(v):
+    return v if not isinstance(v,(tf.Tensor,tf.Variable)) else v.numpy()
+
+class cartpole_trajectory_generator:
+
+    POLICIES=('balance','spin','shimmy','cartonly','cartwheel','toandfro')
+    POLICIES_NUMBERED=('balance0','spin1','shimmy2','cartonly3','cartwheel4','toandfro5') # for tensorflow scalar BS
+    CARTWHEEL_STATES={'before','starting','during','after'}
+
+    def __init__(self):
+
+        self.last_status_text:str=None
+        self.step_start_time = None
+        self.policy_changed=False
+        self.prev_policy=None
+        self.prev_dance_policy=None
+        self.time_policy_changed=None # when the new dance step started
+        self.shimmy_function=None # used for ramp of shimmy
+        self.controller=None
+        self.cost_function=None # set by each generate_trajectory, used by contained classes, e.g. cartpole_dancer, to access config values
+        self.lib=None
+        self.cartpole_dancer=cartpole_dancer()
+        self.traj:np.ndarray=None # stores the latest trajectory, for later reference, e.g. for measuring model mismatch
+        self.start_time=0
+
+        self.cartwheel_cycles = None
+        self.cartwheel_cycles_to_do:int = None
+        self.cartwheel_state:str = None
+        self.cartwheel_time_state_changed:float = None
+        self.cartwheel_starttime:float = None
+        self.cartwheel_direction:int = None
+        self.last_cartwheel_cycles:int=None
+
+        self.mpc_horizon=None
+        """ the horizion, updated from cost function on every call of generate_cartpole_trajectory"""
+
+        self.traj:np.ndarray=None
+        """ The state vector trajectories to follow. It is a 2d array [state,horizon] where first dimension are the states and 2nd is the MPC horizon"""
+
+        self.traj_states_to_consider:List[int]=[]
+        """ a list of state indices to count difference over for cost computation. It is put to cost_function as a tf.Variable so that the compiled cost function can use it."""
+
+        self.state_cost_weights=None
+        """ Vector of state trajectory cost weights from config_cost_functions. Filled in on every generate_cartpole_trajectory()."""
+
+        # we cannot determine if we are running physical cartpole with following method since control is not running at start time.time()
+        # if is_physical_cartpole_running_and_control_enabled() else 0 # subtracted from all times for running physical cartpole to avoid numerical overflow
+        # log.debug(f'self.start_time={self.start_time:.1f}s')
+
+        self.reset_cartwheel_state()
+
+    def reset_cartwheel_state(self):
+        self.cartwheel_state = 'before'
+        self.cartwheel_time_state_changed = 0
+        self.set_cartwheel_state('before', self.start_time)
+        self.cartwheel_cycles_to_do = 0
+        if not self.cost_function is None and hasattr(self.cost_function,'cartwheel_cycles'):
+            self.cartwheel_cycles_to_do=np.abs(to_numpy(self.cost_function.cartwheel_cycles))
+        self.cartwheel_starttime = 0  # time that this particular cartwheel started
+        self.cartwheel_direction = 1  # 1 for ccw (increasing angle), -1 for cw
+        self.last_cartwheel_cycles=0 # to test if config file value changes so we can restart
+
+    def reset(self):
+        """ stops dance if it is going
+        """
+        self.cartpole_dancer.stop()
+        self.reset_cartwheel_state()
+
+
+    def generate_cartpole_trajectory(self, time: float, state: np.ndarray, controller:template_controller, cost_function: cost_function_base):
+        """ Computes the desired future state trajectory at this time.
+        Sets the cost function attributes target_trajectory and traj_states_to_consider.
+        The target state trajectory of cartpole is valid for steps that use this trajectory (spin does not use it)
+        It is np.ndarray[states,timesteps] where only considered states are in the 2d array. The first dimension is the state, 2nd is the mpc_horizon.
+
+        :param time: the scalar time in seconds
+        :param horizon: the number of horizon steps
+        :param dt: the timestep in seconds
+        :param state: the current state of the cartpole as 1d vector of current state components, e.g. the scalar angle state[state_utilities.ANGLE]
+
+       """
+        self.controller=controller
+        self.cost_function=cost_function
+        self.lib=self.cost_function.lib
+        dt = gui_default_params.controller_update_interval # TODO fix for physical-cartpole which has CONTROL_PERIOD_MS in globals.py
+        # dt=CartPoleMainWindow.CartPoleMainWindowInstance.dt_simulation # todo figure out way to get from single place
+        self.mpc_horizon = to_numpy(controller.optimizer.mpc_horizon)
+        gui_target_position = to_numpy(cost_function.target_position)  # GUI slider position
+        gui_target_equilibrium = to_numpy(cost_function.target_equilibrium)  # GUI switch +1 or -1 to make pole target up or down position
+        # log.debug(f'dt={dt:.3f} mpc_horizon={self.mpc_horizon}')
+        time=time-self.start_time # account for 1970's time returned when running physical-cartpol
+
+
+
+        policy:str = bytes.decode(to_numpy(cost_function.policy)) # we really want a python str out of the policy so we can compare it using python previous policy
+        dancer_current_step=None
+        if policy is None:
+            raise RuntimeError(f'set policy in config_self.controller.cost_function_wrapper.cost_functions.yml')
+
+        if self.prev_policy is None or policy!=self.prev_policy:
+            self.time_policy_changed=time
+            self.policy_changed=True
+        else:
+            self.policy_changed=False
+        self.prev_policy=policy
+
+        if self.policy_changed and self.prev_policy== 'dance' and policy!= 'dance':
+            self.cartpole_dancer.stop() # stop music
+
+        # ****************************
+        # if we are following a complete dance from CSV file, then set the appropriate fields here from the CSV columns
+        if policy=='dance':
+            if self.policy_changed:
+                self.cartpole_dancer.start(time,  self)
+            self.cartpole_dancer.step(time=time)
+
+            policy=self.cartpole_dancer.policy
+            self.prev_dance_policy=policy
+
+            gui_target_position=to_numpy(self.cartpole_dancer.cartpos)
+
+            # must assign for TF/XLA to see the policy number in the compiled cost function. BS...
+            self.lib.assign(getattr(cost_function, 'policy_number'),
+                                 self.lib.to_variable(self.cartpole_dancer.policy_number, cost_function.lib.int32))
+            if policy=='spin':
+                spin_dir=self.cartpole_dancer.option
+                # hack to get int value for TF cost function use, is referred to in cartpole_dancer_cost as self.spin_dir
+                cost_function.lib.assign(getattr(cost_function, 'spin_dir'),
+                                         cost_function.lib.to_variable(spin_dir,
+                                                                       cost_function.lib.int32))
+                # cost_function.spin_freq_hz=self.cartpole_dancer.freq
+            elif policy=='balance':
+                cost_function.balance_dir=self.cartpole_dancer.option
+            elif policy=='shimmy':
+                cost_function.shimmy_freq_hz=self.cartpole_dancer.freq
+                cost_function.shimmy_amp=self.cartpole_dancer.amp
+                cost_function.shimmy_freq2_hz=self.cartpole_dancer.freq2
+                cost_function.shimmy_amp2=self.cartpole_dancer.amp2
+                cost_function.shimmy_duration=self.cartpole_dancer.endtime
+                cost_function.shimmy_dir=self.cartpole_dancer.option
+            elif policy=='cartonly':
+                cost_function.cartonly_freq_hz=self.cartpole_dancer.freq
+                cost_function.cartonly_amp=self.cartpole_dancer.amp
+                cost_function.cartonly_duty_cycle=float(self.cartpole_dancer.option)
+            elif policy=='cartwheel':
+                cost_function.cartwheel_cycles=self.cartpole_dancer.amp
+            elif policy=='toandfro':
+                cost_function.cartwheel_cycles=self.cartpole_dancer.amp
+            else:
+                log.error(f"policy '{policy}' is unknown")
+
+
+        # **********************************
+        # Now set the state trajectories to follow and for each type of dance step movement
+        self.clear_traj()
+        if policy== 'spin':  # spin pole CW or CCW depending on target_equilibrium up or down
+            # spin is handled entirely by a special cost function in cartpole_dancer_cost.py
+            pass
+        elif policy=='balance':  # balance upright or down at desired cart position
+            balance_dir = gui_target_equilibrium*to_numpy(cost_function.balance_dir)
+            target_angle = np.pi * (1 - balance_dir) / 2  # either 0 for up and pi for down
+            self.add_traj(POSITION_IDX,gui_target_position)
+            # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle)
+            if balance_dir>0:
+                self.add_traj(ANGLE_IDX,0) # for up, target angle zero, which is increases linearly with angle, unlike cos which only goes away from 1 like 1-angle^2
+            else:
+                self.add_traj(ANGLE_COS_IDX,-1) # for down, avoid the 2pi cut ot +pi/-pi by targetting cos to as negative as possible
+
+            self.add_traj(ANGLED_IDX,0)
+            self.add_traj(POSITIOND_IDX,0)
+        elif policy=='shimmy':  # cart follows a desired cart position shimmy while keeping pole up or down
+            f0 = to_numpy(cost_function.shimmy_freq_hz)  # seconds
+            a0 = to_numpy(cost_function.shimmy_amp)  # meters
+            if self.policy_changed or self.step_start_time is None:
+                log.debug(f'shimmy with freq={f0}Hz and amp={a0}m restarted at time={time}')
+                self.step_start_time=time
+            # shimmy_endtime=self._time_policy_changed+cost_function.shimmy_duration
+            # compute times from current time to end of horizon
+            time_since_step_started=time-self.step_start_time
+            horizon_endtime =  time_since_step_started + self.mpc_horizon * dt
+            # time for shimmy must be relative to start of shimmy step for freq ramp to make sense
+            times = np.linspace(time_since_step_started, horizon_endtime, num=self.mpc_horizon, dtype=np.float32)
+            # time_frac=times/cost_function.shimmy_duration
+            # f1 = cost_function.shimmy_freq2_hz  # seconds
+            # a1 = cost_function.shimmy_amp2  # meters
+
+            # compute the shimmmy trajectory over the horizon
+            # amps=a0+time_frac*(a1-a0)
+            # freqs=f0+time_frac*(f1-f0)
+
+            # print(f'abs_time={time:.2f}, rel_time={times[0]:.2f} time_frac={time_frac[0]:.3f} amp={amps[0]:.3f} freq={freqs[0]:.2f}')
+
+            cartpos=a0*np.sin(2*np.pi*f0*times, dtype=np.float32)
+            cartpos_d=np.gradient(cartpos,dt)
+            angle=np.arcsin(cartpos/(2*CARTPOLE_PHYSICAL_CONSTANTS.L), dtype=np.float32)/2 # compute the angle towards the center of shimmy to keep head of pole fixed as well as possible. L is half of pole length.
+            # we divide by 2 to reduce the required angle a bit more
+            angle_d=np.gradient(angle,dt)
+            self.add_traj(POSITION_IDX, gui_target_position + cartpos)
+            self.add_traj(ANGLE_IDX,angle) # keep pole pointing towards center of shimmy to minimize pole tip movement
+            self.add_traj(ANGLED_IDX,angle_d)
+            self.add_traj(POSITIOND_IDX, cartpos_d)
+        elif policy=='cartonly':  # cart follows the trajectory, pole ignored
+            f0 = to_numpy(cost_function.cartonly_freq_hz)  # seconds
+            a0 = to_numpy(cost_function.cartonly_amp)  # meters
+            if self.policy_changed or self.step_start_time is None:
+                log.debug(f'cartonly with freq={f0}Hz and amp={a0}m restarted at time={time}')
+                self.step_start_time=time
+            # shimmy_endtime=self._time_policy_changed+cost_function.shimmy_duration
+            # compute times from current time to end of horizon
+            time_since_step_started=time-self.step_start_time
+            horizon_endtime =  time_since_step_started + self.mpc_horizon * dt
+            # time for shimmy must be relative to start of shimmy step for freq ramp to make sense
+            times = np.linspace(time_since_step_started, horizon_endtime, num=self.mpc_horizon, dtype=np.float32)
+            # sawtooth is out for now to avoid sharp turns at ends
+            # cartpos = a0 * square((2 * np.pi * f0) * times, duty=cost_function.cartonly_duty_cycle)  # duty=.5 makes square with equal halfs https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html
+            cartpos = a0 * sawtooth((2 * np.pi * f0) * times, width=to_numpy(cost_function.cartonly_duty_cycle))  # width=.5 makes triangle https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html
+            # cartpos = a0 * np.sin((2 * np.pi * f0) * times)  # width=.5 makes triangle https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html
+            cartvel = np.gradient(cartpos, dt)
+            cartpos_vector = gui_target_position + cartpos
+            self.add_traj(ANGLE_COS_IDX,-1)
+            self.add_traj(POSITION_IDX,cartpos_vector)
+            self.add_traj(POSITIOND_IDX, cartvel)
+        elif policy=='cartwheel' or policy=='toandfro':
+            # cartwheel starts with balance, once balanced the cartwheels start, after the cartwheels we again balance
+            # toandfro swings one way and then the other immediately, must have cycles>2
+
+            # determine state transitions based on current state
+            angle = state[state_utilities.ANGLE_IDX]
+            angle_d = state[state_utilities.ANGLED_IDX]
+
+            # need to convert cartwheel cycles to real float since equals test on tf.Variable does not work if object does not change like it does not with assignment in update_attributes.
+            # also cartwheel cycles can be set to float during dance above
+            self.cartwheel_cycles=to_numpy(cost_function.cartwheel_cycles)
+            if self.policy_changed or self.cartwheel_cycles!=self.last_cartwheel_cycles:
+                self.cartwheel_direction=np.sign(self.cartwheel_cycles)
+                self.set_cartwheel_state('before',time)
+                self.cartwheel_cycles_to_do=np.abs(self.cartwheel_cycles)
+                self.last_cartwheel_cycles=self.cartwheel_cycles
+
+            # first update state depending on cartwheel state, cartpole state, and time
+            if self.cartwheel_state=='before':
+                # if before cartwheel, we need to get balanced
+                if self.cartwheel_cycles_to_do>0 and self.is_pole_balanced(state):
+                    self.set_cartwheel_state('starting',time)
+                    self.cartwheel_starttime=time
+            elif self.cartwheel_state=='starting':
+                # start the free fall in the correct direction using angle_d indicator function
+               # if pole is falling in correct direction and fast enough and has fallen enough move to free fall
+                if np.abs(angle_d)>to_numpy(self.cost_function.cartwheel_freefall_angle_limit_deg)*np.pi/180\
+                        and np.sign(angle_d)==self.cartwheel_direction \
+                        and np.abs(angle)>to_numpy(self.cost_function.cartwheel_freefall_angled_limit_deg_per_sec)*np.pi/180:
+                    self.cartwheel_cycles_to_do-=1
+                    self.set_cartwheel_state('during',time)
+            elif self.cartwheel_state=='during':
+                # during the cartwheel, we don't go to 'after' until the angle has exceeded some angle and spin speed
+                if np.abs(angle)>to_numpy(self.cost_function.cartwheel_freefall_to_after_angle_limit_deg)*np.pi/180:
+                    if self.cartwheel_cycles_to_do>0:
+                        self.set_cartwheel_state('before',time)
+                        if policy=='toandfro':
+                            self.cartwheel_direction= -self.cartwheel_direction # go other direction this time
+                    else:
+                        self.set_cartwheel_state('after',time)
+            elif self.cartwheel_state=='after':
+                pass # end state, don't get out of it except by starting a new cartwheel
+
+            # now set the target trajectory objectives according to state
+            if self.cartwheel_state=='before' or self.cartwheel_state=='after':  # just get balanced again
+                self.add_traj(POSITION_IDX, gui_target_position)
+                self.add_traj(ANGLE_IDX, 0)
+                self.add_traj(ANGLED_IDX, 0)
+                self.add_traj(POSITIOND_IDX, 0)
+
+            elif self.cartwheel_state=='starting': # we just try to get the pole spinning the correct direction
+                self.add_traj(POSITION_IDX, gui_target_position)
+                self.add_traj(POSITIOND_IDX, 0)
+                self.add_traj(ANGLED_IDX,  1e6*self.cartwheel_direction)
+
+            elif self.cartwheel_state=='during': # free fall, just get the cart back to target position
+                self.add_traj(POSITION_IDX, gui_target_position)
+                self.add_traj(POSITIOND_IDX, 0)
+
+        else:
+            log.error(f'cost policy "{policy}" is unknown')
+
+
+        self.compile_traj_for_cost_function()
+
+        self.set_status_text(time, state, cost_function) # update CartPoleSimulation GUI status line (if it exists)
+
+
+    def add_traj(self, idx:int, traj:Union[float,np.ndarray]):
+        """ Add a target trajectory vector to the target states trajectories.
+
+        :param idx: the state index, e.g. POSITION
+        :param traj: either a float value or a vector with length self.mpc_horizon
+        """
+        traj_vec=None
+
+        self.traj_states_to_consider.append(idx)
+
+        if isinstance(traj,(float,int, np.float32)):
+            traj_vec=np.full(shape=(1,self.mpc_horizon),fill_value=traj,dtype=np.float32)
+        elif isinstance(traj, np.ndarray):
+            if len(traj.shape)>1 or traj.shape[0]!=self.mpc_horizon:
+                raise AttributeError(f'trajectory {traj} must be a vector with length self.mpc_horizon')
+            traj_vec=traj
+        else:
+            raise AttributeError(f'trajectory vector {traj} must be a scalar int,float value or np.ndarray or tf.Tensor')
+
+        self.traj[idx,:]=traj_vec # put this vector to 2d array of trajectories
+
+    def clear_traj(self):
+        """ Clear the target state trajectories"""
+        # initialize desired trajectory with 0 entries, will fill appropriate rows with target state trajectories
+
+        self.traj = np.full(shape=(NUM_STATES, self.mpc_horizon),
+                       fill_value=0., dtype=np.float32)  # use numpy not tf, too hard to work with immutable tensors
+        self.traj_states_to_consider=[]
+
+    def compile_traj_for_cost_function(self):
+        """ Compile the target state trajectories to a fixed-size tensor is used in cartpole_dancer_cost.py to
+         measure the rollout costs. Transfer the target states trajectory and effective cost weighting vector to cost function.
+         """
+        # states are "angle", "angleD", "angle_cos", "angle_sin", "position", "positionD"
+        raw_state_cost_weights = np.array([to_numpy(self.cost_function.pole_angle_weight),
+                                   to_numpy(self.cost_function.pole_swing_weight),
+                                    to_numpy(self.cost_function.pole_angle_weight),
+                                    to_numpy(self.cost_function.pole_angle_weight),
+                                    to_numpy(self.cost_function.cart_pos_weight),
+                                    to_numpy(self.cost_function.cart_vel_weight)])
+        effective_cost_weights=np.zeros([1,NUM_STATES])
+        np.put(effective_cost_weights, self.traj_states_to_consider,raw_state_cost_weights[self.traj_states_to_consider])
+
+
+        updated_attributes = {}  # empty dict
+        updated_attributes['target_trajectory'] = tf.Variable(self.traj, dtype=tf.float32)
+        updated_attributes['effective_traj_cost_weights'] = tf.Variable(effective_cost_weights, dtype=tf.float32)
+        update_attributes(updated_attributes, self.cost_function)  # update the cost_fuction tf.Variable's to give access to them in compiled cost_function
+
+    def set_cartwheel_state(self,new_state:str,time:float):
+        """Sets the cartwheel state and the time the state changed if it did
+        """
+        if self.cartwheel_state is None or self.cartwheel_state!=new_state:
+            self.cartwheel_time_state_changed=time
+            log.debug(f'changed state from {self.cartwheel_state}->{new_state} at t={time:.3f}')
+        self.cartwheel_state=new_state
+
+    def is_pole_balanced(self, state):
+        """ Returns True if angle and angle_d are sufficiently small"""
+        return np.abs(state[state_utilities.ANGLE_IDX])<to_numpy(self.cost_function.cartwheel_balance_angle_limit_deg)*(np.pi/180)\
+                and np.abs(state[state_utilities.ANGLED_IDX])<to_numpy(self.cost_function.cartwheel_balance_angled_limit_deg_per_sec)*(np.pi/180)
+
+    def set_status_text(self, time: float, state: np.ndarray, cost_function: cost_function_base) -> None:
+        s = self.get_status_string(time, state, cost_function)
+        CartPoleMainWindow.set_status_text(s)
+
+    def get_status_string(self, time, state, cost_function):
+        """ Returns a string describing current objective trajectory or control target"""
+        policy: str = cost_function.policy
+        if policy == 'dance':
+            return  # status of cartpole GUI set by dancer
+        gui_target_position = float(cost_function.target_position)  # GUI slider position
+        gui_target_equilibrium = float(
+            cost_function.target_equilibrium)  # GUI switch +1 or -1 to make pole target up or down position
+
+        if policy == 'balance':
+            dir = to_numpy(cost_function.balance_dir)
+            s = f'Policy: balance/{dir} pos={gui_target_position:.1f}m'
+        elif policy == 'spin':
+            dir_int = cost_function.spin_dir.numpy()
+            dir='cw' if dir_int<0 else 'ccw'
+            s = f'Policy: spin/{dir}*up/down pos={gui_target_position:.1f}m'
+        elif policy == 'shimmy':
+            s = f'Policy: shimmy pos={gui_target_position:.1f}m freq={float(cost_function.shimmy_freq_hz):.1f}Hz amp={float(cost_function.shimmy_amp):.3f}m'
+        elif policy == 'cartonly':
+            s = f'Policy: cartonly pos={gui_target_position:.1f}m freq={float(cost_function.cartonly_freq_hz):.1f}Hz amp={float(cost_function.cartonly_amp):.3f}m'
+        elif policy == 'cartwheel':
+            s = f'Policy: cartwheel pos={gui_target_position:.1f}m state={self.cartwheel_state} cycles_to_do={self.cartwheel_cycles_to_do} angle={state[state_utilities.ANGLE_IDX]*180/np.pi:.1f}deg angle_d={state[state_utilities.ANGLED_IDX]*180/np.pi:.1f}deg/s'
+        elif policy == 'toandfro':
+            s = f'Policy: toandfro pos={gui_target_position:.1f}m state={self.cartwheel_state} cycles_to_do={self.cartwheel_cycles_to_do} angle={state[state_utilities.ANGLE_IDX]*180/np.pi:.1f}deg angle_d={state[state_utilities.ANGLED_IDX]*180/np.pi:.1f}deg/s'
+        else:
+            s = f'unknown/not implemented string'
+        self.last_status_text=s
+        return s
+
+
+    def decode_string(self,tfstring:tf.Variable):
+        """ Decodes tensorflow string to unicode python str.
+        :param tfstring: the tf.Variable with dtype 'string'.
+
+        :returns: If tfstring is tf.Variable, returns tfstring.numpy().decode('utf-8'). If tfstring is already str, returns result of tfstring.encode().decode('utf-8')
+        """
+        if isinstance(tfstring,str): return tfstring.encode().decode('utf-8')
+        return tfstring.numpy().decode('utf-8')
+
+    def keyboard_input(self, c):
+        if c == 'M':  # switch dance step (mode)
+            newpol=self.next_policy()
+            update_attributes({'policy':newpol},self.cost_function)
+
+    def next_policy(self)->str:
+        curpolicy = self.decode_string(self.cost_function.policy)
+        ind=0
+        try:
+            ind=cartpole_trajectory_generator.POLICIES.index(curpolicy)+1
+            if ind==len(cartpole_trajectory_generator.POLICIES): ind=0
+        except ValueError as e:
+            log.error(f'error cycling policy {e}')
+        newpol=cartpole_trajectory_generator.POLICIES_NUMBERED[ind]
+        print(f'changed policy from {curpolicy} -> {newpol}')
+        return newpol
+
+    def print_keyboard_help(self):
+        print('M cycle dance policy')
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/default.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/default.py
index ee811a52..e3c29e4c 100644
--- a/Control_Toolkit_ASF/Cost_Functions/CartPole/default.py
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/default.py
@@ -4,21 +4,50 @@
 from SI_Toolkit.computation_library import TensorType
 from Control_Toolkit.Cost_Functions import cost_function_base
 
-from others.globals_and_utils import load_config
+from others.globals_and_utils import load_config, load_or_reload_config_if_modified
 
 from CartPole.cartpole_model import TrackHalfLength
-from CartPole.state_utilities import ANGLE_IDX, POSITION_IDX
+from CartPole.state_utilities import ANGLE_IDX, POSITION_IDX, ANGLED_IDX
 
-# load constants from config file
-config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r"))
+class default(cost_function_base):
+
+    # all stage costs together
+    def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType):
+        """
+        Computes costs of all stages, i.e., costs of all rollouts for each timestep.
+
+        :param states: Tensor of states [rollout, timestep, state].
+            rollout dimension is the batch dimension of the rollouts, e.g. 1000 parallel rollouts.
+            timestep dimension are the timesteps of each rollout, e.g. 50 timesteps.
+            state is the state vector, defined by the STATE_INDICES in [state_utilities.py](../CartPole/state_utilities.py)
+        :param inputs: control input, e.g. cart acceleration
+        :param previous_input: previous control input, can be used to compute cost of changing control input
+        :return: Tensor of [rollout, timestamp] over all rollouts and horizon timesteps
+        """
+        (config,reloaded)=load_or_reload_config_if_modified(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"),every=1,search_path=['CartPoleSimulation'])
+        self.dd_weight = config["CartPole"]["default"]["dd_weight"]
+        self.cc_weight = config["CartPole"]["default"]["cc_weight"]
+        self.ep_weight = config["CartPole"]["default"]["ep_weight"]
+        self.ccrc_weight = config["CartPole"]["default"]["ccrc_weight"]
+        self.R = config["CartPole"]["default"]["R"]
+
+        dd = self.dd_weight * self._distance_difference_cost(states[:, :, POSITION_IDX]) # compute cart position target distance cost
+        cc = self.cc_weight * self.R*self._CC_cost(inputs) # compute the cart acceleration control cost
+        ccrc = 0 # compute the control change cost
+        angle=states[:, :, ANGLE_IDX]
+        angleD=states[:,:,ANGLED_IDX]
+        ep = self.ep_weight * self._E_pot_cost(angle)
+        # if previous_input is not None:
+        #     ccrc = ccrc_weight * self._control_change_rate_cost(inputs, previous_input)
+
+        # if self.controller.target_positions_vector[0] > 0: # TODO why is potential cost positive for this case and negative otherwise?
+        #     stage_cost = dd + ep + cc + ccrc
+        # else:
+        stage_cost = dd + ep + cc + ccrc
+
+        return stage_cost
 
-dd_weight = config["CartPole"]["default"]["dd_weight"]
-cc_weight = config["CartPole"]["default"]["cc_weight"]
-ep_weight = config["CartPole"]["default"]["ep_weight"]
-ccrc_weight = config["CartPole"]["default"]["ccrc_weight"]
-R = config["CartPole"]["default"]["R"]
 
-class default(cost_function_base):
     # cost for distance from track edge
     def _distance_difference_cost(self, position):
         """Compute penalty for distance of cart to the target position"""
@@ -35,7 +64,7 @@ def _E_pot_cost(self, angle):
 
     # actuation cost
     def _CC_cost(self, u):
-        return R * self.lib.sum(u**2, 2)
+        return self.lib.sum(u**2, 2)
 
     # final stage cost
     def get_terminal_cost(self, terminal_states: TensorType):
@@ -76,13 +105,3 @@ def _control_change_rate_cost(self, u, u_prev):
         )
         return self.lib.sum((u - u_prev_vec) ** 2, 2)
 
-    # all stage costs together
-    def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType):
-        dd = dd_weight * self._distance_difference_cost(states[:, :, POSITION_IDX])
-        ep = ep_weight * self._E_pot_cost(states[:, :, ANGLE_IDX])
-        cc = cc_weight * self._CC_cost(inputs)
-        ccrc = 0
-        # if previous_input is not None:
-        #     ccrc = ccrc_weight * self._control_change_rate_cost(inputs, previous_input)
-        stage_cost = dd + ep + cc + ccrc
-        return stage_cost
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary.py
index a1b7671f..1c5e991e 100644
--- a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary.py
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary.py
@@ -10,7 +10,7 @@
 from CartPole.state_utilities import ANGLE_IDX, POSITION_IDX
 
 # load constants from config file
-config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r"))
+config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), "r"))
 
 dd_weight = config["CartPole"]["quadratic_boundary"]["dd_weight"]
 cc_weight = config["CartPole"]["quadratic_boundary"]["cc_weight"]
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_grad.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_grad.py
index a64f39db..39e63407 100644
--- a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_grad.py
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_grad.py
@@ -10,7 +10,7 @@
 from CartPole.state_utilities import ANGLE_IDX, ANGLED_IDX, POSITION_IDX
 
 #load constants from config file
-config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r"))
+config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), "r"))
 
 dd_weight = config["CartPole"]["quadratic_boundary_grad"]["dd_weight"]
 cc_weight = config["CartPole"]["quadratic_boundary_grad"]["cc_weight"]
diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_nonconvex.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_nonconvex.py
index e15f9297..e20a5138 100644
--- a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_nonconvex.py
+++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_nonconvex.py
@@ -10,7 +10,7 @@
 from CartPole.state_utilities import ANGLE_IDX, ANGLED_IDX, POSITION_IDX
 
 #load constants from config file
-config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r"))
+config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), "r"))
 
 dd_weight = config["CartPole"]["quadratic_boundary_nonconvex"]["dd_weight"]
 cc_weight = config["CartPole"]["quadratic_boundary_nonconvex"]["cc_weight"]
diff --git a/Control_Toolkit_ASF/Cost_Functions/__init__.py b/Control_Toolkit_ASF/Cost_Functions/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/Control_Toolkit_ASF/config_controllers.yml b/Control_Toolkit_ASF/config_controllers.yml
index 2e383d72..e699b172 100644
--- a/Control_Toolkit_ASF/config_controllers.yml
+++ b/Control_Toolkit_ASF/config_controllers.yml
@@ -1,13 +1,20 @@
-mpc:
-  optimizer: rpgd-tf
+# this config specifies the controller options.
+# to select a particular controller, see TODO where is the controller specified?
+# controller_mpc.py uses the mpc: section below
+# see config_optimizers to specify the parameters for the optimizer, e.g. mppi
+
+# when running physical-cartpole, its globals.py overwrites the config regarding controller & optimizer. For simulator, options are specified completely in .yml files.
+
+mpc: # selects Control_Tookit/Controllers/controller_mpc.py as the controller
+  optimizer: rpgd-tf # selects the optimizer, see CartPoleSimulation/ControlToolkit_ASF/config_optimizers.yml. Popular options are 'mppi' and 'rpgd-tf'
   predictor_specification: "ODE_TF"    # Can be "ODE", "ODE_TF", network/GP name (possibly with path) e.g. 'GRU-6IN-32H1-32H2-5OUT-0'/'SGP_30' or a name of a custom predictor. For more info see config_predictors in SI_Toolkit_ASF
-  cost_function_specification: default  # One of "default", "quadratic_boundary_grad", "quadratic_boundary_nonconvex", "quadratic_boundary"
+  cost_function_specification: cartpole_dancer_cost  # One of "default", "quadratic_boundary_grad", "quadratic_boundary_nonconvex", "quadratic_boundary" "cartpole_dancer_cost"
   computation_library: tensorflow  # One of "numpy", "tensorflow", "pytorch". Defaults to "numpy" if none given.
   controller_logging: false
-mppi-cartpole:
+mppi-cartpole: # TODO what is this section for?
   seed: null                            # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!)
-  mpc_horizon: 35                       # steps
-  num_rollouts: 3500                    # Number of Monte Carlo samples
+  mpc_horizon: 30                       # steps
+  batch_size: 3500                  # Number of Monte Carlo samples
   update_every: 1                       # Cost weighted update of inputs every ... steps
   predictor_specification: "ODE_TF"    # Can be "ODE", "ODE_TF", network/GP name (possibly with path) e.g. 'GRU-6IN-32H1-32H2-5OUT-0'/'SGP_30' or a name of a custom predictor. For more info see config_predictors in SI_Toolkit_ASF
   cost_function_specification: default  # One of "default", "quadratic_boundary_grad", "quadratic_boundary_nonconvex", "quadratic_boundary"
@@ -23,7 +30,6 @@ mppi-cartpole:
   LBD: 100.0                            # Cost parameter lambda
   NU: 1000.0                            # Exploration variance
   SQRTRHOINV: 0.02                      # Sampling variance
-  GAMMA: 1.00                           # Future cost discount
   SAMPLING_TYPE: "interpolated"         # One of ["iid", "random_walk", "uniform", "repeated", "interpolated"]
   controller_logging: False                        # Collect and show detailed insights into the controller's behavior
   WASH_OUT_LEN: 100                     # Only matters if RNN used as predictor; For how long MPPI should be desactivated (replaced either with LQR or random input) to give memory units time to settle
@@ -47,7 +53,6 @@ custom-mpc-scipy:
   controller_logging: True
 do-mpc-discrete:
   mpc_horizon: 50                       # steps
-  num_rollouts: 1
   # Initial positions
   position_init: 0.0
   positionD_init: 0.0
@@ -57,7 +62,6 @@ do-mpc-discrete:
 do-mpc:
   seed: null                          # If null, random seed based on datetime is used
   mpc_horizon: 50                       # steps
-  num_rollouts: 1
   # Perturbation factors:
   # Change of output from optimal
   p_Q: 0.00
diff --git a/Control_Toolkit_ASF/config_cost_function.yml b/Control_Toolkit_ASF/config_cost_function.yml
deleted file mode 100644
index 7ecd18cf..00000000
--- a/Control_Toolkit_ASF/config_cost_function.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-cost_function_name_default: default
-# Default value is used if controller does not specify a cost_function_specification (leave empty)
-# Cost functions are grouped by environment name in a folder within Control_Toolkit_ASF.Cost_Functions
-# Check config.yml to learn more on how cost_functions are selected
-
-CartPole:
-  default:
-    dd_weight: 600.0
-    ep_weight: 20000.0
-    ekp_weight: 80.0
-    ekc_weight: 20.0
-    cc_weight: 1.0
-    ccrc_weight: 1.0
-    R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
-  quadratic_boundary_grad:
-    dd_weight: 600.0
-    ep_weight: 20000.0
-    ekp_weight: 80.0
-    cc_weight: 1.0
-    ccrc_weight: 1.0
-    R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
-  quadratic_boundary_nonconvex:
-    dd_weight: 600.0
-    ep_weight: 20000.0
-    cc_weight: 1.0
-    ccrc_weight: 1.0
-    R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
-  quadratic_boundary:
-    dd_weight: 600.0
-    ep_weight: 20000.0
-    cc_weight: 1.0
-    ccrc_weight: 1.0
-    R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
diff --git a/Control_Toolkit_ASF/config_cost_functions.yml b/Control_Toolkit_ASF/config_cost_functions.yml
new file mode 100644
index 00000000..716fbac5
--- /dev/null
+++ b/Control_Toolkit_ASF/config_cost_functions.yml
@@ -0,0 +1,94 @@
+# this config is to specify specific cost function parameters. The actual cost function used is specified in config_optimers.yml
+
+cost_function_name_default: default
+# Default value is used if controller does not specify a cost_function_specification (leave empty)
+# Cost functions are grouped by environment name in a folder within Control_Toolkit_ASF.Cost_Functions
+# Check CartPoleSimulation/config.yml to learn more on how cost_functions are selected TODO I don't see any more information there, and which config.yml is this referring to?
+
+CartPole:
+  default:
+    dd_weight: 600.0
+    ep_weight: 20000
+    ekp_weight: 80.0
+    ekc_weight: 20.0
+    cc_weight: 1.0
+    ccrc_weight: 1.0
+    R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
+  quadratic_boundary_grad:
+    dd_weight: 600.0
+    ep_weight: 20000.0
+    ekp_weight: 80.0
+    cc_weight: 1.0
+    ccrc_weight: 1.0
+    R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
+  quadratic_boundary_nonconvex:
+    dd_weight: 600.0
+    ep_weight: 20000.0
+    cc_weight: 1.0
+    ccrc_weight: 1.0
+    R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
+  quadratic_boundary:
+    dd_weight: 600.0
+    ep_weight: 20000.0
+    cc_weight: 1.0
+    ccrc_weight: 1.0
+    R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
+  cartpole_dancer_cost: # values must be explicitly float or scientific format if needed, since they are cast to int type if provided here as int, which might not work for some ops
+    # the policy (dance step or move to control). It is trailed by an int to code the policy as an int for tensorflow XLA compiled code.
+    # e.g. dance0 will create (in update_attributes call) policy='dance' and policy_number=0
+    # this hack is to allow branching in XLA code that lacks string compare operator
+    # see others\globals_and_utilities.py\update_attributes method
+
+    policy:  shimmy3  # dance0 (follow csv file)   balance1 spin2 shimmy3 cartonly4 cartwheel5 toandfro6
+
+    # tobi cannot get conditional branch to work for following option in cartpole_dancer_cost.py
+#    distance_norm: 0 # 0=abs, 1=rmse, 2=mse are possible distance metrics
+    stage_cost_factor: 1e0 # how much to weight up the state costs
+    track_edge_barrier_cost: 1e11 # how much to penalize cart at edge of track
+    track_length_fraction: 0.95 # fraction of half track length to allow free movement of cart before hitting barrier function
+
+    # cost weightings for the different state components for following desired cart state trajectory
+    cart_pos_weight: 2e6 # weight on cart pos error
+    pole_angle_weight: 1e6 # weight on difference of angle
+    cart_vel_weight: 3e2 # weight on difference of cart velocity
+    pole_swing_weight: 4e+4 # cart angle derivative weight
+
+    # following weight the control and its change
+    control_cost_weight: 1e1 # cost of control
+    control_cost_change_weight: 1e0 # cost of changing control
+
+    # spin
+    spin_dir:  1  # cw+1 ccw-1 1 or cw, -1 or ccw
+    upright_pole_energy_multiple_to_count_spin_direction_cost: .25 # pole must have this excess total energy before we start counting spin direction energy
+    spin_energy_weight: 5e+6 # cost factor only for spin behavior to weight the spin energy
+
+
+    # balance
+    balance_dir: 1 # 1 for up, -1 for down, multiplied by target_equilibrium
+
+    # shimmy does a sinusoidal dance of cart around target position with period in seconds and amplitude in meters
+    shimmy_freq_hz: 1.125   # for 160bpm song, 2.25 Hz is beat, 1.125 is half beat, 0.5625 is quarter beat
+    shimmy_amp: .04 # in practice, 0.03 results in about .07m amplitude, not clear why
+    shimmy_freq2_hz: 2.
+    shimmy_amp2: .025
+    shimmy_duration: 8. # for shimmy that lasts a finite time and ramps from freq to freq2 and amp to amp2
+    shimmy_dir: up # up, down
+#    shimmy_plot: 0 # to plot the shimmy function
+
+    # cartonly only controls cart position, pole is ignored. Trajectory is triangular with amp and per
+    cartonly_amp: 0.05 # amplitude in meters (half of peak to peak of triangle wave)
+    cartonly_freq_hz: 1.125
+    cartonly_duty_cycle: 0.5 # set to .5 for triangle, 0 or 1 for sawtooth
+
+    # cartwheel and toandfro steps.
+    cartwheel_cycles: +6 # positive cw, negative ccw. For toandfro, must be >2 to do complete back and forth steps
+    cartwheel_balance_angle_limit_deg: 10.0
+    cartwheel_balance_angled_limit_deg_per_sec: 30.0
+    cartwheel_freefall_angle_limit_deg: 20.
+    cartwheel_freefall_angled_limit_deg_per_sec: 30.
+    cartwheel_freefall_to_after_angle_limit_deg: 60.
+#    cartwheel_target_duration_s: 1.1
+
+    dance_song_file: 'others/Media/Rolling_Stones_Satisfaction.mp3' # no spaces or special chars, no / . - etc....
+    dance_csv_file: 'Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-satisfaction.csv'
+    dance_song_playback_rate: 1. # rate to match song speed compared to real time simulation rate, we should set to 1 for song to sound native but code must run fast enough then
diff --git a/Control_Toolkit_ASF/config_optimizers.yml b/Control_Toolkit_ASF/config_optimizers.yml
index 8ee940f7..d1d7340b 100644
--- a/Control_Toolkit_ASF/config_optimizers.yml
+++ b/Control_Toolkit_ASF/config_optimizers.yml
@@ -1,9 +1,40 @@
+# This config specifies the parameters for all the different MPC optimizers, e.g. mppi, rpgd-tf
+#  To select a particular optimizer from here, see optimizer: key in the config_controllers.yml selection of controller.
+mppi: # used as basic MPC optimizer for cartpole (tobi)
+  seed: null                            # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!)
+  mpc_horizon: 40                       # steps, 40 steps of 0.02 is .8s
+  num_rollouts: 700                    # Number of Monte Carlo samples
+  cc_weight: 1.0
+  R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in cost functions config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
+  LBD: 100.0                            # Cost parameter lambda
+  NU: 1000.0                            # Exploration variance
+  SQRTRHOINV: 0.03                      # Sampling variance
+  period_interpolation_inducing_points: 10                #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated
+rpgd-tf: # this is the tensorflow (tf) version of the Resampling Parallel Gradient Descent (RPGD) optimizer reported in ICRA paper
+  # the options below are passed directly to the constructor __init__ method of Control_Toolkit/Optimizers/optimizer_rpgd_tf.py and must match the expected options
+  seed: null                            # If null, random seed based on datetime is used
+  mpc_horizon: 40                       # steps
+  SAMPLING_DISTRIBUTION: uniform  # "normal" or "uniform"
+  period_interpolation_inducing_points: 10                #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated
+  learning_rate: 0.05
+  adam_beta_1: 0.9
+  adam_beta_2: 0.999
+  adam_epsilon: 1.0e-08
+  gradmax_clip: 5
+  rtol: 1.0e-3
+  num_rollouts: 32
+  opt_keep_k_ratio: .75
+  outer_its: 2
+  resamp_per: 10
+  sample_stdev: 0.5
+  warmup: false
+  warmup_iterations: 250
 cem-tf:
   seed: null                          # If null, random seed based on datetime is used
   mpc_horizon: 40                       # steps
   cem_outer_it: 3                    #how many outer iterations to use
   cem_initial_action_stdev: 0.5
-  num_rollouts: 200          #how many rollouts per outer cem iteration
+  batch_size: 200          #how many rollouts per outer cem iteration
   cem_stdev_min: 0.01
   cem_best_k: 40
   warmup: false
@@ -12,7 +43,7 @@ cem-gmm-tf:
   seed: null                          # If null, random seed based on datetime is used
   mpc_horizon: 40                       # steps
   cem_outer_it: 3                    #how many outer iterations to use
-  num_rollouts: 200          #how many rollouts per outer cem iteration
+  batch_size: 200          #how many rollouts per outer cem iteration
   cem_stdev_min: 0.01
   cem_initial_action_stdev: 0.5
   cem_best_k: 40
@@ -20,7 +51,7 @@ cem-naive-grad-tf:
   seed: null                          # If null, random seed based on datetime is used
   mpc_horizon: 35                       # steps
   cem_outer_it: 1                       # how many outer iterations to use
-  num_rollouts: 200                     # how many rollouts per outer cem iteration
+  batch_size: 200                     # how many rollouts per outer cem iteration
   cem_stdev_min: 0.1
   cem_initial_action_stdev: 0.5
   cem_best_k: 40
@@ -33,7 +64,7 @@ cem-grad-bharadhwaj-tf:
   adam_beta_1: 0.9
   adam_beta_2: 0.999
   adam_epsilon: 1.0e-08
-  num_rollouts: 32
+  batch_size: 32
   cem_best_k: 8
   cem_outer_it: 2
   cem_initial_action_stdev: 2
@@ -50,7 +81,7 @@ gradient-tf:
   adam_epsilon: 1.0e-07
   rtol: 1.0e-3
   gradient_steps: 5
-  num_rollouts: 40
+  batch_size: 40
   initial_action_stdev: 0.5
   gradmax_clip: 5
   warmup: false
@@ -63,7 +94,7 @@ mppi-optimize-tf:
   adam_epsilon: 1.0e-7                  #default: 1.0e-7
   gradmax_clip: 1000
   mpc_horizon: 35                       # steps
-  num_rollouts: 400                     # Number of Monte Carlo samples
+  batch_size: 400                     # Number of Monte Carlo samples
   cc_weight: 1.0
   R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in cost functions config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
   LBD: 100.0                            # Cost parameter lambda
@@ -83,24 +114,8 @@ dist-adam-resamp2-tf:
   rtol: 1.0e-3
   num_rollouts: 32
   opt_keep_k_ratio: 0.25
-  outer_its: 2
-  resamp_per: 10
-  sample_stdev: 0.5
-  warmup: false
-  warmup_iterations: 250
-rpgd-tf:
-  seed: null                            # If null, random seed based on datetime is used
-  mpc_horizon: 40                       # steps
-  SAMPLING_DISTRIBUTION: uniform  # "normal" or "uniform"
-  period_interpolation_inducing_points: 10                #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated
-  learning_rate: 0.05
-  adam_beta_1: 0.9
-  adam_beta_2: 0.999
-  adam_epsilon: 1.0e-08
-  gradmax_clip: 5
-  rtol: 1.0e-3
-  num_rollouts: 32
-  opt_keep_k_ratio: 0.25
+  batch_size: 32
+  opt_keep_k: 8
   outer_its: 2
   resamp_per: 10
   sample_stdev: 0.5
@@ -137,6 +152,7 @@ rpgd-me-param-tf:
   maximum_entropy_alpha: 0.0
   gradmax_clip: 10
   rtol: 1.0e-3
+  opt_keep_k: 0
   num_rollouts: 32
   opt_keep_k_ratio: 0.25
   outer_its: 25
@@ -156,6 +172,7 @@ rpgd-ml-tf:
   maximum_entropy_alpha: 0.1
   gradmax_clip: 10
   rtol: 1.0e-3
+  opt_keep_k: 8
   num_rollouts: 32
   opt_keep_k_ratio: 0.25
   outer_its: 5
@@ -173,6 +190,7 @@ rpgd-particle-tf:
   adam_epsilon: 1.0e-08
   gradmax_clip: 10
   rtol: 1.0e-3
+  opt_keep_k: 8
   num_rollouts: 32
   opt_keep_k_ratio: 0.25
   outer_its: 5
@@ -183,7 +201,6 @@ rpgd-particle-tf:
 mppi-var-tf:
   seed: null                          # If null, random seed based on datetime is used
   mpc_horizon: 35                       # steps
-  num_rollouts: 400                     # Number of Monte Carlo samples
   period_interpolation_inducing_points: 10                #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated
   cc_weight: 1.0
   R: 1.0                                # How much to punish Q
@@ -195,16 +212,6 @@ mppi-var-tf:
   STDEV_min: 0.01                       # Maximal variance for sampling
   STDEV_max: 10                         # Minimal sampling variance for sampling
   max_grad_norm: 100000                 # max norm of gradient such that ||gradient||_2
-mppi:
-  seed: null                            # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!)
-  mpc_horizon: 35                       # steps
-  num_rollouts: 3500                    # Number of Monte Carlo samples
-  cc_weight: 1.0
-  R: 1.0                                # How much to punish Q, For MPPI YOU have to make sure that this is the same as in cost functions config, as it plays a special role in the optimization algorithm as well as is used in cost functions!
-  LBD: 100.0                            # Cost parameter lambda
-  NU: 1000.0                            # Exploration variance
-  SQRTRHOINV: 0.03                      # Sampling variance
-  period_interpolation_inducing_points: 10                #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated
 random-action-tf:
   seed: null                          # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!)
   mpc_horizon: 35                      # steps
diff --git a/GUI/_CartPoleGUI_MainWindow.py b/GUI/_CartPoleGUI_MainWindow.py
index 2acd397e..edd1eba0 100644
--- a/GUI/_CartPoleGUI_MainWindow.py
+++ b/GUI/_CartPoleGUI_MainWindow.py
@@ -1,6 +1,7 @@
 """
 Main window (and main class) of CartPole GUI
 """
+from typing import Any
 
 # Necessary only for debugging in Visual Studio Code IDE
 try:
@@ -14,6 +15,8 @@
 import sys
 
 from others.p_globals import (
+    # k, m_cart, m_pole, g, J_fric, M_fric, L, v_max, u_max, controlDisturbance, controlBias,
+    TrackHalfLength,
     k, m_cart, m_pole, g, J_fric, M_fric, L, v_max, u_max, controlDisturbance, controlBias, TrackHalfLength,
 )
 
@@ -23,7 +26,7 @@
 from PyQt6.QtWidgets import QMainWindow, QRadioButton, QApplication, QSlider, QVBoxLayout, \
     QHBoxLayout, QLabel, QPushButton, QWidget, QCheckBox, \
     QLineEdit, QMessageBox, QComboBox, QButtonGroup, QFrame
-from PyQt6.QtCore import QThreadPool, QTimer, Qt
+from PyQt6.QtCore import QThreadPool, QTimer, Qt, pyqtSignal, QObject, QRunnable
 # The main drawing functionalities are implemented in CartPole Class
 # Some more functions needed for interaction of matplotlib with PyQt6
 from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
@@ -56,13 +59,47 @@
     pass
 from GUI._ControllerGUI_NoiseOptionsWindow import NoiseOptionsWindow
 
+from others.prefs import MyPreferences
+
+prefs=MyPreferences() # store and retrieve sticky values
 
 # Class implementing the main window of CartPole GUI
-class MainWindow(QMainWindow):
+class CartPoleMainWindow(QMainWindow):
+
+    # class instance variable to let controllers etc put status into the window
+    CartPoleGuiStatusText:QLabel= None
+
+    # class variable to allow objects to check if simulation is running, stopped, etc
+    CartPoleGuiSimulationState:str='stopped' # 'running', 'paused'
+
+    # signal that emits CartPoleGuiSimulationState when simulation 'running' 'stopped' 'paused' state changes
+    # to use this signal, in class init, connect a method to process the signal, e.g.
+    #         CartPoleMainWindow.CartPoleMainWindowInstance.CartPoleSimulationStateSignal.connect(self.process_signal)
+    # the process_signal(str) method gets passed one of the string signals when the main gui button is pressed.
+    # note that you MUST use the CartPoleMainWindow.CartPoleMainWindowInstance.CartPoleSimulationStateSignal signal that is bound by PyQt inside the *instance* of CartPoleMainWindow;
+    # do not try to bind to the unbound class variable CartPoleMainWindow.CartPoleSimulationStateSignal
+    CartPoleSimulationStateSignal:pyqtSignal=pyqtSignal(str)
+
+    # class variable to access the one main window from any subclass, set in init
+    CartPoleMainWindowInstance:QMainWindow=None
+
+    def set_status_text(text: str) -> None:
+        """ Sets the text at top of GUI window. Any class can use this via the static class method, e.g. like this:
 
-    def __init__(self, *args, **kwargs):
-        super(MainWindow, self).__init__(*args, **kwargs)
+                MainWindow.set_status_text(self.format_step(time))
 
+        (the self.format_step(time) call formats the current dance step text)
+
+        :param str: the string to show
+        """
+        if not CartPoleMainWindow.CartPoleGuiStatusText is None and not text is None:
+            CartPoleMainWindow.CartPoleGuiStatusText.setText(text)
+
+    def __init__(self, parent=None, *args, **kwargs):
+        super(CartPoleMainWindow, self).__init__(parent, *args, **kwargs)
+        self.speedup_measured = None  # filled in by actual measured speedup relative to real wall clock
+        CartPoleMainWindow.CartPoleMainWindowInstance=self
+        # QObject.__init__(self)
         # region Create CartPole instance and load initial settings
 
         # Create CartPole instance
@@ -144,16 +181,19 @@ def __init__(self, *args, **kwargs):
         layout = QVBoxLayout()
         # endregion
 
+
         # region - Change geometry of the main window
-        self.setGeometry(300, 300, 2500, 1000)
+        self.setGeometry(300, 100, 1000, 800) # origin x,y size w,h
         # endregion
 
         # region - Matplotlib figures (CartPole drawing and Slider)
         # Draw Figure
         self.fig = Figure(figsize=(25, 10))  # Regulates the size of Figure in inches, before scaling to window size.
+        from matplotlib import gridspec
+        gs = gridspec.GridSpec(2,1, height_ratios=[4, 1]) # make pole plot 4 times larger than cart target position plot
         self.canvas = FigureCanvas(self.fig)
-        self.fig.AxCart = self.canvas.figure.add_subplot(211)
-        self.fig.AxSlider = self.canvas.figure.add_subplot(212)
+        self.fig.AxCart = self.canvas.figure.add_subplot(gs[0])
+        self.fig.AxSlider = self.canvas.figure.add_subplot(gs[1])
         self.fig.AxSlider.set_ylim(0, 1)
 
         self.CartPoleInstance.draw_constant_elements(self.fig, self.fig.AxCart, self.fig.AxSlider)
@@ -162,6 +202,9 @@ def __init__(self, *args, **kwargs):
         lf = QVBoxLayout()
         lf.addWidget(self.canvas)
 
+        CartPoleMainWindow.CartPoleGuiStatusText = QLabel('(status)')
+        layout.addWidget(CartPoleMainWindow.CartPoleGuiStatusText)
+
         # endregion
 
         # region - Radio buttons selecting current controller
@@ -310,12 +353,12 @@ def __init__(self, *args, **kwargs):
         ip = QHBoxLayout()  # Layout for initial position sliders
         self.initial_position_slider = QSlider(orientation=Qt.Orientation.Horizontal)
         self.initial_position_slider.setRange(-int(float(1000*TrackHalfLength)), int(float(1000*TrackHalfLength)))
-        self.initial_position_slider.setValue(0)
+        self.initial_position_slider.setValue(prefs.get('initial-position',0))
         self.initial_position_slider.setSingleStep(1)
         self.initial_position_slider.valueChanged.connect(self.update_initial_position)
         self.initial_angle_slider = QSlider(orientation=Qt.Orientation.Horizontal)
         self.initial_angle_slider.setRange(-int(float(100*np.pi)), int(float(100*np.pi)))
-        self.initial_angle_slider.setValue(0)
+        self.initial_angle_slider.setValue(prefs.get('initial-angle',0))
         self.initial_angle_slider.setSingleStep(1)
         self.initial_angle_slider.valueChanged.connect(self.update_initial_angle)
         ip.addWidget(QLabel("Initial position:"))
@@ -400,6 +443,7 @@ def __init__(self, *args, **kwargs):
         l_text = QHBoxLayout()
         textbox_title = QLabel('CSV file name:')
         self.textbox = QLineEdit()
+        self.textbox.setText(prefs.get('last-csv-file','cartpole-history.csv'))
         l_text.addWidget(textbox_title)
         l_text.addWidget(self.textbox)
         layout.addLayout(l_text)
@@ -461,8 +505,7 @@ def __init__(self, *args, **kwargs):
 
         # region -- Checkbox: Decide how the cartpole should be displayed
         self.cb_show_hanging_pole = QCheckBox('Show hanging pole', self)
-        if self.CartPoleInstance.show_hanging_pole:
-            self.cb_show_hanging_pole.toggle()
+        self.cb_show_hanging_pole.setChecked(self.CartPoleInstance.show_hanging_pole)
         self.cb_show_hanging_pole.toggled.connect(self.cb_show_hanging_pole_f)
         l_cb.addWidget(self.cb_show_hanging_pole)
 
@@ -563,6 +606,7 @@ def experiment_thread(self):
             pass
 
         self.looper.start_loop()
+
         while not self.terminate_experiment_or_replay_thread:
             if self.pause_experiment_or_replay_thread:
                 time.sleep(0.1)
@@ -591,6 +635,7 @@ def experiment_thread(self):
                                                        decimals=2))
             self.CartPoleInstance.save_history_csv(csv_name=csv_name,
                                                    mode='save offline')
+            prefs.put('last-csv-file',csv_name)
 
         self.experiment_or_replay_thread_terminated = True
 
@@ -747,6 +792,7 @@ def start_stop_button(self):
             else:
                 self.start_thread()
             self.start_or_stop_action = "STOP!"
+            self.set_simulation_state('running')
 
         # If "START! / STOP!" button in "STOP!" mode...
         elif self.start_or_stop_action == 'STOP!':
@@ -761,7 +807,17 @@ def start_stop_button(self):
                 # It is implemented this way, because thread my terminate not only due "STOP!" button
                 # (e.g. replay thread when whole experiment is replayed)
             self.start_or_stop_action = "START!"
-        
+            self.set_simulation_state('stopped')
+
+    def set_simulation_state(self, state):
+        """ Used by CartPoleMainWindow to send signals to listeners for simulation state.
+        A signal is emitted on self.CartPoleSimulationStateSignal
+
+        :param state: the str state
+        """
+        CartPoleMainWindow.CartPoleGuiSimulationState = state
+        self.CartPoleSimulationStateSignal.emit(CartPoleMainWindow.CartPoleGuiSimulationState)
+
     def pause_unpause_button(self):
         if self.simulator_mode == 'Physical CP':
             if self.pause_or_unpause_action == 'PAUSE' and self.start_or_stop_action == 'STOP!':
@@ -778,10 +834,14 @@ def pause_unpause_button(self):
                 self.pause_or_unpause_action = 'UNPAUSE'
                 self.pause_experiment_or_replay_thread = True
                 self.bp.setText("UNPAUSE")
+                self.set_simulation_state('paused')
+
             elif self.pause_or_unpause_action == 'UNPAUSE' and self.start_or_stop_action == 'STOP!':
                 self.pause_or_unpause_action = 'PAUSE'
                 self.pause_experiment_or_replay_thread = False
                 self.bp.setText("PAUSE")
+                self.set_simulation_state('running')
+
 
     # Run thread. works for all simulator modes.
     def start_thread(self):
@@ -804,9 +864,16 @@ def start_thread(self):
 
         # Set user-provided initial values for state (or its part) of the CartPole
         # Search implementation for more detail
-        # The following line is important as it let the user to set with the slider the starting target position
+        # The following line is important as it let the user set with the slider the starting target position
         # After the slider was reset at the end of last experiment
         # With the small sliders he can also adjust starting initial_state
+        self.initial_state = create_cartpole_state()
+        # don't reset the slider on restart experiment
+        # self.initial_position_slider.setValue(0)
+        # self.initial_angle_slider.setValue(0)
+        # reset state to slider values on restart
+        self.initial_state[ANGLE_IDX] = self.initial_angle_slider.value() / 100.
+        self.initial_state[POSITION_IDX] = self.initial_position_slider.value() / 1000.
         self.reset_variables(2, s=np.copy(self.initial_state), target_position=self.CartPoleInstance.target_position)
 
         if self.simulator_mode == 'Random Experiment':
@@ -833,8 +900,10 @@ def start_thread(self):
         elif self.simulator_mode == 'Slider-Controlled Experiment' or self.simulator_mode == 'Random Experiment':
             worker = Worker(self.experiment_thread)
         worker.signals.finished.connect(self.finish_thread)
+
         # Execute
         self.threadpool.start(worker)
+        # worker.my_worker_signal.emit('start')
 
     # finish_threads works for all simulation modes
     # Some lines mya be redundant for replay,
@@ -845,10 +914,15 @@ def finish_thread(self):
 
         self.CartPoleInstance.use_pregenerated_target_position = False
         self.initial_state = create_cartpole_state()
-        self.initial_position_slider.setValue(0)
-        self.initial_angle_slider.setValue(0)
+        # don't reset the slider on restart experiment
+        # self.initial_position_slider.setValue(0)
+        # self.initial_angle_slider.setValue(0)
+        # reset state to slider values on restart
+        self.initial_state[ANGLE_IDX] = self.initial_angle_slider.value() / 100.
+        self.initial_state[POSITION_IDX] = self.initial_position_slider.value() / 1000.
         self.CartPoleInstance.s = self.initial_state
 
+
         # Some controllers may collect they own statistics about their usage and print it after experiment terminated
         if self.simulator_mode != 'Replay':
             try:
@@ -940,9 +1014,10 @@ def set_labels_thread(self):
 
             mean_dt_real = np.mean(self.looper.circ_buffer_dt_real)
             if mean_dt_real > 0:
+                self.speedup_measured=self.CartPoleInstance.dt_simulation / mean_dt_real
                 self.labSpeedUp.setText('Speed-up (measured): x{:.2f}'
-                                        .format(self.CartPoleInstance.dt_simulation / mean_dt_real))
-            sleep(0.1)
+                                        .format(self.speedup_measured))
+            sleep(0.5) # rate of label update is 2Hz which is fast enough
 
     # Function to measure the time of simulation as experienced by user
     # It corresponds to the time of simulation according to equations only if real time mode is on
@@ -1236,9 +1311,11 @@ def open_additional_noise_widget(self):
 
     def update_initial_position(self, value: str):
         self.initial_state[POSITION_IDX] = float(value) / 1000.0
+        prefs.put('initial-position',value)
 
     def update_initial_angle(self, value: str):
         self.initial_state[ANGLE_IDX] = float(value) / 100.0
+        prefs.put('initial-angle',value)
 
     # endregion
 
@@ -1260,6 +1337,7 @@ def kick_pole(self):
         elif self.sender().text() == "Right":
             self.CartPoleInstance.s[ANGLED_IDX] -= .6
 
+
     # endregion
 
 
diff --git a/GUI/_CartPoleGUI_worker_template.py b/GUI/_CartPoleGUI_worker_template.py
index c8cf8637..a284ccb7 100644
--- a/GUI/_CartPoleGUI_worker_template.py
+++ b/GUI/_CartPoleGUI_worker_template.py
@@ -14,6 +14,10 @@ class WorkerSignals(QObject):
 
 class Worker(QRunnable):
 
+    # register to this signal to be updated about simulation start/stop/pause
+    my_worker_signal=pyqtSignal(str)
+
+
     def __init__(self, fn, *args, **kwargs):
         super(Worker, self).__init__()
 
diff --git a/GUI/_ControllerGUI_MPPIOptionsWindow.py b/GUI/_ControllerGUI_MPPIOptionsWindow.py
index 780bd8a2..e8d92c4f 100644
--- a/GUI/_ControllerGUI_MPPIOptionsWindow.py
+++ b/GUI/_ControllerGUI_MPPIOptionsWindow.py
@@ -34,7 +34,7 @@ def __init__(self):
         super(MPPIOptionsWindow, self).__init__()
 
         self.horizon_steps = controller_mppi_cartpole.mpc_horizon
-        self.num_rollouts = controller_mppi_cartpole.num_rollouts
+        self.batch_size = controller_mppi_cartpole.batch_size
         self.dd_weight = controller_mppi_cartpole.dd_weight
         self.ep_weight = controller_mppi_cartpole.ep_weight
         self.ekp_weight = controller_mppi_cartpole.ekp_weight * 1.0e1
@@ -73,13 +73,13 @@ def __init__(self):
 
         slider = QSlider(orientation=Qt.Orientation.Horizontal)
         slider.setRange(10, 3000)
-        slider.setValue(self.num_rollouts)
+        slider.setValue(self.batch_size)
         slider.setTickPosition(QSlider.TickPosition.TicksBelow)
         slider.setTickInterval(10)
         slider.setSingleStep(10)
         rollouts_options_layout.addWidget(slider)
 
-        slider.valueChanged.connect(self.num_rollouts_changed)
+        slider.valueChanged.connect(self.batch_size_changed)
 
         ### Set Cost Weights
         cost_weight_layout = QVBoxLayout()
@@ -261,9 +261,9 @@ def horizon_length_changed(self, val: int):
         controller_mppi_cartpole.mpc_horizon = self.horizon_steps
         self.update_slider_labels()
 
-    def num_rollouts_changed(self, val: int):
-        self.num_rollouts = val
-        controller_mppi_cartpole.num_rollouts = self.num_rollouts
+    def batch_size_changed(self, val: int):
+        self.batch_size = val
+        controller_mppi_cartpole.batch_size = self.batch_size
         self.update_slider_labels()
 
     def dd_weight_changed(self, val: int):
@@ -330,7 +330,7 @@ def update_slider_labels(self):
             f"Horizon: {self.horizon_steps} steps = {round(self.horizon_steps * controller_mppi_cartpole.dt, 2)} s"
         )
         self.rollouts_label.setText(
-            f"Rollouts: {self.num_rollouts}"
+            f"Rollouts: {self.batch_size}"
         )
         self.dd_weight_label.setText(
             f"Distance difference cost weight: {round(self.dd_weight, 2)}"
diff --git a/GUI/__init__.py b/GUI/__init__.py
index 15d475a4..a133189b 100644
--- a/GUI/__init__.py
+++ b/GUI/__init__.py
@@ -18,7 +18,7 @@
     os.chdir("Driver")
 
 # Import custom made elements of GUI
-from GUI._CartPoleGUI_MainWindow import MainWindow
+from GUI._CartPoleGUI_MainWindow import CartPoleMainWindow
 
 # This piece of code gives a custom ID to our application
 # It is essential for packaging
@@ -48,7 +48,7 @@ def run_gui():
     # Set the default icon to use for all the windows of our application
     app.setWindowIcon(QIcon('GUI/gui_icon.ico'))  # GUI Icon, feel free to change
     # Create an instance of the GUI window.
-    window = MainWindow()
+    window = CartPoleMainWindow()
     window.show()
     # Next line hands the control over to Python GUI
     app.exec()
diff --git a/GUI/gui_default_params.py b/GUI/gui_default_params.py
index e5f30f85..1d0f085a 100644
--- a/GUI/gui_default_params.py
+++ b/GUI/gui_default_params.py
@@ -19,9 +19,9 @@
 # controller_init = 'custom-mpc-scipy'  # Defines which controller is loaded at the start of the program
 import os
 if os.getcwd().split(os.sep)[-1] == 'Driver':
-    controller_init = 'mpc'  # Load as default if loaded as part of physical-cartpole
+    controller_init = 'energy'  # TODO shreyan controller CCNW2023 Load as default if loaded as part of physical-cartpole
 else:
-    controller_init = 'mpc'  # Load as default if loaded as cartpole simulator stand alone
+    controller_init = 'energy'  # TODO shreyan controller CCNW2023 # Load as default if loaded as cartpole simulator stand alone
 """
 Possible choices for CartPole controller:
 
@@ -51,7 +51,7 @@
 # Action toggling between showing the ground level and above
 # and showing above and below ground level the length of the pole
 # Second option is good for visualizing swing-up
-show_hanging_pole_init = False
+show_hanging_pole_init = True
 
 # Variables for random trace generation - GUI only
 # Data Generator sets these parameters independently
diff --git a/GUI/loop_timer.py b/GUI/loop_timer.py
index 66be15bc..73e75a4c 100644
--- a/GUI/loop_timer.py
+++ b/GUI/loop_timer.py
@@ -10,10 +10,11 @@
 class loop_timer():
     """ Simple game loop timer that sleeps for leftover time (if any) at end of each iteration"""
     LOG_INTERVAL_SEC = 10
-    NUM_SAMPLES = 1000
+    NUM_SAMPLES = 200 # last few seconds
 
     def __init__(self, rate_hz: float = None, dt_target: float = None, do_diagnostics: bool = False) -> None:
-        """ Make a new loop_timer, specifying the target frame rate in Hz or time interval dt_target in seconds
+        """ Make a new loop_timer, specifying the target frame rate in Hz or time interval dt_target in seconds.
+        The actual average rate can be obtained from np.mean(self.looper.circ_buffer_dt_real).
 
         :param rate_hz: the target loop rate in Hz. The rate can be changed anytime by modifying rate_hz.
         :param dt_target: the time interval dt in seconds. It can be changed anytime by modifying dt.
@@ -40,7 +41,7 @@ def __init__(self, rate_hz: float = None, dt_target: float = None, do_diagnostic
         self.last_log_time = 0
         self.circ_buffer_dt = deque(iterable=np.zeros(self.NUM_SAMPLES), maxlen=self.NUM_SAMPLES)
         self.circ_buffer_leftover = deque(iterable=np.zeros(self.NUM_SAMPLES), maxlen=self.NUM_SAMPLES)
-        self.circ_buffer_dt_real = deque(iterable=np.zeros(50), maxlen=50)
+        self.circ_buffer_dt_real = deque(iterable=np.zeros(self.NUM_SAMPLES), maxlen=self.NUM_SAMPLES)
 
     @property
     def rate_hz(self):
@@ -52,6 +53,9 @@ def dt_target(self):
 
     @rate_hz.setter
     def rate_hz(self, new_rate):
+        """ Sets the target loop rate.
+        :param new_rate: the target rate in Hz
+        """
         if new_rate is None:
             self._rate_hz = None
         elif new_rate > 0.0:
@@ -62,6 +66,9 @@ def rate_hz(self, new_rate):
 
     @dt_target.setter
     def dt_target(self, new_dt):
+        """ Set the target delta time
+        :param new_dt: the new target delta time in seconds for each loop iteration
+        """
         if new_dt is None:
             self._dt_target = None
         elif new_dt > 0.0:
diff --git a/GymlikeCartPole/CartPoleEnv_LTC.py b/GymlikeCartPole/CartPoleEnv_LTC.py
index 08ecf403..ff89cf60 100644
--- a/GymlikeCartPole/CartPoleEnv_LTC.py
+++ b/GymlikeCartPole/CartPoleEnv_LTC.py
@@ -9,10 +9,10 @@
 
 import numpy as np
 
-from typing import Optional, Tuple, Union
+from typing import Optional, Tuple
 
-from others.globals_and_utils import load_config, my_logger
-logger = my_logger(__name__)
+from Control_Toolkit.others.get_logger import get_logger
+log = get_logger(__name__)
 
 # FIXME: Set reset properly
 # FIXME: set random position - pregenerate
diff --git a/GymlikeCartPole/play_gym.py b/GymlikeCartPole/play_gym.py
index cba9016a..ce1b926b 100644
--- a/GymlikeCartPole/play_gym.py
+++ b/GymlikeCartPole/play_gym.py
@@ -1,6 +1,6 @@
 from time import sleep
 
-from others.globals_and_utils import load_config, my_logger
+from others.globals_and_utils import load_config
 from Control_Toolkit.others.globals_and_utils import import_controller_by_name
 
 from GymlikeCartPole.CartPoleEnv_LTC import CartPoleEnv_LTC
@@ -11,7 +11,8 @@
 
 config = load_config("config.yml")
 
-logger = my_logger(__name__)
+from Control_Toolkit.others.get_logger import get_logger
+log = get_logger(__name__)
 
 env = CartPoleEnv_LTC()
 controller = Controller(env.CartPoleInstance, **{**config["controller"][controller_name], **{"num_control_inputs": config["cartpole"]["num_control_inputs"]}})
@@ -23,7 +24,7 @@
     action = controller.step(state)
     # action = env.action_space.sample()  # take a random action
     state, reward, done, _ = env.step(action)
-    logger.info(f"Iteration {k}: Reward = {reward}")
+    log.info(f"Iteration {k}: Reward = {reward}")
     if done:
         break
 env.close()
diff --git a/README.md b/README.md
index 822033c1..23ffe56a 100644
--- a/README.md
+++ b/README.md
@@ -35,9 +35,17 @@ on macOS with `pip list --format=freeze > requirements.txt`.
 It required some minor manual corrections (deleting the package versions) to make the re-installation smooth.
 It should be also possible to create this file with `conda list -e > requirements.txt`.
 
+### Notes
+**ASF** stands for Application Specific Files. There are ASF for controllers [Control_Toolkit_ASF](Control_Toolkit_ASF/) and System Identification [SI_Toolkit](SI_Toolkit/).
+
 ## Basic Operation
 1. **Run GUI:** Run `python run_cartpole_gui.py` from top-level path.
-2. **Run a single experiment:** Open `run_data_generator.py`. In the marked section, you can define your experiment. For a single run, set `number_of_experiments = 1`. Then open `./config.yml` to modify controller-related parameters. For example, you can choose there whether MPPI should run with the true model ('predictor_ODE' == Euler Integrator) or with a neural network ('predictor_autoregressive_neural'). Once set, run `python -m run_data_generator`. It will create a new folder `./Experiment_Recordings/` and store a csv log file in it.
+ If tensoflow JIT compilation fails, turn it off in [SI_Toolkit_ASF/__init__.py](SI_Toolkit_ASF/__init__.py) by setting USE_TENSORFLOW_EAGER_MODE=True until you can sort out tensorflow woes.
+2. **Run a single experiment:** Open `run_data_generator.py`. In the marked section, you can define your experiment. For a single run, set `number_of_experiments = 1`. Then open [config.yml](./config.yml) to modify controller-related parameters. For example, you can choose there whether MPPI should run with the true model ('predictor_ODE' == Euler Integrator) or with a neural network ('predictor_autoregressive_neural'). Once set, run `python -m run_data_generator`. Or you can use the pycharm target `Run Cartpole GUI`. It will create a new folder `./Experiment_Recordings/` and store a csv log file in it.
+
+### Getting Tensorflow Just In Time (JIT) compilation to work
+You might need to install the CUDA toolkit and DNN toolkit if you want to accelerate (a LOT) the arithmetic.
+ See https://www.tensorflow.org/install/pip . Note that this installation can be INSIDE your virtual conda environment, not on the host computer, although you might need to use an NVIDIA graphic driver to give the tools access to the physical GPU.
 
 ## Run a Machine Learning Pipeline
 
@@ -95,9 +103,10 @@ The "CSV file name" text box is used for naming a file to be saved or to be load
 Files regions are folded with #region #endregion syntax
 For Pycharm default, for Atom install
 
-## Parameter exploration with NNI
+## Parameter exploration with Neural Network Intelligence (NNI)
+
 
-For intelligent parameter space exploration with NNI, we have 2 special files : 
+For AutoML intelligent parameter space exploration with [NNI](https://nni.readthedocs.io/en/stable/), we have 2 special files : 
 
 1. modeling/rnn_tf/search_space.json : Search space for parameter search
 2. config.yml : Configuring the NNI experiments. 
diff --git a/SI_Toolkit b/SI_Toolkit
index 8f460926..89f4ead9 160000
--- a/SI_Toolkit
+++ b/SI_Toolkit
@@ -1 +1 @@
-Subproject commit 8f460926721c0ba7ff7acb7b91b6ca861f444015
+Subproject commit 89f4ead943a975fb5ce549b0462a234308febde7
diff --git a/SI_Toolkit_ASF/DataSelector.py b/SI_Toolkit_ASF/DataSelector.py
index f0276116..f57ee4c0 100644
--- a/SI_Toolkit_ASF/DataSelector.py
+++ b/SI_Toolkit_ASF/DataSelector.py
@@ -131,8 +131,8 @@ def return_dataset_for_training(self,
                                     raw=False
                                     ):
 
-        if batch_size is None and self.args.batch_size is not None:
-            batch_size = self.args.batch_size
+        if batch_size is None and self.args.num_rollouts is not None:
+            batch_size = self.args.num_rollouts
 
         if inputs is None and self.args.inputs is not None:
             inputs = self.args.inputs
diff --git a/SI_Toolkit_ASF/__init__.py b/SI_Toolkit_ASF/__init__.py
index 51fa92b6..71b1ca42 100644
--- a/SI_Toolkit_ASF/__init__.py
+++ b/SI_Toolkit_ASF/__init__.py
@@ -1,2 +1,14 @@
-GLOBALLY_DISABLE_COMPILATION = False  # Set to False to use tf.function
-USE_JIT_COMPILATION = True  # XLA ignores random seeds. Set to False for reproducibility
+### Choose whether to run TensorFlow in eager mode (slow, interpreted) or graph mode (fast, compiled)
+# Set `USE_TENSORFLOW_EAGER_MODE=False` to:
+# 1. decorate functions in optimizers and predictors with `@tf.function`.
+# 2. and thereby enable TensorFlow graph mode. This is orders of magnitude faster than the standard eager mode.
+# Use USE_TENSORFLOW_EAGER_MODE=True to enable debugging
+USE_TENSORFLOW_EAGER_MODE = False
+
+
+### Choose whether to use TensorFlow Accelerated Linear Algebra (XLA).
+# XLA uses machine-specific conversions to speed up the compiled TensorFlow graph.
+# Set USE_TENSORFLOW_XLA to True to accelerate the execution (for real-time).
+# If `USE_TENSORFLOW_XLA=True`, this adds `jit_compile=True` to the `tf.function` decorator.
+# However, XLA ignores random seeds. Set to False for guaranteed reproducibility, such as for simulations.
+USE_TENSORFLOW_XLA = True
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/config.yml b/config.yml
index 3ec15478..343de2cc 100644
--- a/config.yml
+++ b/config.yml
@@ -1,3 +1,5 @@
+# the root config that specifies basic physical parameters of the cartpole.
+#  However, for physical-cartpole, we still use the legacy p_globals.py which overrides these parameters.
 cartpole:
   actuator_noise: 0.1  # Noise on top of the calculated control input by +/- this value, we usually set 0.5 to explore various controllers while collecting data for training, 0.1 to test an not-ideal case
   mode: stabilization
@@ -5,7 +7,7 @@ cartpole:
   PATH_TO_EXPERIMENT_RECORDINGS_DEFAULT: './Experiment_Recordings/'   # Where to save experiment recording per default
   m_pole: 0.087  # mass of pole, kg # Checked by Antonio & Tobi
   m_cart: 0.230  # mass of cart, kg # Checked by Antonio
-  L: "0.395/2.0"  # HALF (!!!) length of pend, m # Checked by Antonio & Tobi
+  L: 0.199  # HALF (!!!) length of pole, full length is .398m, m # Checked by Antonio & Tobi
   u_max: 2.62  # max force produced by the motor, N # Checked by Marcin
   M_fric: 4.77  # cart friction on track, N/m/s # Checked by Marcin
   J_fric: 2.5e-4  # friction coefficient on angular velocity in pole joint, Nm/rad/s # Checked by Marcin
@@ -15,7 +17,8 @@ cartpole:
   controlDisturbance: 0.0  # disturbance, as factor of u_max # I used 0.2-0.5 for data collection
   controlBias: 0.0  # bias of control input
   g: 9.81  # absolute value of gravity acceleration, m/s^2
-  k: "1.0/3.0"  # Dimensionless factor of moment of inertia of the pole with length 2L: I: (1/3)*m*(2L)^2 = (4/3)*m*(L)^2
+  k: 0.33333333333  # 1/3: Dimensionless factor of moment of inertia of the pole with length 2L: I: (1/3)*m*(2L)^2 = (4/3)*m*(L)^2
+  cart_bounce_factor: .8 # fraction of cart speed after bounce from end of track, tobi guesstimated, not yet measured
   latency: 0.0 # s
   noise:
     noise_mode: 'OFF'
diff --git a/config_data_gen.yml b/config_data_gen.yml
index 7fda622c..d5327d7f 100644
--- a/config_data_gen.yml
+++ b/config_data_gen.yml
@@ -1,7 +1,7 @@
 controller: mpc
 PATH_TO_EXPERIMENT_RECORDINGS_DEFAULT: './Experiment_Recordings/'   # Where to save experiment recording per default
-seed: 1998   # If not assign random seed based on datetime is used
-length_of_experiment: 3600  # Length of each experiment in s:
+seed: 1998   # If not assigned (i.e. commented out) random seed based on datetime is used
+length_of_experiment: 60  # Length of each experiment in s:
 split: [0.8, 0.1]  # Train/Val/(Test) split - only matters if you run it in ML Pipeline mode
 random_initial_state:
   # Unassigned variables will be randomly initialized (see below)
@@ -33,4 +33,4 @@ save_mode: 'online'  # It was intended to save memory usage, but it doesn't seem
 # Show popup window in the end with summary of experiment?
 show_summary_plots: False
 show_controller_report: False
-number_of_experiments: 10  # How many experiments will be generated
\ No newline at end of file
+number_of_experiments: 1  # How many experiments will be generated
\ No newline at end of file
diff --git a/others/Controllers-out-of-use/controller_mpc_opti.py b/others/Controllers-out-of-use/controller_mpc_opti.py
index a35852e5..bc8942d1 100644
--- a/others/Controllers-out-of-use/controller_mpc_opti.py
+++ b/others/Controllers-out-of-use/controller_mpc_opti.py
@@ -130,9 +130,9 @@ def cost_function(self, Q_hat):
 
         return cost
 
-    def step(self, s, target_position, time=None):
+    def step(self, state, target_position, time=None):
 
-        self.s = s
+        self.s = state
         self.target_position = target_position
 
         opti = casadi.Opti()
diff --git a/others/Controllers-out-of-use/controller_pid.py b/others/Controllers-out-of-use/controller_pid.py
index 9d2d69ec..627842e6 100644
--- a/others/Controllers-out-of-use/controller_pid.py
+++ b/others/Controllers-out-of-use/controller_pid.py
@@ -24,12 +24,12 @@ def __init__(self):
         self.I_position = config["controller"]["pid"]["I_position"]
         self.D_position = config["controller"]["pid"]["D_position"]
 
-    def step(self, s: np.ndarray, target_position: np.ndarray, time=None):
+    def step(self, state: np.ndarray, target_position: np.ndarray, time=None):
         error = -np.array([
-            [s[cartpole_state_varname_to_index('position')] - target_position],
-            [s[cartpole_state_varname_to_index('positionD')]],
-            [s[cartpole_state_varname_to_index('angle')]],
-            [s[cartpole_state_varname_to_index('angleD')]]
+            [state[cartpole_state_varname_to_index('position')] - target_position],
+            [state[cartpole_state_varname_to_index('positionD')]],
+            [state[cartpole_state_varname_to_index('angle')]],
+            [state[cartpole_state_varname_to_index('angleD')]]
         ])
 
         positionCMD = self.P_position * error[0, :] + self.D_position * error[1]
diff --git a/others/Media/01 Sookie_ Sookie.mp3 b/others/Media/01 Sookie_ Sookie.mp3
new file mode 100644
index 00000000..2e8ed82a
Binary files /dev/null and b/others/Media/01 Sookie_ Sookie.mp3 differ
diff --git a/others/Media/Rolling_Stones_Satisfaction.mp3 b/others/Media/Rolling_Stones_Satisfaction.mp3
new file mode 100644
index 00000000..3c624ed5
Binary files /dev/null and b/others/Media/Rolling_Stones_Satisfaction.mp3 differ
diff --git a/others/PycharmRunConfigurations/Generate Data.run.xml b/others/PycharmRunConfigurations/Generate Data.run.xml
index 2f40dabd..18d452d5 100644
--- a/others/PycharmRunConfigurations/Generate Data.run.xml	
+++ b/others/PycharmRunConfigurations/Generate Data.run.xml	
@@ -1,14 +1,15 @@
 <component name="ProjectRunConfigurationManager">
   <configuration default="false" name="Generate Data" type="PythonConfigurationType" factoryName="Python" singleton="false">
-    <module name="CartPoleSimulation" />
+    <module name="physical-cartpole" />
     <option name="INTERPRETER_OPTIONS" value="" />
     <option name="PARENT_ENVS" value="true" />
     <envs>
       <env name="PYTHONUNBUFFERED" value="1" />
     </envs>
-    <option name="SDK_HOME" value="" />
+    <option name="SDK_HOME" value="$USER_HOME$/miniconda3/envs/physical-cartpole/bin/python" />
+    <option name="SDK_NAME" value="Python 3.8 (physical-cartpole)" />
     <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
-    <option name="IS_MODULE_SDK" value="true" />
+    <option name="IS_MODULE_SDK" value="false" />
     <option name="ADD_CONTENT_ROOTS" value="true" />
     <option name="ADD_SOURCE_ROOTS" value="true" />
     <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
diff --git a/others/PycharmRunConfigurations/Run CartPole GUI.run.xml b/others/PycharmRunConfigurations/Run CartPole GUI.run.xml
index 40d7dfcc..c271363f 100644
--- a/others/PycharmRunConfigurations/Run CartPole GUI.run.xml	
+++ b/others/PycharmRunConfigurations/Run CartPole GUI.run.xml	
@@ -1,14 +1,15 @@
 <component name="ProjectRunConfigurationManager">
   <configuration default="false" name="Run CartPole GUI" type="PythonConfigurationType" factoryName="Python">
-    <module name="CartPoleSimulation" />
+    <module name="physical-cartpole" />
     <option name="INTERPRETER_OPTIONS" value="" />
     <option name="PARENT_ENVS" value="true" />
     <envs>
       <env name="PYTHONUNBUFFERED" value="1" />
     </envs>
-    <option name="SDK_HOME" value="" />
-    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
-    <option name="IS_MODULE_SDK" value="true" />
+    <option name="SDK_HOME" value="$USER_HOME$/anaconda3/envs/CartPoleSimulation/bin/python" />
+    <option name="SDK_NAME" value="CartPoleSimulation" />
+    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/Driver/CartPoleSimulation" />
+    <option name="IS_MODULE_SDK" value="false" />
     <option name="ADD_CONTENT_ROOTS" value="true" />
     <option name="ADD_SOURCE_ROOTS" value="true" />
     <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
diff --git a/others/globals_and_utils.py b/others/globals_and_utils.py
index 5a2f1a15..cb81edec 100644
--- a/others/globals_and_utils.py
+++ b/others/globals_and_utils.py
@@ -7,8 +7,22 @@
 import os
 import time
 from datetime import datetime
-from typing import Tuple
-import yaml
+from pathlib import Path
+from typing import Tuple, Optional
+
+import dictdiffer as dictdiffer
+import ruamel.yaml as yaml # correctly supports scientific notation for numbers, see https://stackoverflow.com/questions/30458977/yaml-loads-5e-6-as-string-and-not-a-number
+# import yaml
+import warnings
+warnings.simplefilter('ignore', yaml.error.UnsafeLoaderWarning)
+# from generallibrary import print_link, print_link_to_obj # for logging links to source code in logging output for pycharm clicking, see https://stackoverflow.com/questions/26300594/print-code-link-into-pycharms-console
+from munch import Munch, DefaultMunch
+
+from SI_Toolkit.computation_library import TensorType
+
+from Control_Toolkit.others.get_logger import get_logger
+log = get_logger(__name__)
+
 
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0' # all TF messages
 
@@ -126,10 +140,12 @@ def input_with_timeout(prompt, timeout=30):
             signal.alarm(0) # cancel alarm
 
 def yes_or_no(question, default='y', timeout=None):
-    """ Get y/n answer with default choice and optional timeout
+    """ Get y/n answer with default choice and optional timeout from terminal input.
+
     :param question: prompt
     :param default: the default choice, i.e. 'y' or 'n'
     :param timeout: the timeout in seconds, default is None
+
     :returns: True or False
     """
     if default is not None and (default!='y' and default!='n'):
@@ -156,43 +172,8 @@ def yes_or_no(question, default='y', timeout=None):
         if reply[0].lower() == 'n':
             return False
 
-class CustomFormatter(logging.Formatter):
-    """Logging Formatter to add colors and count warning / errors"""
-
-    grey = "\x1b[38;21m"
-    yellow = "\x1b[33;21m"
-    red = "\x1b[31;21m"
-    bold_red = "\x1b[31;1m"
-    reset = "\x1b[0m"
-    format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)"
-
-    FORMATS = {
-        logging.DEBUG: grey + format + reset,
-        logging.INFO: grey + format + reset,
-        logging.WARNING: yellow + format + reset,
-        logging.ERROR: red + format + reset,
-        logging.CRITICAL: bold_red + format + reset
-    }
-
-    def format(self, record):
-        log_fmt = self.FORMATS.get(record.levelno)
-        formatter = logging.Formatter(log_fmt)
-        return formatter.format(record)
 
 
-def my_logger(name):
-    # logging.basicConfig(stream=sys.stdout, level=logging.INFO)
-    logger = logging.getLogger(name)
-    logger.setLevel(LOGGING_LEVEL)
-    # create console handler
-    ch = logging.StreamHandler()
-    ch.setFormatter(CustomFormatter())
-    logger.addHandler(ch)
-    return logger
-
-
-log=my_logger(__name__)
-
 
 def create_rng(id: str, seed: str, use_tf: bool=False):
     if seed == None:
@@ -212,12 +193,220 @@ def load_config(filename: str) -> dict:
     :type filename: str
     """
     try:
-        config = yaml.load(open(os.path.join("CartPoleSimulation", filename), "r"), Loader=yaml.FullLoader)
+        config = yaml.load(open(os.path.join("CartPoleSimulation", filename), "r"))
     except FileNotFoundError:
-        config = yaml.load(open(filename), Loader=yaml.FullLoader)
+        config = yaml.load(open(filename))
     return config
 
 
+def load_or_reload_config_if_modified(filepath:str, every:int=5, target_obj=None, search_path:Optional[list]=None)->Tuple[Munch,Optional[dict]]:
+    """
+    Reloads a YAML config if the yaml file was modified since runtime started or since last reloaded.
+    The initial call will store the config in a private dict to return on subsequent calls.
+
+    Only modified entries are returned in changes. Added or deleted items are not returned.
+
+    :param filepath the relative path to file. Generate for call with e.g. os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml")
+    :param target_obj: an optional object into which the config is assigned for (possibly) use in tensorflow
+    :param every: only check every this many times we are invoked
+    :param search_path: list of additional folders to search for the filepath, e.g. ['CartPoleSimulation'] (to handle invovation from another project). The relative path from cwd is always checked first.
+
+    :returns: (config,changes)
+        config: nested Munch of the original config if file has not been modified
+            since startup or last reload time,
+            otherwise the reloaded config Munch
+            The entries are accessed by name, e.g. "CartPole.default.ddweight".
+            See https://github.com/Infinidat/munch.
+        changes: a dict of key,value of modified config entries in format dict([key,new_value], ...),
+            where key is the final element of path a.b..c.key that is in the nested yaml config file
+    """
+    counter=0
+    if filepath in load_or_reload_config_if_modified.counter:
+        load_or_reload_config_if_modified.counter[filepath]+=1
+        counter=load_or_reload_config_if_modified.counter[filepath]
+    else:
+        load_or_reload_config_if_modified.counter[filepath] =1
+    if filepath in load_or_reload_config_if_modified.cached_configs and counter>0 and counter%every!=0:
+        return (load_or_reload_config_if_modified.cached_configs[filepath],None) # if not checking this time, return cached config
+    try:
+        fp=None
+        mtime=None
+        if filepath in load_or_reload_config_if_modified.actual_path:
+            fp=Path(filepath)
+            mtime=fp.stat().st_mtime # get mod time
+        else:
+            fp=Path(filepath)
+            if not fp.exists() and search_path:
+                for s in search_path:
+                    fp=Path(os.path.join(s,filepath))
+                    if fp.exists():
+                        filepath=fp.absolute()
+                        load_or_reload_config_if_modified.actual_path[filepath]=filepath
+                        break
+                if not fp.exists():
+                    raise FileNotFoundError(f'"{filepath}" does not exist on search path ".;{search_path}"; maybe path incorrect? os.getcwd()="{os.getcwd()}"')
+        fp=None
+        mtime=None
+        if filepath in load_or_reload_config_if_modified.actual_path:
+            fp=Path(filepath)
+        else:
+            fp=Path(filepath)
+            if not fp.exists() and search_path:
+                for s in search_path:
+                    fp=Path(os.path.join(s,filepath))
+                    if fp.exists():
+                        filepath=fp.absolute()
+                        load_or_reload_config_if_modified.actual_path[filepath]=filepath
+                        break
+                if not fp.exists():
+                    raise FileNotFoundError(f'"{filepath}" does not exist on search path ".;{search_path}"; maybe path incorrect? os.getcwd()="{os.getcwd()}"')
+
+        mtime = fp.stat().st_mtime  # get modification time of config
+
+        if ((not (filepath in load_or_reload_config_if_modified.cached_configs))) \
+                or ((filepath in load_or_reload_config_if_modified.cached_configs) and mtime > load_or_reload_config_if_modified.mtimes[filepath]):
+            # if loading first time, or we have loaded and the file has been modified since we loaded it, then reload it and flag that it was modified (globally)
+            changes = None
+            new_config = yaml.load(open(filepath),Loader=yaml.Loader) # loads a nested dict of config file, set loader explicitly to suppress warning about unsafe loader
+            new_config_obj=DefaultMunch.fromDict(new_config)
+            # now check if there are any changes compared with cached config
+            if filepath in load_or_reload_config_if_modified.cached_configs:
+                old_config=load_or_reload_config_if_modified.cached_configs[filepath]
+                diff=dictdiffer.diff(new_config,old_config) # https://github.com/inveniosoftware/dictdiffer https://stackoverflow.com/questions/32815640/how-to-get-the-difference-between-two-dictionaries-in-python
+                for (type,path,change) in diff:
+                    if type=='change':
+                        (new,old)=change
+                        log.info(f'{path} changed: old/new {old}/{new}')
+                        if changes is None:
+                            changes=dict()
+                        key=path.split('.')[-1]
+                        changes[key]=new
+
+            load_or_reload_config_if_modified.mtimes[filepath]=mtime
+            load_or_reload_config_if_modified.cached_configs[filepath]=new_config_obj
+            log.debug(f"(re)loaded modified config (File '{filepath}')") # format (File "XXX") generates pycharm link to file in console output
+            if not changes is None and not target_obj is None:
+                update_attributes(changes, target_obj)
+            return (new_config_obj,changes) # it was modified, so return changes dict
+        else:
+            return (load_or_reload_config_if_modified.cached_configs[filepath], None) # return previous config and None for changes
+    except Exception as e:
+        logging.error(f'could not reload {filepath}: got exception {e}')
+        raise e
+
+load_or_reload_config_if_modified.cached_configs=dict()
+load_or_reload_config_if_modified.mtimes=dict()
+load_or_reload_config_if_modified.start_time=time.time()
+load_or_reload_config_if_modified.counter=dict()
+load_or_reload_config_if_modified.actual_path=dict()
+
+import numbers
+import re
+
+def extract_int(new_value)-> (str,int):
+    """  Extracts (possibly signed) int from trailing part of string, e.g. 'ccw-1' -> -1
+
+    :param new_value: str input
+    :returns: s,n: leading string and trailing int value or None. If new_value contains /, it is assumed a path and is returned as new_value,None
+    """
+    str_pat='[a-zA-Z]+'
+    int_pat='[-+]?[0-9]+$'
+    sp_pat='\/' # file path string
+    if re.search(sp_pat,new_value):
+        log.debug(f'string {new_value} is a path, will not extract int from it')
+        return new_value,None
+    string_name=re.search(str_pat,new_value).group(0)
+    int_match=re.search(int_pat, new_value)
+    int_val=int(int_match.group(0)) if int_match else None
+    return string_name,int_val
+
+
+def update_attributes(updated_attributes: "dict[str, TensorType]", target_obj):
+    """ Update attributes in compiled code (tensorflow JIT) that have changed, i.e. copy them to the compiled/GPU instance.
+
+    After this call, such attribute values are available to the TF function as self.key, where key is the key used in dict.
+
+    Note that this attribute will NOT be accessible the compiled TF code if it has been declared ahead of time. I.e., do not define e.g. self.a=something and then
+    expect config a: to be picked up by the compiled code. It must be set for the first time by update_attributes!
+
+    Used in various controllers in Control_Toolkit/Control_Toolkit_ASF_Template/Controllers.
+
+    :param updated_attributes: a dict [key,value] with string key 'attribute' (aka property) and scalar numeric value or string value to set.
+        If the value is a string, and it ends with an intteger, e.g. 'spin0', then the
+        attribute is set to 'spin' and attribute_number is set to the int value of the number.
+    :param target_obj: the object into which we set the attribute.
+
+    """
+    for property, new_value in updated_attributes.items():
+        if hasattr(target_obj, property) : # target already has this attribute  # make sure it is mutable if we want to set it
+            objtype=None
+            if isinstance(new_value,numbers.Integral):
+                objtype=target_obj.lib.int32
+            elif isinstance(new_value,numbers.Real):
+                objtype=target_obj.lib.float32
+            elif isinstance(new_value,str):
+                objtype=target_obj.lib.string
+            elif isinstance(new_value,np.ndarray):
+                objtype=target_obj.lib.float32  # todo assuming all np arrays should go to float 32 here
+            elif isinstance(new_value,tf.Tensor):
+                objtype=tf.Tensor
+            elif isinstance(new_value,tf.Variable):
+                objtype=tf.Variable
+            else:
+                raise AttributeError(f'attribute \'{property}\' has unknown object type {type(new_value)}; cannot assign it')
+            if objtype==tf.Tensor:
+                # we cannot assign a value to an existing Tensor; they are immutable.
+                # setattr(target_obj, property, new_value)
+                raise AttributeError(f"target '{objtype}  for property '{property}'  is an immutable tf.Tensor. The compiled target_obj will not see this new value. Instead, create a tf.Variable that can be reassigned in compiled Tensorflow code..")
+            elif objtype==tf.Variable:
+                target_obj.lib.assign(getattr(target_obj, property), new_value)
+            elif not objtype is None and objtype!=target_obj.lib.string:
+                try:
+                    target_obj.lib.assign(getattr(target_obj, property), target_obj.lib.to_variable(new_value,objtype))
+                except ValueError:
+                    log.error(f'target attribute \'{property}\' is probably float type but in config file it is int. Add a trailing "." to the number \'{new_value}\'')
+                except AttributeError as e:
+                    log.error(f'target attribute \'{property}\' on target_obj \'{target_obj}\' could not be set to \'{new_value}\': caught {e}')
+
+                    # target_obj.lib.assign(getattr(target_obj, property), target_obj.lib.to_variable(new_value, target_obj.lib.to_variable(float(new_value), target_obj.lib.float32)))
+            else: # string type; if it ends with int, then also assign a name_number variable
+                val_str,val_int = extract_int(new_value)
+                if isinstance(val_int,int):
+                    property_number_name = property + '_number'
+                    target_obj.lib.assign(getattr(target_obj, property), target_obj.lib.to_variable(val_str, objtype))
+                    target_obj.lib.assign(getattr(target_obj, property_number_name),
+                                          target_obj.lib.to_variable(val_int, target_obj.lib.int32))
+                else: # just assign it if it does not end with digit
+                    target_obj.lib.assign(getattr(target_obj, property),
+                                          target_obj.lib.to_variable(new_value, objtype))
+
+        else: # target_object doesn't have this attribute (field) yet
+            # log.debug(
+            #     f"tensorflow attribute '{property}' does not exist in {target_obj.__class__.__name__}, setting it for first time")
+            if target_obj.lib is None:
+                setattr(target_obj, property, new_value)
+            else:
+                # just set the attribute, don't assign in (like =) since some immutable objects cannot be assigned
+                if isinstance(new_value,numbers.Integral):
+                    setattr(target_obj, property, target_obj.lib.to_variable(new_value, target_obj.lib.int32))
+                elif isinstance(new_value, str):
+                    # create a string tensor with base name, and another tf.variable named name_number, e.g. policy='dance' and policy_number=0
+                    val_str,val_int = extract_int(new_value)
+                    if isinstance(val_int,int):
+                        property_number_name = property + '_number'
+                        setattr(target_obj, property,target_obj.lib.to_variable(val_str, target_obj.lib.string)) # set string var
+                        setattr(target_obj, property_number_name,target_obj.lib.to_variable(val_int, target_obj.lib.int32)) # set int code var
+                        log.debug(f"setting tensorflow attribute '{property}={val_str}' and {property_number_name}={int(val_int)}")
+                    else:  # just assign it if it does not end with digit
+                        setattr(target_obj, property, target_obj.lib.to_variable(new_value, target_obj.lib.string))
+                elif isinstance(new_value,numbers.Real) or isinstance(new_value,np.ndarray):
+                    setattr(target_obj, property, target_obj.lib.to_variable(new_value, target_obj.lib.float32))
+                elif isinstance(new_value,(tf.Tensor,tf.Variable)):
+                    setattr(target_obj, property, new_value)
+                else:
+                    raise AttributeError(f'type \'{type(new_value)}\' of attribute \'{property}\' is not settable type, must be int, float (or np.ndarray), or string')
+
+
 class MockSpace:
     def __init__(self, low, high, shape: Tuple, dtype=np.float32) -> None:
         self.low, self.high = np.atleast_1d(low).astype(dtype), np.atleast_1d(high).astype(dtype)
diff --git a/others/p_globals.py b/others/p_globals.py
index a445a2d9..b3e0f650 100644
--- a/others/p_globals.py
+++ b/others/p_globals.py
@@ -1,46 +1,54 @@
 from types import SimpleNamespace
 import numpy as np
 
-from others.globals_and_utils import load_config
-
-config = load_config("config.yml")
-
-# Parameters of the CartPole
-P_GLOBALS = SimpleNamespace()  # "p" like parameters
-P_GLOBALS.m_pole = config["cartpole"]["m_pole"]
-P_GLOBALS.m_cart = config["cartpole"]["m_cart"]
-P_GLOBALS.L = float(config["cartpole"]["L"].split("/")[0])/float(config["cartpole"]["L"].split("/")[1])
-P_GLOBALS.u_max = config["cartpole"]["u_max"]
-P_GLOBALS.M_fric = config["cartpole"]["M_fric"]
-P_GLOBALS.J_fric = config["cartpole"]["J_fric"]
-P_GLOBALS.v_max = config["cartpole"]["v_max"]
-
-cart_length = config["cartpole"]["cart_length"]
-usable_track_length = config["cartpole"]["usable_track_length"]
-P_GLOBALS.TrackHalfLength = (usable_track_length-cart_length)/2.0  # m, effective length, by which cart center can move
-
-P_GLOBALS.controlDisturbance = config["cartpole"]["controlDisturbance"]
-P_GLOBALS.controlBias = config["cartpole"]["controlBias"]
-
-P_GLOBALS.g = config["cartpole"]["g"]
-P_GLOBALS.k = float(config["cartpole"]["k"].split("/")[0])/float(config["cartpole"]["k"].split("/")[1])
-
+from others.globals_and_utils import load_or_reload_config_if_modified, get_logger
+log=get_logger(__name__)
+
+(config, _) = load_or_reload_config_if_modified("config.yml",search_path=['CartPoleSimulation']) # add search path in case run from physical-cartpole
+CARTPOLE_PHYSICAL_CONSTANTS=config.cartpole # take this branch as the constants of the robot
+
+CARTPOLE_PHYSICAL_CONSTANTS.TrackHalfLength = (CARTPOLE_PHYSICAL_CONSTANTS.usable_track_length - CARTPOLE_PHYSICAL_CONSTANTS.cart_length) / 2.0  # m, effective length, by which cart center can move
+
+# https://en.wikipedia.org/wiki/Pendulum_(mechanics), https://en.wikipedia.org/wiki/List_of_moments_of_inertia
+# T=2*pi*sqrt(I/(mg(L/2)))  where
+# T is period,
+# L is length of rod,
+# I=1/3mL^2 is moment of inertia at end of rod
+# L/2 is distance from pivot axle to center of mass of rod (i.e. half of rod length),
+# m is rod mass
+# g is gravitational acceleration
+pole_length=CARTPOLE_PHYSICAL_CONSTANTS.L*2 # note factor of 2 to get complete pole length
+pole_mass=CARTPOLE_PHYSICAL_CONSTANTS.m_pole
+Iend=(1./3.)*pole_mass*pole_length**2 # the moment of inertia of pole from end of pole
+g=9.8 # gravity
+natural_period=2*np.pi*np.sqrt(Iend/(pole_mass*g*(pole_length/2))) # note the pole_length/2, it is the distance to COM of pole from pivot
+CARTPOLE_PHYSICAL_CONSTANTS.NaturalPeriod=natural_period
+log.info(f'computed natural period of cartpole T={CARTPOLE_PHYSICAL_CONSTANTS.NaturalPeriod:.3f}s, natural frequency={1/CARTPOLE_PHYSICAL_CONSTANTS.NaturalPeriod:.3f}Hz')
+# result is  T=1.034s, natural frequency=0.967Hz
+# tobi measured natural frequency of 1.02Hz +-.02
+# discrepancy might be explained by mass of mounting hardware near pivot point
 
 # Export variables as global
 def export_globals():
+    """ Returns a tuple of cartpole physical constants where each element of tuple is a numpy scalar single element array.
+    We use it to accelerate numpy operations with these constants.
+    """
     return (
-    np.array(P_GLOBALS.k, dtype=np.float32),
-    np.array(P_GLOBALS.m_cart, dtype=np.float32),
-    np.array(P_GLOBALS.m_pole, dtype=np.float32),
-    np.array(P_GLOBALS.g, dtype=np.float32),
-    np.array(P_GLOBALS.J_fric, dtype=np.float32),
-    np.array(P_GLOBALS.M_fric, dtype=np.float32),
-    np.array(P_GLOBALS.L, dtype=np.float32),
-    np.array(P_GLOBALS.v_max, dtype=np.float32),
-    np.array(P_GLOBALS.u_max, dtype=np.float32),
-    np.array(P_GLOBALS.controlDisturbance, dtype=np.float32),
-    np.array(P_GLOBALS.controlBias, dtype=np.float32),
-    np.array(P_GLOBALS.TrackHalfLength, dtype=np.float32)
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.k, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.m_cart, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.m_pole, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.g, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.J_fric, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.M_fric, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.L, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.v_max, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.u_max, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.controlDisturbance, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.controlBias, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.TrackHalfLength, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.cart_bounce_factor, dtype=np.float32),
+    np.array(CARTPOLE_PHYSICAL_CONSTANTS.NaturalPeriod, dtype=np.float32)
 )
 
-k, m_cart, m_pole, g, J_fric, M_fric, L, v_max, u_max, controlDisturbance, controlBias, TrackHalfLength = export_globals()
+# TODO why are these constants set here? They lose scope as soon as we leave this module
+k, m_cart, m_pole, g, J_fric, M_fric, L, v_max, u_max, controlDisturbance, controlBias, TrackHalfLength, cart_bounce_factor, NaturalPeriod = export_globals()
diff --git a/others/predictor_autoregressive_tf_Jerome.py b/others/predictor_autoregressive_tf_Jerome.py
index a36a67dc..6048c1e5 100644
--- a/others/predictor_autoregressive_tf_Jerome.py
+++ b/others/predictor_autoregressive_tf_Jerome.py
@@ -85,7 +85,7 @@ def predict(self, initial_state, Q):
 
     # Predict (Euler: 6.8ms, RNN:10.5ms)
     @CompileTF
-    def predict_tf(self, initial_state, Q):
+    def predict_tf(self, initial_state, Q, time:float=None):
         # assert tf.rank(Q) == 3
         # Select States
 
diff --git a/others/prefs.py b/others/prefs.py
new file mode 100644
index 00000000..16e08a69
--- /dev/null
+++ b/others/prefs.py
@@ -0,0 +1,13 @@
+# from __future__ import print_function
+from typing import Any
+
+from pypref import SinglePreferences as PREF
+class MyPreferences(PREF):
+    """ Class to hold user preference values, adds put() method to pypref SinglePreferences"""
+    # *args and **kwargs can be replaced by fixed arguments
+    def put(self,key:str, value:Any):
+        """ Put a value to preferences using key
+        :param key: a string key
+        :param value: the value to put
+        """
+        self.update_preferences({key:value})
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 0186e25c..4ed503d7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,12 +23,18 @@ typing-extensions
 PyQt6
 tensorflow-probability
 Pillow
+# generallibrary # print links in logging output https://stackoverflow.com/questions/26300594/print-code-link-into-pycharms-console
+dictdiffer
 
 torch
-tensorflow
+tensorflow==2.10 # need 2.10 since 2.11 changes some things like Adam optimzer
 
 do-mpc==4.1.1
 casadi
 nni
+munch # https://github.com/Infinidat/munch
+pypref # https://bachiraoun.github.io/pypref/
+ruamel.yaml # to load yml files with numbers in scientific notation
+python-vlc # to play music for cartpole-dancer, needs vlc installed to system also; make sure it is 64-bit version of VLC on windows!
 
 -e ./SI_Toolkit
diff --git a/run_data_generator.py b/run_data_generator.py
index b61caecd..1469be7d 100644
--- a/run_data_generator.py
+++ b/run_data_generator.py
@@ -1,3 +1,6 @@
+# runs experiments without interative (and slow) GUI showing cartpole state
+# see config_data_gen.yml
+
 import os
 import timeit
 from time import sleep
@@ -12,7 +15,7 @@
 from others.p_globals import TrackHalfLength
 
 # Uncomment if you want to get interactive plots for MPPI in Pycharm on MacOS
-# On other OS you have to chose a different interactive backend.
+# On other OS you have to choose a different interactive backend.
 # from matplotlib import use
 # # use('TkAgg')
 # use('macOSX')