diff --git a/CartPole/__init__.py b/CartPole/__init__.py index 302ffdf5..071a36b8 100644 --- a/CartPole/__init__.py +++ b/CartPole/__init__.py @@ -10,9 +10,12 @@ import csv # Import module to interact with OS import os +import sys import traceback # Import module to get a current time and date used to name the files containing the history of simulations from datetime import datetime +from typing import Optional + # To detect the latest csv file import numpy as np @@ -22,9 +25,9 @@ from Control_Toolkit.others.globals_and_utils import ( get_available_controller_names, get_available_optimizer_names, get_controller_name, get_optimizer_name, import_controller_by_name) from others.globals_and_utils import MockSpace, create_rng, load_config -from others.p_globals import (P_GLOBALS, J_fric, L, m_cart, M_fric, TrackHalfLength, - controlBias, controlDisturbance, export_globals, - g, k, m_pole, u_max, v_max) +from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength, + controlBias, controlDisturbance, g, k, m_pole, u_max, v_max, cart_bounce_factor, NaturalPeriod, + export_globals) # Interpolate function to create smooth random track from scipy.interpolate import BPoly, interp1d # Run range() automatically adding progress bar in terminal @@ -40,6 +43,9 @@ from CartPole.state_utilities import (ANGLE_COS_IDX, ANGLE_IDX, ANGLE_SIN_IDX, ANGLED_IDX, POSITION_IDX, POSITIOND_IDX) +from Control_Toolkit.others.get_logger import get_logger +log = get_logger(__name__) + # region Imported modules try: @@ -143,7 +149,7 @@ def __init__(self, initial_state=s0, path_to_experiment_recordings=None): # region Variables controlling operation of the program - should not be modified directly self.save_flag = False # Signalizes that the current time step should be saved self.csv_filepath = None # Where to save the experiment history. - self.controller = None # Placeholder for the currently used controller function + self.controller:template_controller = Optional[None] # Placeholder for the currently used controller function self.controller_name = '' # Placeholder for the currently used controller name self.optimizer_name = '' # Placeholder for the currently used optimizer name self.controller_idx = None # Placeholder for the currently used controller index @@ -199,7 +205,7 @@ def __init__(self, initial_state=s0, path_to_experiment_recordings=None): self.slider_max = 1.0 self.slider_value = 0.0 - self.show_hanging_pole = False + self.show_hanging_pole = True self.physical_to_graphics = None self.graphics_to_physical = None @@ -446,6 +452,7 @@ def cartpole_integration(self): def edge_bounce(self): # Elastic collision at edges + # TODO should be semielastic self.s[ANGLE_IDX], self.s[ANGLED_IDX], self.s[POSITION_IDX], self.s[POSITIOND_IDX] = edge_bounce_numba( self.s[ANGLE_IDX], np.cos(self.s[ANGLE_IDX]), @@ -460,6 +467,10 @@ def edge_bounce(self): # This function should be called for the first time to calculate 0th time step # Otherwise it goes out of sync with saving def Update_Q(self): + """ Determine the dimensionless [-1,1] value of the motor power Q + This function should be called for the first time to calculate 0th time step + Otherwise it goes out of sync with saving, + """ # Calculate time steps from last update # The counter should be initialized at max-1 to start with a control input update self.dt_controller_steps_counter += 1 @@ -471,7 +482,7 @@ def Update_Q(self): # in this case slider corresponds already to the power of the motor self.Q = self.slider_value else: # in this case slider gives a target position, lqr regulator - self.Q = self.controller.step(self.s_with_noise_and_latency, self.time, {"target_position": self.target_position, "target_equilibrium": self.target_equilibrium}) + self.Q = self.controller.step(self.s_with_noise_and_latency, self.time, updated_attributes= {"target_position": self.target_position, "target_equilibrium": self.target_equilibrium}) self.dt_controller_steps_counter = 0 @@ -488,6 +499,11 @@ def update_parameters(self): # This method saves the dictionary keeping the history of simulation to a .csv file def save_history_csv(self, csv_name=None, mode='init', length_of_experiment='unknown'): + """ Saves history of cartpole state and control + :param csv_name: the filename base, .csv is appended if it is not there. self.path_to_experiment_recordings is prepended for path + :param mode: the mode for saving, default is 'init' which makes the timestamped folder etc TODO what are these modes??????? + :param length_of_experiment: the duration of this experiment in seconds, written to header of CSV + """ if mode == 'init': @@ -550,8 +566,9 @@ def save_history_csv(self, csv_name=None, mode='init', length_of_experiment='unk writer.writerow(['#']) writer.writerow(['# Parameters:']) - for k in P_GLOBALS.__dict__: - writer.writerow(['# ' + k + ': ' + str(getattr(P_GLOBALS, k))]) + c = load_config("config.yml") + for k,v in c.items(): + writer.writerow(['# ' + k + ': ' + str(v)]) writer.writerow(['#']) writer.writerow(['# Data:']) @@ -900,8 +917,9 @@ def set_controller(self, controller_name=None, controller_idx=None): ) else: + log.debug(f'configuring controller "{self.controller}"') self.controller.configure() - + # Set the maximal allowed value of the slider - relevant only for GUI if self.controller_name == 'manual-stabilization': @@ -932,8 +950,10 @@ def set_cartpole_state_at_t0(self, reset_mode=1, s=None, target_position=None, r pass # reset global variables - global k, m_cart, m_pole, g, J_fric, M_fric, L, v_max, u_max, controlDisturbance, controlBias, TrackHalfLength - k[...], m_cart[...], m_pole[...], g[...], J_fric[...], M_fric[...], L[...], v_max[...], u_max[...], controlDisturbance[...], controlBias[...], TrackHalfLength[...] = export_globals() + global k, m_cart, m_pole, g, J_fric, M_fric, L, v_max, u_max, controlDisturbance, controlBias, TrackHalfLength, cart_bounce_factor, NaturalPeriod + # TODO why is ellipis object used here? https://stackoverflow.com/questions/772124/what-does-the-ellipsis-object-do + # these outputs of export_globals are numpy scalar arrays, i.e. each constant is a np.array with a single element + k[...], m_cart[...], m_pole[...], g[...], J_fric[...], M_fric[...], L[...], v_max[...], u_max[...], controlDisturbance[...], controlBias[...], TrackHalfLength[...], cart_bounce_factor[...], NaturalPeriod[...] = export_globals() self.time = 0.0 if reset_mode == 0: # Don't change it @@ -1297,3 +1317,17 @@ def animationManage(i): return anim # endregion + + +def is_physical_cartpole_running_and_control_enabled(): + """ super hack to determine if we are running physical cartpole and control is turned on""" + if 'DriverFunctions' in sys.modules: # if this module exists in sys.modules, we can deduce that physical-cartpole is running + try: + physical_cartpole_instance = sys.modules[ + 'DriverFunctions'].PhysicalCartPoleDriver.PhysicalCartPoleDriver.PhysicalCartPoleDriverInstance + if getattr(physical_cartpole_instance, 'controlEnabled') == True: + log.debug(f'physical cartpole present and control enabled') + return True + except Exception as e: + log.warning(f'Could not determine if control is enabled: {e}') + return False diff --git a/CartPole/cartpole_model.py b/CartPole/cartpole_model.py index f35b8358..505b2151 100644 --- a/CartPole/cartpole_model.py +++ b/CartPole/cartpole_model.py @@ -3,7 +3,7 @@ import numpy as np from others.globals_and_utils import create_rng, load_config from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength, - controlBias, controlDisturbance, g, k, m_pole, u_max, v_max) + controlBias, controlDisturbance, g, k, m_pole, u_max, v_max,cart_bounce_factor) from CartPole.state_utilities import (ANGLE_COS_IDX, ANGLE_IDX, ANGLE_SIN_IDX, ANGLED_IDX, POSITION_IDX, POSITIOND_IDX, @@ -30,8 +30,8 @@ Should be the same up to the angle-direction-convention and notation changes. The convention: -Pole upright position defines 0 angle -Cart movement to the right is positive +Pole upright position defines 0 angle, units of angle is in radians +Cart movement to the right is positive, units are meters Clockwise angle rotation is defined as negative Required angle convention for CartPole GUI: CLOCK-NEG @@ -146,12 +146,24 @@ def cartpole_ode(s: np.ndarray, u: float, return angleDD, positionDD -def edge_bounce(angle, angle_cos, angleD, position, positionD, t_step, L=L): +def edge_bounce(angle, angle_cos, angleD, position, positionD, t_step, L=L, cart_bounce_factor=cart_bounce_factor): + """ Models bounce at edge of cart track. Very simple complete elastic bounce currently. + + :param angle: + :param angle_cos: + :param angleD: + :param position: + :param positionD: + :param t_step: the timestep in seconds + :param L: the pole length + + :returns: angle, angleD, position, positionD + """ if position >= TrackHalfLength or -position >= TrackHalfLength: # Without abs to compile with tensorflow - angleD -= 2 * (positionD * angle_cos) / L - angle += angleD * t_step - positionD = -positionD - position += positionD * t_step + angleD -= 2 * (positionD * angle_cos) / L # TODO why this formula??? + # angle += angleD * t_step # update angle according to new derivative of angle + positionD = -cart_bounce_factor*positionD # perfect bounce + # position += positionD * t_step # step back the amount of bounce return angle, angleD, position, positionD diff --git a/CartPole/cartpole_model_tf.py b/CartPole/cartpole_model_tf.py index 4a416d03..58ba6161 100644 --- a/CartPole/cartpole_model_tf.py +++ b/CartPole/cartpole_model_tf.py @@ -3,7 +3,7 @@ import numpy as np import tensorflow as tf from others.globals_and_utils import create_rng, load_config -from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength, +from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength, cart_bounce_factor, controlBias, controlDisturbance, g, k, m_pole, u_max, v_max) from SI_Toolkit.Functions.TF.Compile import CompileTF @@ -26,6 +26,7 @@ controlDisturbance = tf.convert_to_tensor(controlDisturbance) controlBias = tf.convert_to_tensor(controlBias) TrackHalfLength = tf.convert_to_tensor(TrackHalfLength) +cart_bounce_factor = tf.convert_to_tensor(cart_bounce_factor) rng = create_rng(__name__, config["cartpole"]["seed"]) @@ -71,7 +72,10 @@ def _cartpole_ode(ca, sa, angleD, positionD, u, Calculates current values of second derivative of angle and position from current value of angle and position, and their first derivatives - :param angle, angleD, position, positionD: Essential state information of cart + :param angle, angleD, position, positionD: + Pole angle in radians. 0 means pole is upright. Clockwise angle rotation is defined as negative. + Cart position is in meters, 0 at middle of track, positive to rightwards. + Essential state information of cart :param u: Force applied on cart in unnormalized range :returns: angular acceleration, horizontal acceleration @@ -135,19 +139,46 @@ def cartpole_ode(s: np.ndarray, u: float, ) return angleDD, positionDD -def edge_bounce(angle, angle_cos, angleD, position, positionD, t_step, L=L): - if position >= TrackHalfLength or -position >= TrackHalfLength: # Without abs to compile with tensorflow - angleD -= 2 * (positionD * angle_cos) / L - angle += angleD * t_step - positionD = -positionD - position += positionD * t_step +# @tf.function +def edge_bounce(angle, angle_cos, angleD, position, positionD, t_step, L=L, cart_bounce_factor=cart_bounce_factor): + """ Models bounce at edge of cart track. Very simple elastic bounce currently. + + :param angle: Pole angle in radians. 0 means pole is upright. Clockwise angle rotation is defined as negative. + :param angle_cos: + :param angleD: rad/s. Positive means CCW rotation + :param position: meters, 0 at middle of table, positive rightwards + :param positionD: m/w, positive rightwards + :param t_step: the timestep in seconds + :param L: the pole length in meters + :param cart_bounce_factor: fraction of cart speed after bounce from edge + + :returns: angle, angleD, position, positionD + """ + + i=tf.greater_equal(tf.abs(position),TrackHalfLength) # find those rollouts that go past edge of track + + # for those that do, update the swing according to this dynamics + angleD = tf.where(i,angleD-2 * (positionD * angle_cos) / L, angleD) # TODO why this formula??? + # don't update angle since the euler step will already do it + # angle = angle+angleD * t_step # update angle according to new derivative of angle + # and the cart velocity is reversed with some absorption + positionD = tf.where(i,-cart_bounce_factor*positionD,positionD) # imperfect bounce + # don't update position since Euler step will do it + # position = position+positionD * t_step # step back the amount of bounce + + # following is old serial code + # if position >= TrackHalfLength or -position >= TrackHalfLength: # Without abs to compile with tensorflow + # angleD -= 2 * (positionD * angle_cos) / L # TODO why this formula??? + # angle += angleD * t_step # update angle according to new derivative of angle + # positionD = -cart_bounce_factor*positionD # perfect bounce + # position += positionD * t_step # step back the amount of bounce return angle, angleD, position, positionD def edge_bounce_wrapper(angle, angle_cos, angleD, position, positionD, t_step, L=L): for i in range(position.size): angle[i], angleD[i], position[i], positionD[i] = edge_bounce(angle[i], angle_cos[i], angleD[i], position[i], positionD[i], - t_step, L) + t_step, L) # see cartpole_tf.py; note we no longer need the edge_bounce wrapper because edge bounce compiles as tf code return angle, angleD, position, positionD diff --git a/CartPole/cartpole_tf.py b/CartPole/cartpole_tf.py index e2ebab84..818c6a5e 100644 --- a/CartPole/cartpole_tf.py +++ b/CartPole/cartpole_tf.py @@ -1,6 +1,6 @@ import tensorflow as tf from others.globals_and_utils import create_rng, load_config -from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength, +from others.p_globals import (J_fric, L, m_cart, M_fric, TrackHalfLength,cart_bounce_factor, controlBias, controlDisturbance, g, k, m_pole, u_max, v_max) from SI_Toolkit.Functions.TF.Compile import CompileTF @@ -25,11 +25,12 @@ controlDisturbance = tf.convert_to_tensor(controlDisturbance) controlBias = tf.convert_to_tensor(controlBias) TrackHalfLength = tf.convert_to_tensor(TrackHalfLength) +cart_bounce_factor = tf.convert_to_tensor(cart_bounce_factor) rng = create_rng(__name__, config["cartpole"]["seed"]) ### -# FIXME: Currently tf predictor is not modeling edge bounce! +# TODO: Currently tf predictor is not modeling edge bounce! ### @@ -56,7 +57,7 @@ def wrap_angle_rad(sin, cos): @CompileTF -def edge_bounce_wrapper(angle, angle_cos, angleD, position, positionD, t_step, L=L): +def edge_bounce_wrapper(angle, angle_cos, angleD, position, positionD, t_step, L=L, cart_bounce_factor=cart_bounce_factor): angle_bounced = tf.TensorArray(tf.float32, size=tf.size(angle), dynamic_size=False) angleD_bounced = tf.TensorArray(tf.float32, size=tf.size(angleD), dynamic_size=False) position_bounced = tf.TensorArray(tf.float32, size=tf.size(position), dynamic_size=False) @@ -65,7 +66,7 @@ def edge_bounce_wrapper(angle, angle_cos, angleD, position, positionD, t_step, L for i in tf.range(tf.size(position)): angle_i, angleD_i, position_i, positionD_i = edge_bounce_tf(angle[i], angle_cos[i], angleD[i], position[i], positionD[i], - t_step, L) + t_step, L, cart_bounce_factor=cart_bounce_factor) angle_bounced = angle_bounced.write(i, angle_i) angleD_bounced = angleD_bounced.write(i, angleD_i) position_bounced = position_bounced.write(i, position_i) @@ -127,10 +128,12 @@ def _cartpole_fine_integration_tf(angle, angleD, positionDD, t_step, ) # The edge bounce calculation seems to be too much for a GPU to tackle - # angle_cos = tf.cos(angle) - # angle, angleD, position, positionD = edge_bounce_wrapper(angle, angle_cos, angleD, position, positionD, t_step, L) - + # TODO it is currently commented out in master branch angle_cos = tf.cos(angle) + angle, angleD, position, positionD = edge_bounce(angle, angle_cos, angleD, position, positionD, t_step, L, cart_bounce_factor) + # # note we no longer need the edge_bounce wrapper because edge_bounce compiles as tf code + + # angle_cos = tf.cos(angle) angle_sin = tf.sin(angle) angle = wrap_angle_rad(angle_sin, angle_cos) @@ -138,8 +141,10 @@ def _cartpole_fine_integration_tf(angle, angleD, return angle, angleD, position, positionD, angle_cos, angle_sin +@CompileTF def cartpole_fine_integration_tf(s, u, t_step, intermediate_steps, k=k, m_cart=m_cart, m_pole=m_pole, g=g, J_fric=J_fric, M_fric=M_fric, L=L): + #print('test 5') """ Calculates current values of second derivative of angle and position from current value of angle and position, and their first derivatives diff --git a/CartPole/load.py b/CartPole/load.py index 9db54677..bff031a6 100644 --- a/CartPole/load.py +++ b/CartPole/load.py @@ -3,6 +3,8 @@ import os import glob +from typing import Optional + import pandas as pd def get_full_paths_to_csvs(default_locations='', csv_names=None): @@ -87,7 +89,12 @@ def get_full_paths_to_csvs(default_locations='', csv_names=None): # load csv file with experiment recording (e.g. for replay) -def load_csv_recording(file_path): +def load_csv_recording(file_path:str)->pd.DataFrame: + """ Loads the recording CSV file + :param file_path: path to CSV including full filename with suffix + + :returns: False if file not found or pd.DataFrame if found + """ if isinstance(file_path, list): file_path = file_path[0] diff --git a/CartPole/state_utilities.py b/CartPole/state_utilities.py index 6afbee6e..ffc95f0e 100644 --- a/CartPole/state_utilities.py +++ b/CartPole/state_utilities.py @@ -6,6 +6,9 @@ ["angle", "angleD", "angle_cos", "angle_sin", "position", "positionD",] ) +NUM_STATES:int=len(STATE_VARIABLES) + +# dict[state_name, index], e.g. STATE_INDICES['angle']=0 STATE_INDICES = {x: np.where(STATE_VARIABLES == x)[0][0] for x in STATE_VARIABLES} CONTROL_INPUTS = np.sort(["Q"]) @@ -13,12 +16,12 @@ CONTROL_INDICES = {x: np.where(CONTROL_INPUTS == x)[0][0] for x in CONTROL_INPUTS} """Define indices of values in state statically""" -ANGLE_IDX = STATE_INDICES["angle"].item() -ANGLED_IDX = STATE_INDICES["angleD"].item() -POSITION_IDX = STATE_INDICES["position"].item() -POSITIOND_IDX = STATE_INDICES["positionD"].item() -ANGLE_COS_IDX = STATE_INDICES["angle_cos"].item() -ANGLE_SIN_IDX = STATE_INDICES["angle_sin"].item() +ANGLE_IDX = STATE_INDICES["angle"].item() # Pole angle in radians. 0 means pole is upright. Clockwise angle rotation is defined as negative. +ANGLED_IDX = STATE_INDICES["angleD"].item() # Angular velocity of pole in rad/s, CCW is positive. +POSITION_IDX = STATE_INDICES["position"].item() # Horizontal position of pole in meters. +POSITIOND_IDX = STATE_INDICES["positionD"].item() # Horizontal velocity of pole in m/s. Cart movement to the right is positive. +ANGLE_COS_IDX = STATE_INDICES["angle_cos"].item() # cos of angle, vertical means 1 and -1 means hanging down +ANGLE_SIN_IDX = STATE_INDICES["angle_sin"].item() # sin of angle, 0 upright and hanging, +1 for leftwards, -1 for rightwards def create_cartpole_state(state: dict = {}, dtype=None) -> np.ndarray: @@ -28,18 +31,22 @@ def create_cartpole_state(state: dict = {}, dtype=None) -> np.ndarray: Input parameters are passed as a dict with the following possible keys. Other keys are ignored. Unset key-value pairs are initialized to 0. - :param angle: Pole angle. 0 means pole is upright. Clockwise angle rotation is defined as negative. - :param angleD: Angular velocity of pole. - :param position: Horizontal position of pole. - :param positionD: Horizontal velocity of pole. Cart movement to the right is positive. + :param angle: Pole angle in radians. 0 means pole is upright. Clockwise angle rotation is defined as negative. + :param angleD: Angular velocity of pole in rad/s, CCW is positive. + :param position: Horizontal position of pole in meters. + :param positionD: Horizontal velocity of pole in m/s. Cart movement to the right is positive. :returns: A numpy.ndarray with values filled in order set by STATE_VARIABLES """ + initial_pole_angle=0 # set to zero to start upright, np.pi to hang down + + state['angle']=initial_pole_angle + state["angle_cos"] = ( - np.cos(state["angle"]) if "angle" in state.keys() else np.cos(0.0) + np.cos(state["angle"]) if "angle" in state.keys() else np.cos(initial_pole_angle) ) state["angle_sin"] = ( - np.sin(state["angle"]) if "angle" in state.keys() else np.sin(0.0) + np.sin(state["angle"]) if "angle" in state.keys() else np.sin(initial_pole_angle) ) if dtype is None: @@ -48,6 +55,7 @@ def create_cartpole_state(state: dict = {}, dtype=None) -> np.ndarray: s = np.zeros_like(STATE_VARIABLES, dtype=np.float32) for i, v in enumerate(STATE_VARIABLES): s[i] = state.get(v) if v in state.keys() else s[i] + return s diff --git a/Control_Toolkit b/Control_Toolkit index 09a0a720..807c46de 160000 --- a/Control_Toolkit +++ b/Control_Toolkit @@ -1 +1 @@ -Subproject commit 09a0a7204d8f52656e8d056625b45ee398d243c8 +Subproject commit 807c46de404c542c4a074b99645be8e7c8397be0 diff --git a/Control_Toolkit_ASF/Controllers/controller_do_mpc.py b/Control_Toolkit_ASF/Controllers/controller_do_mpc.py index 1d2f7d75..c2431fa9 100644 --- a/Control_Toolkit_ASF/Controllers/controller_do_mpc.py +++ b/Control_Toolkit_ASF/Controllers/controller_do_mpc.py @@ -9,7 +9,7 @@ from CartPole.cartpole_model import Q2u, cartpole_ode_namespace from CartPole.state_utilities import cartpole_state_vector_to_namespace from Control_Toolkit.Controllers import template_controller -from others.globals_and_utils import create_rng +from others.globals_and_utils import create_rng, update_attributes from SI_Toolkit.computation_library import NumpyLibrary, TensorType @@ -136,16 +136,16 @@ def tvp_fun(self, t_ind): return self.tvp_template - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - self.update_attributes(updated_attributes) + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + update_attributes(updated_attributes,self) - s = cartpole_state_vector_to_namespace(s) + state = cartpole_state_vector_to_namespace(state) - self.x0['s.position'] = s.position - self.x0['s.positionD'] = s.positionD + self.x0['s.position'] = state.position + self.x0['s.positionD'] = state.positionD - self.x0['s.angle'] = s.angle - self.x0['s.angleD'] = s.angleD + self.x0['s.angle'] = state.angle + self.x0['s.angleD'] = state.angleD self.tvp_template['_tvp', :, 'target_position'] = self.target_position diff --git a/Control_Toolkit_ASF/Controllers/controller_do_mpc_discrete.py b/Control_Toolkit_ASF/Controllers/controller_do_mpc_discrete.py index 353792fa..dbf13dde 100644 --- a/Control_Toolkit_ASF/Controllers/controller_do_mpc_discrete.py +++ b/Control_Toolkit_ASF/Controllers/controller_do_mpc_discrete.py @@ -9,6 +9,7 @@ from CartPole.state_utilities import cartpole_state_vector_to_namespace from Control_Toolkit.Controllers import template_controller from SI_Toolkit.computation_library import NumpyLibrary, TensorType +from others.globals_and_utils import update_attributes def mpc_next_state(s, u, dt): @@ -145,16 +146,16 @@ def configure(self): def tvp_fun(self, t_ind): return self.tvp_template - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - self.update_attributes(updated_attributes) + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + update_attributes(updated_attributes,self) - s = cartpole_state_vector_to_namespace(s) + state = cartpole_state_vector_to_namespace(state) - self.x0['s.position'] = s.position - self.x0['s.positionD'] = s.positionD + self.x0['s.position'] = state.position + self.x0['s.positionD'] = state.positionD - self.x0['s.angle'] = s.angle - self.x0['s.angleD'] = s.angleD + self.x0['s.angle'] = state.angle + self.x0['s.angleD'] = state.angleD self.tvp_template['_tvp', :, 'target_position'] = self.target_position diff --git a/Control_Toolkit_ASF/Controllers/controller_energy.py b/Control_Toolkit_ASF/Controllers/controller_energy.py new file mode 100644 index 00000000..cbf15e6d --- /dev/null +++ b/Control_Toolkit_ASF/Controllers/controller_energy.py @@ -0,0 +1,182 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu May 11 18:07:58 2023 + +@author: Shreyan Banerjee +""" + +""" +This is a linear-quadratic regulator +It assumes that the input relation is u = Q*u_max (no fancy motor model) ! +""" + +from SI_Toolkit.computation_library import NumpyLibrary, TensorType +import numpy as np +import scipy +import yaml +import os + +from CartPole.cartpole_jacobian import cartpole_jacobian +from CartPole.cartpole_model import s0, u_max +from CartPole.state_utilities import (ANGLE_IDX, ANGLED_IDX, POSITION_IDX, + POSITIOND_IDX) +from Control_Toolkit.Controllers import template_controller +from others.globals_and_utils import create_rng + +config = yaml.load(open("config.yml", "r"), Loader=yaml.FullLoader) +actuator_noise = config["cartpole"]["actuator_noise"] + + +class controller_energy(template_controller): + _computation_library = NumpyLibrary + + def configure(self): + # From https://github.com/markwmuller/controlpy/blob/master/controlpy/synthesis.py#L8 + """Solve the continuous time LQR controller for a continuous time system. + A and B are system matrices, describing the systems dynamics: + dx/dt = A x + B u + The controller minimizes the infinite horizon quadratic cost function: + cost = integral (x.T*Q*x + u.T*R*u) dt + where Q is a positive semidefinite matrix, and R is positive definite matrix. + Returns K, X, eigVals: + Returns gain the optimal gain K, the solution matrix X, and the closed loop system eigenvalues. + The optimal input is then computed as: + input: u = -K*x + """ + self.p_Q = actuator_noise + # ref Bertsekas, p.151 + self.theta = 0 + self.thetaprev = 0 + self.thetadot = 0 + self.thetadotprev = 0 + self.U = 0 + self.K = 0 + self.deltaU = 0 + + seed = self.config_controller["seed"] + self.rng = create_rng(self.__class__.__name__, seed if seed==None else seed*2) + + # Calculate Jacobian around equilibrium + # Set point around which the Jacobian should be linearized + # It can be here either pole up (all zeros) or pole down + s = s0 + s[POSITION_IDX] = 0.0 + s[POSITIOND_IDX] = 0.0 + s[ANGLE_IDX] = 0.0 + s[ANGLED_IDX] = 0.0 + u = 0.0 + + + + jacobian = cartpole_jacobian(s, u) + A = jacobian[:, :-1] + B = np.reshape(jacobian[:, -1], newshape=(4, 1)) * u_max + + # Cost matrices for LQR controller + self.Q = np.diag(self.config_controller["Q"]) # How much to punish x, v, theta, omega + self.R = self.config_controller["R"] # How much to punish Q + + # first, try to solve the ricatti equation + X = scipy.linalg.solve_continuous_are(A, B, self.Q, self.R) + + # compute the LQR gain + if np.array(self.R).ndim == 0: + Ri = 1.0 / self.R + else: + Ri = np.linalg.inv(self.R) + + K = np.dot(Ri, (np.dot(B.T, X))) + + eigVals = np.linalg.eigvals(A - np.dot(B, K)) + + self.K = K + self.X = X + self.eigVals = eigVals + + def controller_reset(self): + #self.optimizer.optimizer_reset() + #self.cartpole_trajectory_generator.reset() + pass + + def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + self.update_attributes(updated_attributes) + + state = np.array( + [[s[POSITION_IDX] - self.variable_parameters.target_position], [s[POSITIOND_IDX]], [s[ANGLE_IDX]], [s[ANGLED_IDX]]]) + + #Q = np.dot(-self.K, state).item() + + self.theta = s[ANGLE_IDX] + self.thetadot = s[ANGLED_IDX] + + m = 1 + l = 1 + mc = 1 + g = 9.8 + + #Modify + self.K = 0.5 * m*l*l * self.thetadot**2 + self.U = 0.5 * m*g*l*np.cos(self.theta) + E = self.K+self.U + + + u = ((m*g*abs(np.sin(self.theta))/9.8)*2)*self.thetadot/(abs(self.thetadot+0.01)) + #u = (m*g*abs(np.sin(theta)) - m*l*thetaddot)*2/g + #c1+=1 + self.deltaU = m*g*l*(1-np.cos(self.theta)) + # if thetadot*theta>0: + # pausefactor = -K + deltaU + # #c = 1/(1+np.exp(pausefactor*10)) + #c1+=1 + # if c1%500==0: + # done = True + + if (np.cos(self.theta)-np.cos(self.thetaprev))<0: + if (self.K+self.deltaU - m*g*l)>=0: + u=0 + else: + if (self.K-self.deltaU)>=0: + u=0 + + u= (2*u/l)*(m/mc) + u = u * (np.cos(self.theta)/abs(np.cos(self.theta))) + u*=6 + #print(theta) + # if abs(theta)>0.9: + # print("Failed") + # theta sign reversal for stabilization problem + if abs(self.theta)<0.5:# and self.theta*self.thetadot<0: + # if abs(self.thetadot)<0.05 and abs(theta)>0.3: + # u = + u = m*g*l*np.sin(self.theta)*1 + u= (2*u/l)*(m/mc) + u = -u#*(np.cos(theta)/abs(np.cos(theta))) + u*=1 + #print(u) + + #print(u) + # if c1 <5: + # u = 1 + self.thetaprev = self.theta + self.thetadotprev = self.thetadot + self.Kprev = self.K + # time_step = env.step([u]) + # observation = time_step.observation + # position = T.Tensor(time_step.observation['position']) + # velocity = T.Tensor(time_step.observation['velocity']) + # observation = T.cat((position,velocity),0).reshape(len(position)+len(velocity)) + # costheta = position[1].item() + # sintheta = position[2].item() + # theta = math.atan2(sintheta,costheta) + # thetadot = velocity[1].item() + #thetadotprev = thetadot + #thetaprev = theta + + + # Mod ends + Q = u + #Q *= (1 + self.p_Q * float(self.rng.uniform(self.action_low, self.action_high))) + + # Clip Q + Q = np.clip(Q, -1.0, 1.0, dtype=np.float32) + return Q \ No newline at end of file diff --git a/Control_Toolkit_ASF/Controllers/controller_lqr.py b/Control_Toolkit_ASF/Controllers/controller_lqr.py index a4010aee..78e4239b 100644 --- a/Control_Toolkit_ASF/Controllers/controller_lqr.py +++ b/Control_Toolkit_ASF/Controllers/controller_lqr.py @@ -14,7 +14,7 @@ from CartPole.state_utilities import (ANGLE_IDX, ANGLED_IDX, POSITION_IDX, POSITIOND_IDX) from Control_Toolkit.Controllers import template_controller -from others.globals_and_utils import create_rng +from others.globals_and_utils import create_rng, update_attributes config = yaml.load(open("config.yml", "r"), Loader=yaml.FullLoader) actuator_noise = config["cartpole"]["actuator_noise"] @@ -81,11 +81,11 @@ def configure(self): self.X = X self.eigVals = eigVals - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): - self.update_attributes(updated_attributes) + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + update_attributes(updated_attributes,self) state = np.array( - [[s[POSITION_IDX] - self.target_position], [s[POSITIOND_IDX]], [s[ANGLE_IDX]], [s[ANGLED_IDX]]]) + [[state[POSITION_IDX] - self.target_position], [state[POSITIOND_IDX]], [state[ANGLE_IDX]], [state[ANGLED_IDX]]]) Q = np.dot(-self.K, state).item() diff --git a/Control_Toolkit_ASF/Controllers/controller_mppi_cartpole.py b/Control_Toolkit_ASF/Controllers/controller_mppi_cartpole.py index 21bd2c9f..8bf9a2f9 100644 --- a/Control_Toolkit_ASF/Controllers/controller_mppi_cartpole.py +++ b/Control_Toolkit_ASF/Controllers/controller_mppi_cartpole.py @@ -26,6 +26,8 @@ from matplotlib.widgets import Slider from numba import jit from numpy.random import SFC64, Generator + +from others.globals_and_utils import update_attributes from others.p_globals import TrackHalfLength from scipy.interpolate import interp1d from SI_Toolkit.Predictors.predictor_ODE import predictor_ODE @@ -42,14 +44,14 @@ """Timestep and sampling settings""" mpc_horizon = config_mppi_cartpole["mpc_horizon"] -num_rollouts = config_mppi_cartpole["num_rollouts"] +batch_size = config_mppi_cartpole["batch_size"] update_every = config_mppi_cartpole["update_every"] predictor_specification = config_mppi_cartpole["predictor_specification"] dt = config_data_gen["dt"]["control"] """Define Predictor""" predictor = PredictorWrapper() -predictor.configure(batch_size=num_rollouts, horizon=mpc_horizon, dt=dt, predictor_specification=predictor_specification) +predictor.configure(batch_size=batch_size, horizon=mpc_horizon, dt=dt, predictor_specification=predictor_specification) if predictor.predictor_config['predictor_type'] == 'neural': MODEL_NAME = predictor.predictor_config['model_name'] @@ -89,7 +91,6 @@ LBD = config_mppi_cartpole["LBD"] NU = config_mppi_cartpole["NU"] SQRTRHODTINV = config_mppi_cartpole["SQRTRHOINV"] * (1 / np.sqrt(dt)) -GAMMA = config_mppi_cartpole["GAMMA"] SAMPLING_TYPE = config_mppi_cartpole["SAMPLING_TYPE"] @@ -178,7 +179,7 @@ def trajectory_rollouts( :type S_tile_k: np.ndarray :param u: Vector of nominal inputs computed in previous iteration :type u: np.ndarray - :param delta_u: Array containing all input perturbation samples. Shape (num_rollouts x horizon_steps) + :param delta_u: Array containing all input perturbation samples. Shape (batch_size x horizon_steps) :type delta_u: np.ndarray :param u_prev: Array with nominal inputs from previous iteration. Used to compute cost of control change :type u_prev: np.ndarray @@ -187,7 +188,7 @@ def trajectory_rollouts( :return: S_tilde_k - Array filled with a cost for each rollout trajectory """ - initial_state = np.tile(s, (num_rollouts, 1)) + initial_state = np.tile(s, (batch_size, 1)) s_horizon = predictor.predict(initial_state, (u + delta_u)[..., np.newaxis])[:, :, : len(STATE_INDICES)] @@ -220,7 +221,7 @@ def trajectory_rollouts( # (1 x mpc_horizon) LOGS.get("states").append( np.copy(s_horizon[:, :-1, :]) - ) # num_rollouts x mpc_horizon x STATE_VARIABLES + ) # batch_size x mpc_horizon x STATE_VARIABLES return S_tilde_k @@ -328,9 +329,9 @@ def update_inputs(u: np.ndarray, S: np.ndarray, delta_u: np.ndarray): :param u: Sampling mean / warm started control inputs of size (,mpc_horizon) :type u: np.ndarray - :param S: Cost array of size (num_rollouts) + :param S: Cost array of size (batch_size) :type S: np.ndarray - :param delta_u: The input perturbations that had been used, shape (num_rollouts x mpc_horizon) + :param delta_u: The input perturbations that had been used, shape (batch_size x mpc_horizon) :type delta_u: np.ndarray """ u += reward_weighted_average(S, delta_u) @@ -369,8 +370,8 @@ def configure(self): self.s_horizon = np.zeros((), dtype=np.float32) self.u = np.zeros((mpc_horizon), dtype=np.float32) self.u_prev = np.zeros_like(self.u, dtype=np.float32) - self.delta_u = np.zeros((num_rollouts, mpc_horizon), dtype=np.float32) - self.S_tilde_k = np.zeros((num_rollouts), dtype=np.float32) + self.delta_u = np.zeros((batch_size, mpc_horizon), dtype=np.float32) + self.S_tilde_k = np.zeros((batch_size), dtype=np.float32) self.wash_out_len = WASH_OUT_LEN self.warm_up_countdown = self.wash_out_len @@ -404,7 +405,7 @@ def initialize_perturbations( - "interpolated" - Sample a new independent perturbation every 10th MPC horizon step. Interpolate in between the samples - "iid" - Sample independent and identically distributed samples of a Gaussian distribution :type sampling_type: str, optional - :return: Independent perturbation samples of shape (num_rollouts x horizon_steps) + :return: Independent perturbation samples of shape (batch_size x horizon_steps) :rtype: np.ndarray """ """ @@ -413,23 +414,23 @@ def initialize_perturbations( If random_walk is true, each row represents a 1D random walk with Gaussian steps. """ if sampling_type == "random_walk": - delta_u = np.empty((num_rollouts, mpc_horizon), dtype=np.float32) + delta_u = np.empty((batch_size, mpc_horizon), dtype=np.float32) delta_u[:, 0] = stdev * self.rng_mppi.standard_normal( - size=(num_rollouts,), dtype=np.float32 + size=(batch_size,), dtype=np.float32 ) for i in range(1, mpc_horizon): delta_u[:, i] = delta_u[:, i - 1] + stdev * self.rng_mppi.standard_normal( - size=(num_rollouts,), dtype=np.float32 + size=(batch_size,), dtype=np.float32 ) elif sampling_type == "uniform": - delta_u = np.empty((num_rollouts, mpc_horizon), dtype=np.float32) + delta_u = np.empty((batch_size, mpc_horizon), dtype=np.float32) for i in range(0, mpc_horizon): delta_u[:, i] = self.rng_mppi.uniform( - low=-1.0, high=1.0, size=(num_rollouts,) + low=-1.0, high=1.0, size=(batch_size,) ).astype(np.float32) elif sampling_type == "repeated": delta_u = np.tile( - stdev * self.rng_mppi.standard_normal(size=(num_rollouts, 1), dtype=np.float32), + stdev * self.rng_mppi.standard_normal(size=(batch_size, 1), dtype=np.float32), (1, mpc_horizon), ) elif sampling_type == "interpolated": @@ -438,33 +439,33 @@ def initialize_perturbations( t = np.arange(start=0, stop=range_stop, step=step) t_interp = np.arange(start=0, stop=range_stop, step=1) t_interp = np.delete(t_interp, t) - delta_u = np.zeros(shape=(num_rollouts, range_stop), dtype=np.float32) + delta_u = np.zeros(shape=(batch_size, range_stop), dtype=np.float32) delta_u[:, t] = stdev * self.rng_mppi.standard_normal( - size=(num_rollouts, t.size), dtype=np.float32 + size=(batch_size, t.size), dtype=np.float32 ) f = interp1d(t, delta_u[:, t]) delta_u[:, t_interp] = f(t_interp) delta_u = delta_u[:, :mpc_horizon] else: delta_u = stdev * self.rng_mppi.standard_normal( - size=(num_rollouts, mpc_horizon), dtype=np.float32 + size=(batch_size, mpc_horizon), dtype=np.float32 ) return delta_u - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): """Perform controller step - :param s: State passed to controller after system has evolved for one step - :type s: np.ndarray + :param state: State passed to controller after system has evolved for one step + :type state: np.ndarray :param time: Time in seconds that has passed in the current experiment, defaults to None :type time: float, optional :return: A normed control value in the range [-1.0, 1.0] :rtype: np.float32 """ - self.update_attributes(updated_attributes) + update_attributes(updated_attributes,self) - self.s = s + self.s = state self.iteration += 1 @@ -510,8 +511,8 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # This is a lot of unnecessary calculation, but a stateful RNN in TF has frozen batch size # FIXME: Problaby you can reduce it! - rollout_trajectory = predictor.predict(np.tile(self.s, (num_rollouts, 1)), - np.tile(self.u[np.newaxis, :, np.newaxis], (num_rollouts, 1, 1)) + rollout_trajectory = predictor.predict(np.tile(self.s, (batch_size, 1)), + np.tile(self.u[np.newaxis, :, np.newaxis], (batch_size, 1, 1)) )[0, ...] LOGS.get("nominal_rollouts").append(np.copy(rollout_trajectory[:-1, :])) @@ -527,8 +528,8 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy ): self.warm_up_countdown -= 1 - if abs(s[ANGLE_IDX]) < np.pi/10.0: # Stabilize during warm_up with auxiliary controller if initial angle small - Q = self.auxiliary_controller.step(s, self.target_position) + if abs(state[ANGLE_IDX]) < np.pi/10.0: # Stabilize during warm_up with auxiliary controller if initial angle small + Q = self.auxiliary_controller.step(state, self.target_position) else: Q = self.rng_mppi_rnn.uniform(-1, 1) # Apply random input to let RNN "feel" the system behaviour else: @@ -564,7 +565,7 @@ def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorTy # self.u = zeros_like(self.u) # Prepare predictor for next timestep - Q_update = np.tile(Q, (num_rollouts, 1, 1)) + Q_update = np.tile(Q, (batch_size, 1, 1)) predictor.update(Q_update, self.s) return Q # normed control input in the range [-1,1] @@ -586,7 +587,7 @@ def controller_report(self): ### Plot the average state cost per iteration ctglgs = np.stack( LOGS.get("cost_to_go"), axis=0 - ) # ITERATIONS x num_rollouts + ) # ITERATIONS x batch_size NUM_ITERATIONS = np.shape(ctglgs)[0] time_axis = update_every * dt * np.arange(start=0, stop=np.shape(ctglgs)[0]) plt.figure(num=2, figsize=(16, 9)) @@ -698,7 +699,7 @@ def draw_rollouts( ) # Prepare data - # shape(slgs) = ITERATIONS x num_rollouts x mpc_horizon x STATE_VARIABLES + # shape(slgs) = ITERATIONS x batch_size x mpc_horizon x STATE_VARIABLES slgs = np.stack(LOGS.get("states"), axis=0) wrap_angle_rad_inplace(slgs[:, :, :, ANGLE_IDX]) # shape(iplgs) = ITERATIONS x mpc_horizon @@ -735,7 +736,7 @@ def draw_rollouts( ) # Normalize cost to go to use as opacity in plot - # shape(ctglgs) = ITERATIONS x num_rollouts + # shape(ctglgs) = ITERATIONS x batch_size ctglgs = np.divide(ctglgs.T, np.max(np.abs(ctglgs), axis=1)).T # This function updates the plot when a new iteration is selected diff --git a/Control_Toolkit_ASF/Controllers/controller_secloc.py b/Control_Toolkit_ASF/Controllers/controller_secloc.py index fc03108a..b8fab586 100644 --- a/Control_Toolkit_ASF/Controllers/controller_secloc.py +++ b/Control_Toolkit_ASF/Controllers/controller_secloc.py @@ -36,12 +36,12 @@ def configure(self): self.interpolation = interp1d([-self.motor_map,self.motor_map], [1,-1]) self.step_idx = 0 - def step(self, s: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): + def step(self, state: np.ndarray, time=None, updated_attributes: "dict[str, TensorType]" = {}): self.update_attributes(updated_attributes) # Read the cartpole state s = ["angle", "angleD", "angle_cos", "angle_sin", "position", "positionD"] # angle: pole UP -> 0, then +/-pi print(f"***** Step #{self.step_idx} *****") - pole_angle = s[0] + pole_angle = state[0] print(f"Current pole angle value: {pole_angle} radians. left/right --> +/- pi. Pole UP --> angle=0") print("Update the potentiometer event based sensor using the cartpole sate") self.potentiometer.update(signal_in= pole_angle) diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/__init__.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/__init__.py new file mode 100644 index 00000000..b28b04f6 --- /dev/null +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/__init__.py @@ -0,0 +1,3 @@ + + + diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-satisfaction.csv b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-satisfaction.csv new file mode 100644 index 00000000..f2782173 --- /dev/null +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-satisfaction.csv @@ -0,0 +1,26 @@ +# cartpole dancer dance sequence for song satisfaction rolling stones +# it has 135bpm with offset of 40ms according to BeatEdit +# which means the beat frequency is 2.25Hz and so all frequencies should be multiple or integer fraction of 2.25Hz, e.g. 1.125, .5625 +# policies can be balance1, spin2, shimmy3, cartonly4, cartwheel5 +# self.cartwheel_cycles=self.cartpole_dancer.amp +endtime, cartpos, policy, option, freq, amp, freq2, amp2 +3,0, balance1,1,,,, +00:7.3,0, cartonly4,.5,1.125,0.1,, +00:14.6,0, cartonly4,.5,2.25,0.1,, +00:18.3,0, toandfro6, ,,2,, +00:21.3,0, toandfro6, ,,-2,, +00:24.5,0, cartwheel5, ,,2,, +00:27.9,0, cartwheel5, ,,-2,, +00:35.5,-0.05, spin2, -1,,,, +00:42.2,0.05, spin2, +1,,,, +00:50.0,0, shimmy3,,.5625,0.07,2.25,0.08 +00:53.5,0, shimmy3,,1.125,0.04,2.25,0.08 +00:57.0,0, shimmy3,,2.25,0.03,2.25,0.08 +01:00.4,0, cartwheel5, ,,1,, +01:04.5,0, cartwheel5, ,,-1,, +01:07.3,0, cartwheel5, ,,2,, +01:09.8,0, cartwheel5, ,,-2,, +01:15.1,-0.05, spin2, -1,,,, +01:19.0,0.05, spin2, +1,,,, +01:22.2,0, shimmy3,,2.25,0.03,2.25,0.08 +01:26.1,0, balance1,-1,,,, diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-sookie-sookie.csv b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-sookie-sookie.csv new file mode 100644 index 00000000..1a9329e6 --- /dev/null +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-sookie-sookie.csv @@ -0,0 +1,12 @@ +# cartpole dancer dance sequence for song sookie-sookie grant green +# it has 126bpm with offset of 420ms according to BeatEdit +# which means the beat frequency is 2.1Hz and so all frequencies should be multiple or integer fraction of 2.1Hz, e.g. 1.05, .525 etc +# policies can be balance1, spin2, shimmy3, cartonly4 +endtime, cartpos, policy, option, freq, amp, freq2, amp2 +5.4, 0, balance1, up,,,, +12, -.05, shimmy3,up, 1.05, .04, 1.05, .04 +29 , +.05, shimmy3,up, 2.1, .02, 2.1, .02 +33, .05, spin2, cw, 5,,, +59 , -.05, spin2, ccw, 5,,, +166, 0, shimmy3,up, 2.1, .03, 2.1, .1 + diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance.csv b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance.csv new file mode 100644 index 00000000..5b80eb1e --- /dev/null +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance.csv @@ -0,0 +1,17 @@ +# cartpole dancer dance sequence +# policies can be balance1, spin2, shimmy3, cartonly4 +duration, cartpos, policy, option, freq, amp, freq2, amp2 +4, 0, balance1, up,,,, +6, .1, spin2, cw, 5,,, +#1, 0, balance1, up,,,, +6, -.1, spin2,ccw, 5,,, +3, -.1, balance1, up,,,, +6, 0, shimmy3,up, 2, .05, 1, .1 +1, 0, balance1, up,,,, +8, 0, shimmy3,up, 1, .001, 1, .1 +1.5, .15, balance1, up,,,, +1.5, -.15, balance1, up,,,, +#2, 0, balance1, up,,,, +# 4, 0, cartonly4,.9, 2, .1,, +3, 0, balance1, down,,,, + diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer.py new file mode 100644 index 00000000..cdd83451 --- /dev/null +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer.py @@ -0,0 +1,278 @@ +import csv +from pathlib import Path +from typing import Dict + +from CartPole import is_physical_cartpole_running_and_control_enabled +from GUI import CartPoleMainWindow +from others.globals_and_utils import get_logger + +# SONG='others/Media/01 Sookie_ Sookie.mp3' +# CSV='Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-sookie-sookie.csv' +# SONG="others/Media/Rolling_Stones_Satisfaction.mp3" +# CSV='Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-satisfaction.csv' +# SONG_PLAYBACK_RATE=0.55 # rate to match song speed compared to real time simulation rate + +import os +try: + import vlc +except Exception as e: + raise Exception(f'{e}: pip install python-vlc and install VLC (https://www.videolan.org/vlc/) to your system. Y' + f'ou may need to restart your python IDE. See also https://stackoverflow.com/questions/59014318/filenotfounderror-could-not-find-module-libvlc-dll ;' + f'you might need 64 bit version of VLC since default installs 32-bit version') +import sys + +log = get_logger(__name__) +try: + import winsound +except: + log.warning('winsound is not available - will not play beeps when dance step changes') + pass + +UPDATE_INTERVAL=10 # update interval for status text and song rate update + +class cartpole_dancer: + """ Reads steps from CSV file for cartpole dancer + """ + + def __init__(self): + """ Constructs the dance reader, opening the CSV file + + """ + self.last_status_string=None + """ The last status line shown in GUI for simulation and printed out during runtime in physical-cartpole""" + self.started = False + self.song_file_name:str = None + self.song_player:vlc.MediaPlayer = None + self.row_iterator = None + self.reader = None + self.csvfile = None + csv.register_dialect('cartpole-dancer', skipinitialspace=True) + + self.ENDTIME = 'endtime' + self.POLICY = 'policy' + self.OPTION = 'option' + self.CARTPOS = 'cartpos' + self.FREQ = 'freq' + self.AMP = 'amp' + self.FREQ2 = 'freq2' + self.AMP2 = 'amp2' + + self.first_step_run=False # used to avoid starting playing music when dance is configured in config_cost_functions but simulation not running + self.state:str='running' # set by signals emitted by CartPoleMainWindow, set to running initially so that we read the first row of CSV to compute trajectory for init + if CartPoleMainWindow.CartPoleMainWindowInstance and CartPoleMainWindow.CartPoleMainWindowInstance.CartPoleSimulationStateSignal: + CartPoleMainWindow.CartPoleMainWindowInstance.CartPoleSimulationStateSignal.connect(self.process_signal) + + self.csv_file_name = None + self.csv_file_path = None + self.mtime = None + self.reset_fields() + self.paused=False + self.time_step_started = float(0) + self.time_dance_started = float(0) + + self.cartpole_trajectory_generator = None # set in start() + self.iteration_counter=0 # to reduce some updates + + def reload_csv_and_song(self): + physical_cartpole_path='CartPoleSimulation' # for checking for media in CartPoleSimulation when running from physical-cartpole + self.csv_file_name = bytes.decode(self.cartpole_trajectory_generator.cost_function.dance_csv_file.numpy()) + if not os.path.exists(self.csv_file_name): + self.csv_file_name=os.path.join(physical_cartpole_path,self.csv_file_name) + if not os.path.exists(self.csv_file_name): + raise FileNotFoundError(f'CSV file for dance {self.cartpole_trajectory_generator.cost_function.dance_song_file} not found, cannot play music') + self.csv_file_path = Path(self.csv_file_name) + self.mtime = self.csv_file_path.stat().st_mtime + + self.song_file_name=bytes.decode(self.cartpole_trajectory_generator.cost_function.dance_song_file.numpy()) + if not os.path.exists(self.song_file_name): + self.song_file_name=os.path.join(physical_cartpole_path,self.song_file_name) + if not os.path.exists(self.song_file_name): + raise FileNotFoundError(f'mp3/wave file for dance {self.song_file_name} not found, cannot play music') + if self.song_player: + self.song_player.release() + try: + self.song_player:vlc.MediaPlayer = vlc.MediaPlayer(self.song_file_name) + log.debug(f'loaded CSV {self.csv_file_name} and song {self.song_file_name}') + except AttributeError as e: + log.error(f'cannot open VLC song_player: {e}') + + if self.song_player: + if self.song_player.set_rate(self.cartpole_trajectory_generator.cost_function.dance_song_playback_rate)==-1: + log.warning('could not set playback rate for song') + em = self.song_player.event_manager() + em.event_attach(vlc.EventType.MediaPlayerEndReached, self.start) # restart dance when song restarts + self.mtime = self.csv_file_path.stat().st_mtime + if self.csvfile: + self.csvfile.close() + self.csvfile = open(self.csv_file_name, 'r') + self.reader = csv.DictReader(filter(lambda row: row[0] != '#', self.csvfile), + dialect='cartpole-dancer') # https://stackoverflow.com/questions/14158868/python-skip-comment-lines-marked-with-in-csv-dictreader + self.row_iterator = self.reader.__iter__() + + def reset_fields(self) -> None: + self.time_dance_started = float(0) + self.time_step_started = float(0) + self.current_row = None + self.started = False + self.endtime = None + self.option = None + self.policy = None + self.policy_number = None + self.cartpos = None + self.freq = None + self.amp = None + self.freq2 = None + self.amp2 = None + + def start(self, time: float=0.0, cartpole_trajectory_generator=None) -> None: + """ Starts the dance now at time in seconds + + :param time: the current simulation time in seconds, 0 by default + :param cartpole_trajectory_generator: the trajectory generator, if not None it sets it for use by cartpole_dancer use + """ + if cartpole_trajectory_generator: + self.cartpole_trajectory_generator=cartpole_trajectory_generator + self.reset_fields() + self.reload_csv_and_song() + self.time_step_started = float(time) + self.time_dance_started = float(time) + if ((CartPoleMainWindow.CartPoleMainWindowInstance) and CartPoleMainWindow.CartPoleGuiSimulationState=='running')\ + or is_physical_cartpole_running_and_control_enabled(): + if self.song_player: + log.debug(f'starting playing song {self.song_file_name}') + self.song_player.play() # only play if simulator running or running physical cartpole + self.started = True + self.paused=False + + def stop(self)->None: + if self.song_player: + self.song_player.stop() + self.started=False + + def pause(self)->None: + self.paused=True + if self.song_player: + self.song_player.pause() + + def process_signal(self, signal:str): + """ Process signal from CartPoleMainWindow + + :param signal: the string state, 'running', 'paused', 'stopped' + """ + log.debug(f'got signal "{signal}"') + self.state=signal + if self.state=='paused': + self.pause() + elif self.state=='stopped': + self.stop() + elif self.state=='running': + if self.started: + self.start(0) # only start (song) if dance has been started, don't play if e.g. policy is balance + + def step(self, time: float)-> Dict: + """ Does next time step, reading the csv for next step if the current one's duration has timed out + + :param time: the current simulation time in seconds + + :returns: the current step row of cartpole_dance.csv as dict of column name -> value + """ + + if self.state=='running': + if not self.started: + log.warning('cartpole_dancer must be start()ed before calling step() - calling start now') + self.start(time) + if self.endtime is None or time >= self.time_dance_started+self.endtime: + self._read_next_step(time) + if self.iteration_counter % UPDATE_INTERVAL== 0: + # setting rate dynamically produces very choppy sound; we must ensure that we run fast enough for real time by optimizing code.... + # if not CartPoleMainWindow.CartPoleMainWindowInstance.speedup_measured is None: + # rate = CartPoleMainWindow.CartPoleMainWindowInstance.speedup_measured + # self.song_player.set_rate(rate) # TODO check effect on performance in vlc + CartPoleMainWindow.set_status_text(self.format_step(time)) + self.iteration_counter+=1 + return self.current_row + + def _read_next_step(self, time: float) ->None: + """ Reads next step from CSV file, reloading file from start if it has been modified since it was last read. + Plays a beep when the step changes. + + :param time: the current time in seconds + + :returns: None + """ + try: + # check if file modified, if so, restart whole dance + mtime = self.csv_file_path.stat().st_mtime + if mtime>self.mtime: + log.warning('cartpole_dance.csv modified, restarting dance') + self.start(time) + + self.time_step_started=time + self.current_row = self.reader.__next__() + log.info(f"At t={time:.1f}s new dance step is '{self.current_row['policy']}' ending at t={self.current_row['endtime']}") + if 'winsound' in sys.modules: + try: + winsound.Beep(1000,300) # beep + except ValueError as e: + log.warning(f'could not play beep, caught {e}') + + except StopIteration: + log.info(f'restarting dance at time={time}s') + self.start(time) + self.step(time) + return + try: + self.endtime = hms_to_seconds(self.current_row[self.ENDTIME]) + self.policy = self.current_row[self.POLICY][:-1] # the dqnce step 'policy" column has entries like balance1, spin2, etc + self.policy_number=int(self.current_row[self.POLICY][-1]) # todo not sufficient for XLA / TF compiled code to see this variable change + opt_str=self.current_row[self.OPTION] + try: + self.option=int(opt_str) + except ValueError: + self.option = self.current_row[self.OPTION] + self.cartpos = float(self.current_row[self.CARTPOS]) + self.freq = self.convert_float(self.FREQ) + self.amp = self.convert_float(self.AMP) + self.freq2 = self.convert_float(self.FREQ2) + self.amp2 = self.convert_float(self.AMP2) + except ValueError as e: + log.warning(f'could not convert value in row {self.current_row}: {e}') + + def convert_float(self, key): + if self.current_row[key] and self.current_row[key]!='': + return float(self.current_row[key]) + else: + return None + + def format_step(self, time:float) -> str: + """ Formats the current step for display in GUI main window status line. Sets self.last_status_string for use by other classes. + + :param time: the current time in seconds + + :returns: the step string + """ + duration=self.endtime-self.time_step_started + timestr=f't={(time):.1f}/{self.endtime:.1f}s (dur={duration:.1f}s)' + s=None + if not self.freq is None and not self.freq2 is None and not self.amp is None and not self.amp2 is None: + s= f'Dance: {self.policy}/{self.option} {timestr} pos={self.cartpos:.1f}m f0/f1={self.freq:.1f}/{self.freq2:.1f}Hz a0/a1={self.amp:.2f}/{self.amp2:.2f}m' + elif not self.freq is None and not self.amp is None: + s= f'Dance: {self.policy}/{self.option} {timestr} pos={self.cartpos:.1f}m freq={self.freq:.1f}Hz amp={self.amp:.2f}m' + elif not self.freq is None: + s= f'Dance: {self.policy}/{self.option} {timestr} pos={self.cartpos:.1f}m freq={self.freq:.1f}Hz' + else: + s= f'Dance: {self.policy}/{self.option} {timestr} pos={self.cartpos:.1f}m' + self.last_status_string=s + return s + + +def hms_to_seconds(timestr:str)->float: + """Get seconds from time. + + :param timestr: hh:mm:ss.xxx string or mm:s.xxx or simply s.xxx where xxx is the fraction of seconds + :returns: time in float seconds + """ + seconds= 0. + for part in timestr.split(':'): + seconds= seconds*60. + float(part) + return seconds \ No newline at end of file diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer_cost.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer_cost.py new file mode 100644 index 00000000..3997ebda --- /dev/null +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dancer_cost.py @@ -0,0 +1,210 @@ +import numpy as np + +from CartPole import state_utilities +from Control_Toolkit.Controllers import template_controller +from SI_Toolkit.computation_library import TensorType, ComputationLibrary +from Control_Toolkit.Cost_Functions import cost_function_base + +from CartPole.cartpole_model import TrackHalfLength, L, m_pole +from CartPole.state_utilities import NUM_STATES + +import tensorflow as tf + +from Control_Toolkit.others.get_logger import get_logger +log = get_logger(__name__) + + + +# (config, _) = load_or_reload_config_if_modified(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), +# every=1) +# +# weights=config.CartPole.cartpole_dancer_cost +# log.info(f'starting MPC cartpole trajectory cost weights={weights}') # only runs once + + +class cartpole_dancer_cost(cost_function_base): + """ Computes the rollout costs for cartpole dancer. + The state trajectories are computed from the current timestep according to desired 'step" type. + The costs are weighted according to config_cost_functions.yml values for various state components, e.g. cart position, pole swing velocity, etc. + + """ + + def __init__(self, controller: template_controller, ComputationLib: "type[ComputationLibrary]", + config: dict = None) -> None: + """ makes a new cost function + + :param controller: the controller + :param ComputationLib: the library, e.g. python, tensorflow + :param config: the dict of configuration for this cost function. The caller can modify the config to change behavior during runtime. + + """ + super().__init__(controller, ComputationLib) + self.new_target_trajectory = None + dist_metric = 'mse' + if dist_metric == 'rmse': + self.dist = lambda x: self.lib.sqrt(self.lib.pow(x, 2)) + elif dist_metric == 'mse': + self.dist = lambda x: self.lib.pow(x, 2) + elif dist_metric == 'abs': + self.dist = lambda x: self.lib.abs(x) + else: + raise Exception(f'unknown distance metric for cost "{dist_metric}"') + + def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType, time: float = None): + """ + Computes costs of all stages, i.e., costs of all rollouts for each timestep. + + :param states: Tensor of states [rollout, timestep, state]. + states[:,0,:] are num_rollouts copies of current state. + rollout dimension is the batch dimension of the rollouts, e.g. 1000 parallel rollouts. + timestep dimension are the timesteps of each rollout, e.g. 50 timesteps. + state is the state vector, defined by the STATE_INDICES in [state_utilities.py](../CartPole/state_utilities.py) + :param inputs: control input, e.g. cart acceleration + :param previous_input: previous control input, can be used to compute cost of changing control input + :param time: the time in seconds, used for generating target trajectory now + + :returns: Tensor of [rollout, timestep] over all rollouts and horizon timesteps + """ + + control_change_cost = 0 # compute the control change cost + + # position=states[:, :, POSITION_IDX] + # positionD=states[:, :, POSITIOND_IDX] + # angle=states[:, :, ANGLE_IDX] + # angleD=states[:,:,ANGLED_IDX] + + gui_target_equilibrium = self.target_equilibrium # GUI switch +1 or -1 to make pole target up or down position, set by previous call that update_attributes this field + + input_shape = self.lib.shape(states) + num_rollouts = input_shape[0] + mpc_horizon = input_shape[1] + num_states=input_shape[2] + + # allocate a zero stage costs tensor of dimension [num_rollouts, horizon], using the input states tensor that has the correct dimension + # squeeze is to remove the single state dimension to go from 3d to 2d tensor + # this 2d stage_costs tensor is immutable and is replaced every time modify it + stage_costs = tf.squeeze(tf.zeros_like(states[:,:,0])) + + + + # dance step policies are: dance0 (follow csv file) balance1 spin2 shimmy3 cartonly4 + + # states are "angle", "angleD", "angle_cos", "angle_sin", "position", "positionD" + # stage_costs = self.lib.zeros((num_rollouts, num_timesteps)) + target_trajectory = tf.transpose( + self.target_trajectory) # [horizon,num_states] array tensor computed by cartpole_trajectory_generator.py + broadcasted_target_trajectory = tf.broadcast_to(target_trajectory, + [num_rollouts, mpc_horizon, NUM_STATES]) + cost_weights = self.effective_traj_cost_weights + + traj_dist = self.dist( + states - broadcasted_target_trajectory) # difference for each rollout of predicted to target trajectory, has dim [num_rollouts, horizon, num_states] + stage_costs = tf.multiply(traj_dist, cost_weights) # multiply states dim by cost_weights + stage_costs = self.stage_cost_factor * tf.reduce_sum(stage_costs, + axis=2) # sum costs across state dimension to leave costs as [num_rollouts, mpc_horizon] + stage_costs = tf.reshape(stage_costs, [num_rollouts, mpc_horizon]) + + if self.lib.equal(self.policy_number, 2): # spin + # The cost tensor we will return. we allocate one less than num_timesteps to be all zeros because + # we concatenate the terminal cost in this branch + # stage_costs = self.lib.zeros((num_rollouts, num_timesteps - 1)) + # stage_costs = tf.zeros_like(states) + # stage_costs=stage_costs[:,-1] + + # spin is special cost aimed to first get energy into pole, then make it spin in correct direction. + # It is totally based on terminal state at end of rollout. + # Spin cost is based on total pole energy plus cart position plus rotation speed in cw or ccw direction. + # When the pole has insufficient energy for a spin, we follow objective to maximize the total pole energy. + # If the pole has sufficient energy for full rotations, then we follow objective to just maximize the signed kinetic energy in the cw or ccw direction. + upright_pole_energy=self.pole_energy_potential(0) # energy of pole upright compared with hanging down (factor of 2), means pole can swing around all the way if total energy is larger than this + current_pole_angle=states[0,0,state_utilities.ANGLE_IDX] # rad + current_pole_angleD=states[0,0,state_utilities.ANGLED_IDX] # rad/s + + # compute the total pole energy, which is sum of kinetic and potential energy of pole + current_pole_potential_energy=self.pole_energy_potential(current_pole_angle) + current_pole_kinetic_energy=self.pole_energy_kinetic(current_pole_angleD) + current_pole_total_energy=current_pole_potential_energy+current_pole_kinetic_energy + + # now find the final state at end of horizon and try to get it to either have more energy or spinning faster in particular direction + # each of following is [num_rollouts] vector of angleD, cos_angle, and cart position + angleD_states = states[:, -1, state_utilities.ANGLED_IDX] + cosangle_states = states[:, -1, state_utilities.ANGLE_COS_IDX] + pos_states = states[:, -1, state_utilities.POSITION_IDX] + + gui_target_position = self.target_position # GUI slider position + pos_cost = self.dist(pos_states - gui_target_position) # num_rollouts vector + + # compute the cost of spinning, the more energy, the lower the cost + spin_cost_energy = -(self.pole_energy_kinetic(angleD_states) + self.pole_energy_potential(cosangle_states)) + + if current_pole_total_energy>self.upright_pole_energy_multiple_to_count_spin_direction_cost * upright_pole_energy: + # pole is already spinning, multiply the spin energy by the actual direction of spinning and desired spin direction + actual_spin_dirs=self.lib.sign(angleD_states) + desired_spin_dir= -float(self.spin_dir) * gui_target_equilibrium # minus sign to get correct cost sign for cw/ccw rotation + spin_cost_energy=spin_cost_energy*desired_spin_dir*actual_spin_dirs + + # compute the cart position cost as distance from desired cart position + # total stage cost is sum of energy of pole spin, direction of spin, and cart position + # the returned stage_costs is a tensor [num_rollouts, horizon] that has concatenated the terminal costs at end of horizon to the zero stage costs along the horizon + term_costs=self.spin_energy_weight *spin_cost_energy + self.cart_pos_weight * pos_cost + term_costs=tf.expand_dims(term_costs,-1) # make it, e.g. with num_rollouts=700, from [700] to [700,1] tensor + stage_costs = self.lib.concat([stage_costs[:,:-1],term_costs],1) # now concatenate the terminal costs (all we count for spin) to the zero stage costs replacing the last horizon step + stage_costs = tf.reshape(stage_costs, [num_rollouts, mpc_horizon]) + + if previous_input is not None: + control_change_cost = self.control_cost_change_weight * self._control_change_rate_cost(inputs, previous_input) + control_cost = self.control_cost_weight * self._CC_cost(inputs) # compute the cart acceleration control cost + + terminal_traj_edge_barrier_cost = self.track_edge_barrier_cost * self.track_edge_barrier(states[:, :, state_utilities.POSITION_IDX]) + + stage_costs = stage_costs + terminal_traj_edge_barrier_cost + control_cost + control_change_cost # result has dimension [num_rollouts, horizon] + + return stage_costs + + # final stage cost + def get_terminal_cost(self, terminal_states: TensorType): + """ returns 0; terminal cost and barrier cost computed in get_stage_cost()""" + return self.lib.zeros_like(terminal_states) + + # cost for distance from cart track edge + def track_edge_barrier(self, position): + """Soft constraint: Do not crash into border; returns a 1 for any rollout cart position state that exceeds the track edge boundary""" + return self.lib.cast( + self.lib.abs(position) > self.track_length_fraction * TrackHalfLength, self.lib.float32 + ) + + # actuation cost + def _CC_cost(self, u): + return self.lib.sum(u ** 2, 2) + + # cost of changing control too fast + def _control_change_rate_cost(self, u, u_prev): + """Compute penalty of control jerk, i.e. difference to previous control input + + :poram u: the current control input (cart acceleration in dimensionless scale) + :param u_prev: the previous timesteps control input + + :returns: the cost of changing control + """ + u_prev_vec = self.lib.concat( + (self.lib.ones((u.shape[0], 1, u.shape[2])) * u_prev, u[:, :-1, :]), 1 + ) + return self.lib.sum((u - u_prev_vec) ** 2, 2) + + def pole_energy_potential(self, cosangle): + """Compute potential energy of pole + + :param cosangle: the cosine of pole angle (1 when upright) + + :returns: the pole potential energy in Joules, it is zero when pole is hanging down + """ + return ((1+cosangle)/2) * m_pole * (L/2) * 9.8 # hopefully this is height of COM potential gravitational energy + + def pole_energy_kinetic(self, angleD): + """Compute kinetic energy of pole + + :param angleD: time derivative of pole angle in rad/s + + :returns: the kinetic energy in Joules + """ + return (1. / 6.) * m_pole * L * L * (angleD ** 2) diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_trajectory_generator.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_trajectory_generator.py new file mode 100644 index 00000000..b8ae30a5 --- /dev/null +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_trajectory_generator.py @@ -0,0 +1,438 @@ +# import time +# from pydoc import text +from typing import List, Optional, Union + +import numpy as np +# import scipy +# from matplotlib.pyplot import pause +from scipy.signal import sawtooth, square + +from CartPole import state_utilities +from CartPole.state_utilities import NUM_STATES, ANGLED_IDX, ANGLE_IDX, ANGLE_SIN_IDX, ANGLE_COS_IDX, POSITION_IDX, POSITIOND_IDX +from Control_Toolkit.Controllers import template_controller +from Control_Toolkit.Cost_Functions import cost_function_base +from Control_Toolkit_ASF.Cost_Functions.CartPole.cartpole_dancer import cartpole_dancer +from GUI import gui_default_params, CartPoleMainWindow +from SI_Toolkit.computation_library import TensorType +import tensorflow as tf + +from Control_Toolkit.others.get_logger import get_logger +from others.globals_and_utils import update_attributes +from others.p_globals import CARTPOLE_PHYSICAL_CONSTANTS + +log = get_logger(__name__) + +def to_numpy(v): + return v if not isinstance(v,(tf.Tensor,tf.Variable)) else v.numpy() + +class cartpole_trajectory_generator: + + POLICIES=('balance','spin','shimmy','cartonly','cartwheel','toandfro') + POLICIES_NUMBERED=('balance0','spin1','shimmy2','cartonly3','cartwheel4','toandfro5') # for tensorflow scalar BS + CARTWHEEL_STATES={'before','starting','during','after'} + + def __init__(self): + + self.last_status_text:str=None + self.step_start_time = None + self.policy_changed=False + self.prev_policy=None + self.prev_dance_policy=None + self.time_policy_changed=None # when the new dance step started + self.shimmy_function=None # used for ramp of shimmy + self.controller=None + self.cost_function=None # set by each generate_trajectory, used by contained classes, e.g. cartpole_dancer, to access config values + self.lib=None + self.cartpole_dancer=cartpole_dancer() + self.traj:np.ndarray=None # stores the latest trajectory, for later reference, e.g. for measuring model mismatch + self.start_time=0 + + self.cartwheel_cycles = None + self.cartwheel_cycles_to_do:int = None + self.cartwheel_state:str = None + self.cartwheel_time_state_changed:float = None + self.cartwheel_starttime:float = None + self.cartwheel_direction:int = None + self.last_cartwheel_cycles:int=None + + self.mpc_horizon=None + """ the horizion, updated from cost function on every call of generate_cartpole_trajectory""" + + self.traj:np.ndarray=None + """ The state vector trajectories to follow. It is a 2d array [state,horizon] where first dimension are the states and 2nd is the MPC horizon""" + + self.traj_states_to_consider:List[int]=[] + """ a list of state indices to count difference over for cost computation. It is put to cost_function as a tf.Variable so that the compiled cost function can use it.""" + + self.state_cost_weights=None + """ Vector of state trajectory cost weights from config_cost_functions. Filled in on every generate_cartpole_trajectory().""" + + # we cannot determine if we are running physical cartpole with following method since control is not running at start time.time() + # if is_physical_cartpole_running_and_control_enabled() else 0 # subtracted from all times for running physical cartpole to avoid numerical overflow + # log.debug(f'self.start_time={self.start_time:.1f}s') + + self.reset_cartwheel_state() + + def reset_cartwheel_state(self): + self.cartwheel_state = 'before' + self.cartwheel_time_state_changed = 0 + self.set_cartwheel_state('before', self.start_time) + self.cartwheel_cycles_to_do = 0 + if not self.cost_function is None and hasattr(self.cost_function,'cartwheel_cycles'): + self.cartwheel_cycles_to_do=np.abs(to_numpy(self.cost_function.cartwheel_cycles)) + self.cartwheel_starttime = 0 # time that this particular cartwheel started + self.cartwheel_direction = 1 # 1 for ccw (increasing angle), -1 for cw + self.last_cartwheel_cycles=0 # to test if config file value changes so we can restart + + def reset(self): + """ stops dance if it is going + """ + self.cartpole_dancer.stop() + self.reset_cartwheel_state() + + + def generate_cartpole_trajectory(self, time: float, state: np.ndarray, controller:template_controller, cost_function: cost_function_base): + """ Computes the desired future state trajectory at this time. + Sets the cost function attributes target_trajectory and traj_states_to_consider. + The target state trajectory of cartpole is valid for steps that use this trajectory (spin does not use it) + It is np.ndarray[states,timesteps] where only considered states are in the 2d array. The first dimension is the state, 2nd is the mpc_horizon. + + :param time: the scalar time in seconds + :param horizon: the number of horizon steps + :param dt: the timestep in seconds + :param state: the current state of the cartpole as 1d vector of current state components, e.g. the scalar angle state[state_utilities.ANGLE] + + """ + self.controller=controller + self.cost_function=cost_function + self.lib=self.cost_function.lib + dt = gui_default_params.controller_update_interval # TODO fix for physical-cartpole which has CONTROL_PERIOD_MS in globals.py + # dt=CartPoleMainWindow.CartPoleMainWindowInstance.dt_simulation # todo figure out way to get from single place + self.mpc_horizon = to_numpy(controller.optimizer.mpc_horizon) + gui_target_position = to_numpy(cost_function.target_position) # GUI slider position + gui_target_equilibrium = to_numpy(cost_function.target_equilibrium) # GUI switch +1 or -1 to make pole target up or down position + # log.debug(f'dt={dt:.3f} mpc_horizon={self.mpc_horizon}') + time=time-self.start_time # account for 1970's time returned when running physical-cartpol + + + + policy:str = bytes.decode(to_numpy(cost_function.policy)) # we really want a python str out of the policy so we can compare it using python previous policy + dancer_current_step=None + if policy is None: + raise RuntimeError(f'set policy in config_self.controller.cost_function_wrapper.cost_functions.yml') + + if self.prev_policy is None or policy!=self.prev_policy: + self.time_policy_changed=time + self.policy_changed=True + else: + self.policy_changed=False + self.prev_policy=policy + + if self.policy_changed and self.prev_policy== 'dance' and policy!= 'dance': + self.cartpole_dancer.stop() # stop music + + # **************************** + # if we are following a complete dance from CSV file, then set the appropriate fields here from the CSV columns + if policy=='dance': + if self.policy_changed: + self.cartpole_dancer.start(time, self) + self.cartpole_dancer.step(time=time) + + policy=self.cartpole_dancer.policy + self.prev_dance_policy=policy + + gui_target_position=to_numpy(self.cartpole_dancer.cartpos) + + # must assign for TF/XLA to see the policy number in the compiled cost function. BS... + self.lib.assign(getattr(cost_function, 'policy_number'), + self.lib.to_variable(self.cartpole_dancer.policy_number, cost_function.lib.int32)) + if policy=='spin': + spin_dir=self.cartpole_dancer.option + # hack to get int value for TF cost function use, is referred to in cartpole_dancer_cost as self.spin_dir + cost_function.lib.assign(getattr(cost_function, 'spin_dir'), + cost_function.lib.to_variable(spin_dir, + cost_function.lib.int32)) + # cost_function.spin_freq_hz=self.cartpole_dancer.freq + elif policy=='balance': + cost_function.balance_dir=self.cartpole_dancer.option + elif policy=='shimmy': + cost_function.shimmy_freq_hz=self.cartpole_dancer.freq + cost_function.shimmy_amp=self.cartpole_dancer.amp + cost_function.shimmy_freq2_hz=self.cartpole_dancer.freq2 + cost_function.shimmy_amp2=self.cartpole_dancer.amp2 + cost_function.shimmy_duration=self.cartpole_dancer.endtime + cost_function.shimmy_dir=self.cartpole_dancer.option + elif policy=='cartonly': + cost_function.cartonly_freq_hz=self.cartpole_dancer.freq + cost_function.cartonly_amp=self.cartpole_dancer.amp + cost_function.cartonly_duty_cycle=float(self.cartpole_dancer.option) + elif policy=='cartwheel': + cost_function.cartwheel_cycles=self.cartpole_dancer.amp + elif policy=='toandfro': + cost_function.cartwheel_cycles=self.cartpole_dancer.amp + else: + log.error(f"policy '{policy}' is unknown") + + + # ********************************** + # Now set the state trajectories to follow and for each type of dance step movement + self.clear_traj() + if policy== 'spin': # spin pole CW or CCW depending on target_equilibrium up or down + # spin is handled entirely by a special cost function in cartpole_dancer_cost.py + pass + elif policy=='balance': # balance upright or down at desired cart position + balance_dir = gui_target_equilibrium*to_numpy(cost_function.balance_dir) + target_angle = np.pi * (1 - balance_dir) / 2 # either 0 for up and pi for down + self.add_traj(POSITION_IDX,gui_target_position) + # traj[state_utilities.ANGLE_SIN_IDX, :] = np.sin(target_angle) + if balance_dir>0: + self.add_traj(ANGLE_IDX,0) # for up, target angle zero, which is increases linearly with angle, unlike cos which only goes away from 1 like 1-angle^2 + else: + self.add_traj(ANGLE_COS_IDX,-1) # for down, avoid the 2pi cut ot +pi/-pi by targetting cos to as negative as possible + + self.add_traj(ANGLED_IDX,0) + self.add_traj(POSITIOND_IDX,0) + elif policy=='shimmy': # cart follows a desired cart position shimmy while keeping pole up or down + f0 = to_numpy(cost_function.shimmy_freq_hz) # seconds + a0 = to_numpy(cost_function.shimmy_amp) # meters + if self.policy_changed or self.step_start_time is None: + log.debug(f'shimmy with freq={f0}Hz and amp={a0}m restarted at time={time}') + self.step_start_time=time + # shimmy_endtime=self._time_policy_changed+cost_function.shimmy_duration + # compute times from current time to end of horizon + time_since_step_started=time-self.step_start_time + horizon_endtime = time_since_step_started + self.mpc_horizon * dt + # time for shimmy must be relative to start of shimmy step for freq ramp to make sense + times = np.linspace(time_since_step_started, horizon_endtime, num=self.mpc_horizon, dtype=np.float32) + # time_frac=times/cost_function.shimmy_duration + # f1 = cost_function.shimmy_freq2_hz # seconds + # a1 = cost_function.shimmy_amp2 # meters + + # compute the shimmmy trajectory over the horizon + # amps=a0+time_frac*(a1-a0) + # freqs=f0+time_frac*(f1-f0) + + # print(f'abs_time={time:.2f}, rel_time={times[0]:.2f} time_frac={time_frac[0]:.3f} amp={amps[0]:.3f} freq={freqs[0]:.2f}') + + cartpos=a0*np.sin(2*np.pi*f0*times, dtype=np.float32) + cartpos_d=np.gradient(cartpos,dt) + angle=np.arcsin(cartpos/(2*CARTPOLE_PHYSICAL_CONSTANTS.L), dtype=np.float32)/2 # compute the angle towards the center of shimmy to keep head of pole fixed as well as possible. L is half of pole length. + # we divide by 2 to reduce the required angle a bit more + angle_d=np.gradient(angle,dt) + self.add_traj(POSITION_IDX, gui_target_position + cartpos) + self.add_traj(ANGLE_IDX,angle) # keep pole pointing towards center of shimmy to minimize pole tip movement + self.add_traj(ANGLED_IDX,angle_d) + self.add_traj(POSITIOND_IDX, cartpos_d) + elif policy=='cartonly': # cart follows the trajectory, pole ignored + f0 = to_numpy(cost_function.cartonly_freq_hz) # seconds + a0 = to_numpy(cost_function.cartonly_amp) # meters + if self.policy_changed or self.step_start_time is None: + log.debug(f'cartonly with freq={f0}Hz and amp={a0}m restarted at time={time}') + self.step_start_time=time + # shimmy_endtime=self._time_policy_changed+cost_function.shimmy_duration + # compute times from current time to end of horizon + time_since_step_started=time-self.step_start_time + horizon_endtime = time_since_step_started + self.mpc_horizon * dt + # time for shimmy must be relative to start of shimmy step for freq ramp to make sense + times = np.linspace(time_since_step_started, horizon_endtime, num=self.mpc_horizon, dtype=np.float32) + # sawtooth is out for now to avoid sharp turns at ends + # cartpos = a0 * square((2 * np.pi * f0) * times, duty=cost_function.cartonly_duty_cycle) # duty=.5 makes square with equal halfs https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html + cartpos = a0 * sawtooth((2 * np.pi * f0) * times, width=to_numpy(cost_function.cartonly_duty_cycle)) # width=.5 makes triangle https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html + # cartpos = a0 * np.sin((2 * np.pi * f0) * times) # width=.5 makes triangle https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sawtooth.html + cartvel = np.gradient(cartpos, dt) + cartpos_vector = gui_target_position + cartpos + self.add_traj(ANGLE_COS_IDX,-1) + self.add_traj(POSITION_IDX,cartpos_vector) + self.add_traj(POSITIOND_IDX, cartvel) + elif policy=='cartwheel' or policy=='toandfro': + # cartwheel starts with balance, once balanced the cartwheels start, after the cartwheels we again balance + # toandfro swings one way and then the other immediately, must have cycles>2 + + # determine state transitions based on current state + angle = state[state_utilities.ANGLE_IDX] + angle_d = state[state_utilities.ANGLED_IDX] + + # need to convert cartwheel cycles to real float since equals test on tf.Variable does not work if object does not change like it does not with assignment in update_attributes. + # also cartwheel cycles can be set to float during dance above + self.cartwheel_cycles=to_numpy(cost_function.cartwheel_cycles) + if self.policy_changed or self.cartwheel_cycles!=self.last_cartwheel_cycles: + self.cartwheel_direction=np.sign(self.cartwheel_cycles) + self.set_cartwheel_state('before',time) + self.cartwheel_cycles_to_do=np.abs(self.cartwheel_cycles) + self.last_cartwheel_cycles=self.cartwheel_cycles + + # first update state depending on cartwheel state, cartpole state, and time + if self.cartwheel_state=='before': + # if before cartwheel, we need to get balanced + if self.cartwheel_cycles_to_do>0 and self.is_pole_balanced(state): + self.set_cartwheel_state('starting',time) + self.cartwheel_starttime=time + elif self.cartwheel_state=='starting': + # start the free fall in the correct direction using angle_d indicator function + # if pole is falling in correct direction and fast enough and has fallen enough move to free fall + if np.abs(angle_d)>to_numpy(self.cost_function.cartwheel_freefall_angle_limit_deg)*np.pi/180\ + and np.sign(angle_d)==self.cartwheel_direction \ + and np.abs(angle)>to_numpy(self.cost_function.cartwheel_freefall_angled_limit_deg_per_sec)*np.pi/180: + self.cartwheel_cycles_to_do-=1 + self.set_cartwheel_state('during',time) + elif self.cartwheel_state=='during': + # during the cartwheel, we don't go to 'after' until the angle has exceeded some angle and spin speed + if np.abs(angle)>to_numpy(self.cost_function.cartwheel_freefall_to_after_angle_limit_deg)*np.pi/180: + if self.cartwheel_cycles_to_do>0: + self.set_cartwheel_state('before',time) + if policy=='toandfro': + self.cartwheel_direction= -self.cartwheel_direction # go other direction this time + else: + self.set_cartwheel_state('after',time) + elif self.cartwheel_state=='after': + pass # end state, don't get out of it except by starting a new cartwheel + + # now set the target trajectory objectives according to state + if self.cartwheel_state=='before' or self.cartwheel_state=='after': # just get balanced again + self.add_traj(POSITION_IDX, gui_target_position) + self.add_traj(ANGLE_IDX, 0) + self.add_traj(ANGLED_IDX, 0) + self.add_traj(POSITIOND_IDX, 0) + + elif self.cartwheel_state=='starting': # we just try to get the pole spinning the correct direction + self.add_traj(POSITION_IDX, gui_target_position) + self.add_traj(POSITIOND_IDX, 0) + self.add_traj(ANGLED_IDX, 1e6*self.cartwheel_direction) + + elif self.cartwheel_state=='during': # free fall, just get the cart back to target position + self.add_traj(POSITION_IDX, gui_target_position) + self.add_traj(POSITIOND_IDX, 0) + + else: + log.error(f'cost policy "{policy}" is unknown') + + + self.compile_traj_for_cost_function() + + self.set_status_text(time, state, cost_function) # update CartPoleSimulation GUI status line (if it exists) + + + def add_traj(self, idx:int, traj:Union[float,np.ndarray]): + """ Add a target trajectory vector to the target states trajectories. + + :param idx: the state index, e.g. POSITION + :param traj: either a float value or a vector with length self.mpc_horizon + """ + traj_vec=None + + self.traj_states_to_consider.append(idx) + + if isinstance(traj,(float,int, np.float32)): + traj_vec=np.full(shape=(1,self.mpc_horizon),fill_value=traj,dtype=np.float32) + elif isinstance(traj, np.ndarray): + if len(traj.shape)>1 or traj.shape[0]!=self.mpc_horizon: + raise AttributeError(f'trajectory {traj} must be a vector with length self.mpc_horizon') + traj_vec=traj + else: + raise AttributeError(f'trajectory vector {traj} must be a scalar int,float value or np.ndarray or tf.Tensor') + + self.traj[idx,:]=traj_vec # put this vector to 2d array of trajectories + + def clear_traj(self): + """ Clear the target state trajectories""" + # initialize desired trajectory with 0 entries, will fill appropriate rows with target state trajectories + + self.traj = np.full(shape=(NUM_STATES, self.mpc_horizon), + fill_value=0., dtype=np.float32) # use numpy not tf, too hard to work with immutable tensors + self.traj_states_to_consider=[] + + def compile_traj_for_cost_function(self): + """ Compile the target state trajectories to a fixed-size tensor is used in cartpole_dancer_cost.py to + measure the rollout costs. Transfer the target states trajectory and effective cost weighting vector to cost function. + """ + # states are "angle", "angleD", "angle_cos", "angle_sin", "position", "positionD" + raw_state_cost_weights = np.array([to_numpy(self.cost_function.pole_angle_weight), + to_numpy(self.cost_function.pole_swing_weight), + to_numpy(self.cost_function.pole_angle_weight), + to_numpy(self.cost_function.pole_angle_weight), + to_numpy(self.cost_function.cart_pos_weight), + to_numpy(self.cost_function.cart_vel_weight)]) + effective_cost_weights=np.zeros([1,NUM_STATES]) + np.put(effective_cost_weights, self.traj_states_to_consider,raw_state_cost_weights[self.traj_states_to_consider]) + + + updated_attributes = {} # empty dict + updated_attributes['target_trajectory'] = tf.Variable(self.traj, dtype=tf.float32) + updated_attributes['effective_traj_cost_weights'] = tf.Variable(effective_cost_weights, dtype=tf.float32) + update_attributes(updated_attributes, self.cost_function) # update the cost_fuction tf.Variable's to give access to them in compiled cost_function + + def set_cartwheel_state(self,new_state:str,time:float): + """Sets the cartwheel state and the time the state changed if it did + """ + if self.cartwheel_state is None or self.cartwheel_state!=new_state: + self.cartwheel_time_state_changed=time + log.debug(f'changed state from {self.cartwheel_state}->{new_state} at t={time:.3f}') + self.cartwheel_state=new_state + + def is_pole_balanced(self, state): + """ Returns True if angle and angle_d are sufficiently small""" + return np.abs(state[state_utilities.ANGLE_IDX]) None: + s = self.get_status_string(time, state, cost_function) + CartPoleMainWindow.set_status_text(s) + + def get_status_string(self, time, state, cost_function): + """ Returns a string describing current objective trajectory or control target""" + policy: str = cost_function.policy + if policy == 'dance': + return # status of cartpole GUI set by dancer + gui_target_position = float(cost_function.target_position) # GUI slider position + gui_target_equilibrium = float( + cost_function.target_equilibrium) # GUI switch +1 or -1 to make pole target up or down position + + if policy == 'balance': + dir = to_numpy(cost_function.balance_dir) + s = f'Policy: balance/{dir} pos={gui_target_position:.1f}m' + elif policy == 'spin': + dir_int = cost_function.spin_dir.numpy() + dir='cw' if dir_int<0 else 'ccw' + s = f'Policy: spin/{dir}*up/down pos={gui_target_position:.1f}m' + elif policy == 'shimmy': + s = f'Policy: shimmy pos={gui_target_position:.1f}m freq={float(cost_function.shimmy_freq_hz):.1f}Hz amp={float(cost_function.shimmy_amp):.3f}m' + elif policy == 'cartonly': + s = f'Policy: cartonly pos={gui_target_position:.1f}m freq={float(cost_function.cartonly_freq_hz):.1f}Hz amp={float(cost_function.cartonly_amp):.3f}m' + elif policy == 'cartwheel': + s = f'Policy: cartwheel pos={gui_target_position:.1f}m state={self.cartwheel_state} cycles_to_do={self.cartwheel_cycles_to_do} angle={state[state_utilities.ANGLE_IDX]*180/np.pi:.1f}deg angle_d={state[state_utilities.ANGLED_IDX]*180/np.pi:.1f}deg/s' + elif policy == 'toandfro': + s = f'Policy: toandfro pos={gui_target_position:.1f}m state={self.cartwheel_state} cycles_to_do={self.cartwheel_cycles_to_do} angle={state[state_utilities.ANGLE_IDX]*180/np.pi:.1f}deg angle_d={state[state_utilities.ANGLED_IDX]*180/np.pi:.1f}deg/s' + else: + s = f'unknown/not implemented string' + self.last_status_text=s + return s + + + def decode_string(self,tfstring:tf.Variable): + """ Decodes tensorflow string to unicode python str. + :param tfstring: the tf.Variable with dtype 'string'. + + :returns: If tfstring is tf.Variable, returns tfstring.numpy().decode('utf-8'). If tfstring is already str, returns result of tfstring.encode().decode('utf-8') + """ + if isinstance(tfstring,str): return tfstring.encode().decode('utf-8') + return tfstring.numpy().decode('utf-8') + + def keyboard_input(self, c): + if c == 'M': # switch dance step (mode) + newpol=self.next_policy() + update_attributes({'policy':newpol},self.cost_function) + + def next_policy(self)->str: + curpolicy = self.decode_string(self.cost_function.policy) + ind=0 + try: + ind=cartpole_trajectory_generator.POLICIES.index(curpolicy)+1 + if ind==len(cartpole_trajectory_generator.POLICIES): ind=0 + except ValueError as e: + log.error(f'error cycling policy {e}') + newpol=cartpole_trajectory_generator.POLICIES_NUMBERED[ind] + print(f'changed policy from {curpolicy} -> {newpol}') + return newpol + + def print_keyboard_help(self): + print('M cycle dance policy') diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/default.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/default.py index ee811a52..e3c29e4c 100644 --- a/Control_Toolkit_ASF/Cost_Functions/CartPole/default.py +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/default.py @@ -4,21 +4,50 @@ from SI_Toolkit.computation_library import TensorType from Control_Toolkit.Cost_Functions import cost_function_base -from others.globals_and_utils import load_config +from others.globals_and_utils import load_config, load_or_reload_config_if_modified from CartPole.cartpole_model import TrackHalfLength -from CartPole.state_utilities import ANGLE_IDX, POSITION_IDX +from CartPole.state_utilities import ANGLE_IDX, POSITION_IDX, ANGLED_IDX -# load constants from config file -config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r")) +class default(cost_function_base): + + # all stage costs together + def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType): + """ + Computes costs of all stages, i.e., costs of all rollouts for each timestep. + + :param states: Tensor of states [rollout, timestep, state]. + rollout dimension is the batch dimension of the rollouts, e.g. 1000 parallel rollouts. + timestep dimension are the timesteps of each rollout, e.g. 50 timesteps. + state is the state vector, defined by the STATE_INDICES in [state_utilities.py](../CartPole/state_utilities.py) + :param inputs: control input, e.g. cart acceleration + :param previous_input: previous control input, can be used to compute cost of changing control input + :return: Tensor of [rollout, timestamp] over all rollouts and horizon timesteps + """ + (config,reloaded)=load_or_reload_config_if_modified(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"),every=1,search_path=['CartPoleSimulation']) + self.dd_weight = config["CartPole"]["default"]["dd_weight"] + self.cc_weight = config["CartPole"]["default"]["cc_weight"] + self.ep_weight = config["CartPole"]["default"]["ep_weight"] + self.ccrc_weight = config["CartPole"]["default"]["ccrc_weight"] + self.R = config["CartPole"]["default"]["R"] + + dd = self.dd_weight * self._distance_difference_cost(states[:, :, POSITION_IDX]) # compute cart position target distance cost + cc = self.cc_weight * self.R*self._CC_cost(inputs) # compute the cart acceleration control cost + ccrc = 0 # compute the control change cost + angle=states[:, :, ANGLE_IDX] + angleD=states[:,:,ANGLED_IDX] + ep = self.ep_weight * self._E_pot_cost(angle) + # if previous_input is not None: + # ccrc = ccrc_weight * self._control_change_rate_cost(inputs, previous_input) + + # if self.controller.target_positions_vector[0] > 0: # TODO why is potential cost positive for this case and negative otherwise? + # stage_cost = dd + ep + cc + ccrc + # else: + stage_cost = dd + ep + cc + ccrc + + return stage_cost -dd_weight = config["CartPole"]["default"]["dd_weight"] -cc_weight = config["CartPole"]["default"]["cc_weight"] -ep_weight = config["CartPole"]["default"]["ep_weight"] -ccrc_weight = config["CartPole"]["default"]["ccrc_weight"] -R = config["CartPole"]["default"]["R"] -class default(cost_function_base): # cost for distance from track edge def _distance_difference_cost(self, position): """Compute penalty for distance of cart to the target position""" @@ -35,7 +64,7 @@ def _E_pot_cost(self, angle): # actuation cost def _CC_cost(self, u): - return R * self.lib.sum(u**2, 2) + return self.lib.sum(u**2, 2) # final stage cost def get_terminal_cost(self, terminal_states: TensorType): @@ -76,13 +105,3 @@ def _control_change_rate_cost(self, u, u_prev): ) return self.lib.sum((u - u_prev_vec) ** 2, 2) - # all stage costs together - def get_stage_cost(self, states: TensorType, inputs: TensorType, previous_input: TensorType): - dd = dd_weight * self._distance_difference_cost(states[:, :, POSITION_IDX]) - ep = ep_weight * self._E_pot_cost(states[:, :, ANGLE_IDX]) - cc = cc_weight * self._CC_cost(inputs) - ccrc = 0 - # if previous_input is not None: - # ccrc = ccrc_weight * self._control_change_rate_cost(inputs, previous_input) - stage_cost = dd + ep + cc + ccrc - return stage_cost diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary.py index a1b7671f..1c5e991e 100644 --- a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary.py +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary.py @@ -10,7 +10,7 @@ from CartPole.state_utilities import ANGLE_IDX, POSITION_IDX # load constants from config file -config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r")) +config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), "r")) dd_weight = config["CartPole"]["quadratic_boundary"]["dd_weight"] cc_weight = config["CartPole"]["quadratic_boundary"]["cc_weight"] diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_grad.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_grad.py index a64f39db..39e63407 100644 --- a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_grad.py +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_grad.py @@ -10,7 +10,7 @@ from CartPole.state_utilities import ANGLE_IDX, ANGLED_IDX, POSITION_IDX #load constants from config file -config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r")) +config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), "r")) dd_weight = config["CartPole"]["quadratic_boundary_grad"]["dd_weight"] cc_weight = config["CartPole"]["quadratic_boundary_grad"]["cc_weight"] diff --git a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_nonconvex.py b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_nonconvex.py index e15f9297..e20a5138 100644 --- a/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_nonconvex.py +++ b/Control_Toolkit_ASF/Cost_Functions/CartPole/quadratic_boundary_nonconvex.py @@ -10,7 +10,7 @@ from CartPole.state_utilities import ANGLE_IDX, ANGLED_IDX, POSITION_IDX #load constants from config file -config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_function.yml"), "r")) +config = safe_load(open(os.path.join("Control_Toolkit_ASF", "config_cost_functions.yml"), "r")) dd_weight = config["CartPole"]["quadratic_boundary_nonconvex"]["dd_weight"] cc_weight = config["CartPole"]["quadratic_boundary_nonconvex"]["cc_weight"] diff --git a/Control_Toolkit_ASF/Cost_Functions/__init__.py b/Control_Toolkit_ASF/Cost_Functions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/Control_Toolkit_ASF/config_controllers.yml b/Control_Toolkit_ASF/config_controllers.yml index 2e383d72..e699b172 100644 --- a/Control_Toolkit_ASF/config_controllers.yml +++ b/Control_Toolkit_ASF/config_controllers.yml @@ -1,13 +1,20 @@ -mpc: - optimizer: rpgd-tf +# this config specifies the controller options. +# to select a particular controller, see TODO where is the controller specified? +# controller_mpc.py uses the mpc: section below +# see config_optimizers to specify the parameters for the optimizer, e.g. mppi + +# when running physical-cartpole, its globals.py overwrites the config regarding controller & optimizer. For simulator, options are specified completely in .yml files. + +mpc: # selects Control_Tookit/Controllers/controller_mpc.py as the controller + optimizer: rpgd-tf # selects the optimizer, see CartPoleSimulation/ControlToolkit_ASF/config_optimizers.yml. Popular options are 'mppi' and 'rpgd-tf' predictor_specification: "ODE_TF" # Can be "ODE", "ODE_TF", network/GP name (possibly with path) e.g. 'GRU-6IN-32H1-32H2-5OUT-0'/'SGP_30' or a name of a custom predictor. For more info see config_predictors in SI_Toolkit_ASF - cost_function_specification: default # One of "default", "quadratic_boundary_grad", "quadratic_boundary_nonconvex", "quadratic_boundary" + cost_function_specification: cartpole_dancer_cost # One of "default", "quadratic_boundary_grad", "quadratic_boundary_nonconvex", "quadratic_boundary" "cartpole_dancer_cost" computation_library: tensorflow # One of "numpy", "tensorflow", "pytorch". Defaults to "numpy" if none given. controller_logging: false -mppi-cartpole: +mppi-cartpole: # TODO what is this section for? seed: null # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!) - mpc_horizon: 35 # steps - num_rollouts: 3500 # Number of Monte Carlo samples + mpc_horizon: 30 # steps + batch_size: 3500 # Number of Monte Carlo samples update_every: 1 # Cost weighted update of inputs every ... steps predictor_specification: "ODE_TF" # Can be "ODE", "ODE_TF", network/GP name (possibly with path) e.g. 'GRU-6IN-32H1-32H2-5OUT-0'/'SGP_30' or a name of a custom predictor. For more info see config_predictors in SI_Toolkit_ASF cost_function_specification: default # One of "default", "quadratic_boundary_grad", "quadratic_boundary_nonconvex", "quadratic_boundary" @@ -23,7 +30,6 @@ mppi-cartpole: LBD: 100.0 # Cost parameter lambda NU: 1000.0 # Exploration variance SQRTRHOINV: 0.02 # Sampling variance - GAMMA: 1.00 # Future cost discount SAMPLING_TYPE: "interpolated" # One of ["iid", "random_walk", "uniform", "repeated", "interpolated"] controller_logging: False # Collect and show detailed insights into the controller's behavior WASH_OUT_LEN: 100 # Only matters if RNN used as predictor; For how long MPPI should be desactivated (replaced either with LQR or random input) to give memory units time to settle @@ -47,7 +53,6 @@ custom-mpc-scipy: controller_logging: True do-mpc-discrete: mpc_horizon: 50 # steps - num_rollouts: 1 # Initial positions position_init: 0.0 positionD_init: 0.0 @@ -57,7 +62,6 @@ do-mpc-discrete: do-mpc: seed: null # If null, random seed based on datetime is used mpc_horizon: 50 # steps - num_rollouts: 1 # Perturbation factors: # Change of output from optimal p_Q: 0.00 diff --git a/Control_Toolkit_ASF/config_cost_function.yml b/Control_Toolkit_ASF/config_cost_function.yml deleted file mode 100644 index 7ecd18cf..00000000 --- a/Control_Toolkit_ASF/config_cost_function.yml +++ /dev/null @@ -1,33 +0,0 @@ -cost_function_name_default: default -# Default value is used if controller does not specify a cost_function_specification (leave empty) -# Cost functions are grouped by environment name in a folder within Control_Toolkit_ASF.Cost_Functions -# Check config.yml to learn more on how cost_functions are selected - -CartPole: - default: - dd_weight: 600.0 - ep_weight: 20000.0 - ekp_weight: 80.0 - ekc_weight: 20.0 - cc_weight: 1.0 - ccrc_weight: 1.0 - R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions! - quadratic_boundary_grad: - dd_weight: 600.0 - ep_weight: 20000.0 - ekp_weight: 80.0 - cc_weight: 1.0 - ccrc_weight: 1.0 - R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions! - quadratic_boundary_nonconvex: - dd_weight: 600.0 - ep_weight: 20000.0 - cc_weight: 1.0 - ccrc_weight: 1.0 - R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions! - quadratic_boundary: - dd_weight: 600.0 - ep_weight: 20000.0 - cc_weight: 1.0 - ccrc_weight: 1.0 - R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions! diff --git a/Control_Toolkit_ASF/config_cost_functions.yml b/Control_Toolkit_ASF/config_cost_functions.yml new file mode 100644 index 00000000..716fbac5 --- /dev/null +++ b/Control_Toolkit_ASF/config_cost_functions.yml @@ -0,0 +1,94 @@ +# this config is to specify specific cost function parameters. The actual cost function used is specified in config_optimers.yml + +cost_function_name_default: default +# Default value is used if controller does not specify a cost_function_specification (leave empty) +# Cost functions are grouped by environment name in a folder within Control_Toolkit_ASF.Cost_Functions +# Check CartPoleSimulation/config.yml to learn more on how cost_functions are selected TODO I don't see any more information there, and which config.yml is this referring to? + +CartPole: + default: + dd_weight: 600.0 + ep_weight: 20000 + ekp_weight: 80.0 + ekc_weight: 20.0 + cc_weight: 1.0 + ccrc_weight: 1.0 + R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions! + quadratic_boundary_grad: + dd_weight: 600.0 + ep_weight: 20000.0 + ekp_weight: 80.0 + cc_weight: 1.0 + ccrc_weight: 1.0 + R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions! + quadratic_boundary_nonconvex: + dd_weight: 600.0 + ep_weight: 20000.0 + cc_weight: 1.0 + ccrc_weight: 1.0 + R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions! + quadratic_boundary: + dd_weight: 600.0 + ep_weight: 20000.0 + cc_weight: 1.0 + ccrc_weight: 1.0 + R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in optimizer config, as it plays a special role in the optimization algorithm as well as is used in cost functions! + cartpole_dancer_cost: # values must be explicitly float or scientific format if needed, since they are cast to int type if provided here as int, which might not work for some ops + # the policy (dance step or move to control). It is trailed by an int to code the policy as an int for tensorflow XLA compiled code. + # e.g. dance0 will create (in update_attributes call) policy='dance' and policy_number=0 + # this hack is to allow branching in XLA code that lacks string compare operator + # see others\globals_and_utilities.py\update_attributes method + + policy: shimmy3 # dance0 (follow csv file) balance1 spin2 shimmy3 cartonly4 cartwheel5 toandfro6 + + # tobi cannot get conditional branch to work for following option in cartpole_dancer_cost.py +# distance_norm: 0 # 0=abs, 1=rmse, 2=mse are possible distance metrics + stage_cost_factor: 1e0 # how much to weight up the state costs + track_edge_barrier_cost: 1e11 # how much to penalize cart at edge of track + track_length_fraction: 0.95 # fraction of half track length to allow free movement of cart before hitting barrier function + + # cost weightings for the different state components for following desired cart state trajectory + cart_pos_weight: 2e6 # weight on cart pos error + pole_angle_weight: 1e6 # weight on difference of angle + cart_vel_weight: 3e2 # weight on difference of cart velocity + pole_swing_weight: 4e+4 # cart angle derivative weight + + # following weight the control and its change + control_cost_weight: 1e1 # cost of control + control_cost_change_weight: 1e0 # cost of changing control + + # spin + spin_dir: 1 # cw+1 ccw-1 1 or cw, -1 or ccw + upright_pole_energy_multiple_to_count_spin_direction_cost: .25 # pole must have this excess total energy before we start counting spin direction energy + spin_energy_weight: 5e+6 # cost factor only for spin behavior to weight the spin energy + + + # balance + balance_dir: 1 # 1 for up, -1 for down, multiplied by target_equilibrium + + # shimmy does a sinusoidal dance of cart around target position with period in seconds and amplitude in meters + shimmy_freq_hz: 1.125 # for 160bpm song, 2.25 Hz is beat, 1.125 is half beat, 0.5625 is quarter beat + shimmy_amp: .04 # in practice, 0.03 results in about .07m amplitude, not clear why + shimmy_freq2_hz: 2. + shimmy_amp2: .025 + shimmy_duration: 8. # for shimmy that lasts a finite time and ramps from freq to freq2 and amp to amp2 + shimmy_dir: up # up, down +# shimmy_plot: 0 # to plot the shimmy function + + # cartonly only controls cart position, pole is ignored. Trajectory is triangular with amp and per + cartonly_amp: 0.05 # amplitude in meters (half of peak to peak of triangle wave) + cartonly_freq_hz: 1.125 + cartonly_duty_cycle: 0.5 # set to .5 for triangle, 0 or 1 for sawtooth + + # cartwheel and toandfro steps. + cartwheel_cycles: +6 # positive cw, negative ccw. For toandfro, must be >2 to do complete back and forth steps + cartwheel_balance_angle_limit_deg: 10.0 + cartwheel_balance_angled_limit_deg_per_sec: 30.0 + cartwheel_freefall_angle_limit_deg: 20. + cartwheel_freefall_angled_limit_deg_per_sec: 30. + cartwheel_freefall_to_after_angle_limit_deg: 60. +# cartwheel_target_duration_s: 1.1 + + dance_song_file: 'others/Media/Rolling_Stones_Satisfaction.mp3' # no spaces or special chars, no / . - etc.... + dance_csv_file: 'Control_Toolkit_ASF/Cost_Functions/CartPole/cartpole_dance-satisfaction.csv' + dance_song_playback_rate: 1. # rate to match song speed compared to real time simulation rate, we should set to 1 for song to sound native but code must run fast enough then diff --git a/Control_Toolkit_ASF/config_optimizers.yml b/Control_Toolkit_ASF/config_optimizers.yml index 8ee940f7..d1d7340b 100644 --- a/Control_Toolkit_ASF/config_optimizers.yml +++ b/Control_Toolkit_ASF/config_optimizers.yml @@ -1,9 +1,40 @@ +# This config specifies the parameters for all the different MPC optimizers, e.g. mppi, rpgd-tf +# To select a particular optimizer from here, see optimizer: key in the config_controllers.yml selection of controller. +mppi: # used as basic MPC optimizer for cartpole (tobi) + seed: null # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!) + mpc_horizon: 40 # steps, 40 steps of 0.02 is .8s + num_rollouts: 700 # Number of Monte Carlo samples + cc_weight: 1.0 + R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in cost functions config, as it plays a special role in the optimization algorithm as well as is used in cost functions! + LBD: 100.0 # Cost parameter lambda + NU: 1000.0 # Exploration variance + SQRTRHOINV: 0.03 # Sampling variance + period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated +rpgd-tf: # this is the tensorflow (tf) version of the Resampling Parallel Gradient Descent (RPGD) optimizer reported in ICRA paper + # the options below are passed directly to the constructor __init__ method of Control_Toolkit/Optimizers/optimizer_rpgd_tf.py and must match the expected options + seed: null # If null, random seed based on datetime is used + mpc_horizon: 40 # steps + SAMPLING_DISTRIBUTION: uniform # "normal" or "uniform" + period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated + learning_rate: 0.05 + adam_beta_1: 0.9 + adam_beta_2: 0.999 + adam_epsilon: 1.0e-08 + gradmax_clip: 5 + rtol: 1.0e-3 + num_rollouts: 32 + opt_keep_k_ratio: .75 + outer_its: 2 + resamp_per: 10 + sample_stdev: 0.5 + warmup: false + warmup_iterations: 250 cem-tf: seed: null # If null, random seed based on datetime is used mpc_horizon: 40 # steps cem_outer_it: 3 #how many outer iterations to use cem_initial_action_stdev: 0.5 - num_rollouts: 200 #how many rollouts per outer cem iteration + batch_size: 200 #how many rollouts per outer cem iteration cem_stdev_min: 0.01 cem_best_k: 40 warmup: false @@ -12,7 +43,7 @@ cem-gmm-tf: seed: null # If null, random seed based on datetime is used mpc_horizon: 40 # steps cem_outer_it: 3 #how many outer iterations to use - num_rollouts: 200 #how many rollouts per outer cem iteration + batch_size: 200 #how many rollouts per outer cem iteration cem_stdev_min: 0.01 cem_initial_action_stdev: 0.5 cem_best_k: 40 @@ -20,7 +51,7 @@ cem-naive-grad-tf: seed: null # If null, random seed based on datetime is used mpc_horizon: 35 # steps cem_outer_it: 1 # how many outer iterations to use - num_rollouts: 200 # how many rollouts per outer cem iteration + batch_size: 200 # how many rollouts per outer cem iteration cem_stdev_min: 0.1 cem_initial_action_stdev: 0.5 cem_best_k: 40 @@ -33,7 +64,7 @@ cem-grad-bharadhwaj-tf: adam_beta_1: 0.9 adam_beta_2: 0.999 adam_epsilon: 1.0e-08 - num_rollouts: 32 + batch_size: 32 cem_best_k: 8 cem_outer_it: 2 cem_initial_action_stdev: 2 @@ -50,7 +81,7 @@ gradient-tf: adam_epsilon: 1.0e-07 rtol: 1.0e-3 gradient_steps: 5 - num_rollouts: 40 + batch_size: 40 initial_action_stdev: 0.5 gradmax_clip: 5 warmup: false @@ -63,7 +94,7 @@ mppi-optimize-tf: adam_epsilon: 1.0e-7 #default: 1.0e-7 gradmax_clip: 1000 mpc_horizon: 35 # steps - num_rollouts: 400 # Number of Monte Carlo samples + batch_size: 400 # Number of Monte Carlo samples cc_weight: 1.0 R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in cost functions config, as it plays a special role in the optimization algorithm as well as is used in cost functions! LBD: 100.0 # Cost parameter lambda @@ -83,24 +114,8 @@ dist-adam-resamp2-tf: rtol: 1.0e-3 num_rollouts: 32 opt_keep_k_ratio: 0.25 - outer_its: 2 - resamp_per: 10 - sample_stdev: 0.5 - warmup: false - warmup_iterations: 250 -rpgd-tf: - seed: null # If null, random seed based on datetime is used - mpc_horizon: 40 # steps - SAMPLING_DISTRIBUTION: uniform # "normal" or "uniform" - period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated - learning_rate: 0.05 - adam_beta_1: 0.9 - adam_beta_2: 0.999 - adam_epsilon: 1.0e-08 - gradmax_clip: 5 - rtol: 1.0e-3 - num_rollouts: 32 - opt_keep_k_ratio: 0.25 + batch_size: 32 + opt_keep_k: 8 outer_its: 2 resamp_per: 10 sample_stdev: 0.5 @@ -137,6 +152,7 @@ rpgd-me-param-tf: maximum_entropy_alpha: 0.0 gradmax_clip: 10 rtol: 1.0e-3 + opt_keep_k: 0 num_rollouts: 32 opt_keep_k_ratio: 0.25 outer_its: 25 @@ -156,6 +172,7 @@ rpgd-ml-tf: maximum_entropy_alpha: 0.1 gradmax_clip: 10 rtol: 1.0e-3 + opt_keep_k: 8 num_rollouts: 32 opt_keep_k_ratio: 0.25 outer_its: 5 @@ -173,6 +190,7 @@ rpgd-particle-tf: adam_epsilon: 1.0e-08 gradmax_clip: 10 rtol: 1.0e-3 + opt_keep_k: 8 num_rollouts: 32 opt_keep_k_ratio: 0.25 outer_its: 5 @@ -183,7 +201,6 @@ rpgd-particle-tf: mppi-var-tf: seed: null # If null, random seed based on datetime is used mpc_horizon: 35 # steps - num_rollouts: 400 # Number of Monte Carlo samples period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated cc_weight: 1.0 R: 1.0 # How much to punish Q @@ -195,16 +212,6 @@ mppi-var-tf: STDEV_min: 0.01 # Maximal variance for sampling STDEV_max: 10 # Minimal sampling variance for sampling max_grad_norm: 100000 # max norm of gradient such that ||gradient||_2 -mppi: - seed: null # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!) - mpc_horizon: 35 # steps - num_rollouts: 3500 # Number of Monte Carlo samples - cc_weight: 1.0 - R: 1.0 # How much to punish Q, For MPPI YOU have to make sure that this is the same as in cost functions config, as it plays a special role in the optimization algorithm as well as is used in cost functions! - LBD: 100.0 # Cost parameter lambda - NU: 1000.0 # Exploration variance - SQRTRHOINV: 0.03 # Sampling variance - period_interpolation_inducing_points: 10 #interpolation stepsize when sampling, a random point is chosen every period_interpolation_inducing_points and horizon points in between are linearly interpolated random-action-tf: seed: null # Seed for rng, for MPPI only, put null to set random seed (do it when you generate data for training!) mpc_horizon: 35 # steps diff --git a/GUI/_CartPoleGUI_MainWindow.py b/GUI/_CartPoleGUI_MainWindow.py index 2acd397e..edd1eba0 100644 --- a/GUI/_CartPoleGUI_MainWindow.py +++ b/GUI/_CartPoleGUI_MainWindow.py @@ -1,6 +1,7 @@ """ Main window (and main class) of CartPole GUI """ +from typing import Any # Necessary only for debugging in Visual Studio Code IDE try: @@ -14,6 +15,8 @@ import sys from others.p_globals import ( + # k, m_cart, m_pole, g, J_fric, M_fric, L, v_max, u_max, controlDisturbance, controlBias, + TrackHalfLength, k, m_cart, m_pole, g, J_fric, M_fric, L, v_max, u_max, controlDisturbance, controlBias, TrackHalfLength, ) @@ -23,7 +26,7 @@ from PyQt6.QtWidgets import QMainWindow, QRadioButton, QApplication, QSlider, QVBoxLayout, \ QHBoxLayout, QLabel, QPushButton, QWidget, QCheckBox, \ QLineEdit, QMessageBox, QComboBox, QButtonGroup, QFrame -from PyQt6.QtCore import QThreadPool, QTimer, Qt +from PyQt6.QtCore import QThreadPool, QTimer, Qt, pyqtSignal, QObject, QRunnable # The main drawing functionalities are implemented in CartPole Class # Some more functions needed for interaction of matplotlib with PyQt6 from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas @@ -56,13 +59,47 @@ pass from GUI._ControllerGUI_NoiseOptionsWindow import NoiseOptionsWindow +from others.prefs import MyPreferences + +prefs=MyPreferences() # store and retrieve sticky values # Class implementing the main window of CartPole GUI -class MainWindow(QMainWindow): +class CartPoleMainWindow(QMainWindow): + + # class instance variable to let controllers etc put status into the window + CartPoleGuiStatusText:QLabel= None + + # class variable to allow objects to check if simulation is running, stopped, etc + CartPoleGuiSimulationState:str='stopped' # 'running', 'paused' + + # signal that emits CartPoleGuiSimulationState when simulation 'running' 'stopped' 'paused' state changes + # to use this signal, in class init, connect a method to process the signal, e.g. + # CartPoleMainWindow.CartPoleMainWindowInstance.CartPoleSimulationStateSignal.connect(self.process_signal) + # the process_signal(str) method gets passed one of the string signals when the main gui button is pressed. + # note that you MUST use the CartPoleMainWindow.CartPoleMainWindowInstance.CartPoleSimulationStateSignal signal that is bound by PyQt inside the *instance* of CartPoleMainWindow; + # do not try to bind to the unbound class variable CartPoleMainWindow.CartPoleSimulationStateSignal + CartPoleSimulationStateSignal:pyqtSignal=pyqtSignal(str) + + # class variable to access the one main window from any subclass, set in init + CartPoleMainWindowInstance:QMainWindow=None + + def set_status_text(text: str) -> None: + """ Sets the text at top of GUI window. Any class can use this via the static class method, e.g. like this: - def __init__(self, *args, **kwargs): - super(MainWindow, self).__init__(*args, **kwargs) + MainWindow.set_status_text(self.format_step(time)) + (the self.format_step(time) call formats the current dance step text) + + :param str: the string to show + """ + if not CartPoleMainWindow.CartPoleGuiStatusText is None and not text is None: + CartPoleMainWindow.CartPoleGuiStatusText.setText(text) + + def __init__(self, parent=None, *args, **kwargs): + super(CartPoleMainWindow, self).__init__(parent, *args, **kwargs) + self.speedup_measured = None # filled in by actual measured speedup relative to real wall clock + CartPoleMainWindow.CartPoleMainWindowInstance=self + # QObject.__init__(self) # region Create CartPole instance and load initial settings # Create CartPole instance @@ -144,16 +181,19 @@ def __init__(self, *args, **kwargs): layout = QVBoxLayout() # endregion + # region - Change geometry of the main window - self.setGeometry(300, 300, 2500, 1000) + self.setGeometry(300, 100, 1000, 800) # origin x,y size w,h # endregion # region - Matplotlib figures (CartPole drawing and Slider) # Draw Figure self.fig = Figure(figsize=(25, 10)) # Regulates the size of Figure in inches, before scaling to window size. + from matplotlib import gridspec + gs = gridspec.GridSpec(2,1, height_ratios=[4, 1]) # make pole plot 4 times larger than cart target position plot self.canvas = FigureCanvas(self.fig) - self.fig.AxCart = self.canvas.figure.add_subplot(211) - self.fig.AxSlider = self.canvas.figure.add_subplot(212) + self.fig.AxCart = self.canvas.figure.add_subplot(gs[0]) + self.fig.AxSlider = self.canvas.figure.add_subplot(gs[1]) self.fig.AxSlider.set_ylim(0, 1) self.CartPoleInstance.draw_constant_elements(self.fig, self.fig.AxCart, self.fig.AxSlider) @@ -162,6 +202,9 @@ def __init__(self, *args, **kwargs): lf = QVBoxLayout() lf.addWidget(self.canvas) + CartPoleMainWindow.CartPoleGuiStatusText = QLabel('(status)') + layout.addWidget(CartPoleMainWindow.CartPoleGuiStatusText) + # endregion # region - Radio buttons selecting current controller @@ -310,12 +353,12 @@ def __init__(self, *args, **kwargs): ip = QHBoxLayout() # Layout for initial position sliders self.initial_position_slider = QSlider(orientation=Qt.Orientation.Horizontal) self.initial_position_slider.setRange(-int(float(1000*TrackHalfLength)), int(float(1000*TrackHalfLength))) - self.initial_position_slider.setValue(0) + self.initial_position_slider.setValue(prefs.get('initial-position',0)) self.initial_position_slider.setSingleStep(1) self.initial_position_slider.valueChanged.connect(self.update_initial_position) self.initial_angle_slider = QSlider(orientation=Qt.Orientation.Horizontal) self.initial_angle_slider.setRange(-int(float(100*np.pi)), int(float(100*np.pi))) - self.initial_angle_slider.setValue(0) + self.initial_angle_slider.setValue(prefs.get('initial-angle',0)) self.initial_angle_slider.setSingleStep(1) self.initial_angle_slider.valueChanged.connect(self.update_initial_angle) ip.addWidget(QLabel("Initial position:")) @@ -400,6 +443,7 @@ def __init__(self, *args, **kwargs): l_text = QHBoxLayout() textbox_title = QLabel('CSV file name:') self.textbox = QLineEdit() + self.textbox.setText(prefs.get('last-csv-file','cartpole-history.csv')) l_text.addWidget(textbox_title) l_text.addWidget(self.textbox) layout.addLayout(l_text) @@ -461,8 +505,7 @@ def __init__(self, *args, **kwargs): # region -- Checkbox: Decide how the cartpole should be displayed self.cb_show_hanging_pole = QCheckBox('Show hanging pole', self) - if self.CartPoleInstance.show_hanging_pole: - self.cb_show_hanging_pole.toggle() + self.cb_show_hanging_pole.setChecked(self.CartPoleInstance.show_hanging_pole) self.cb_show_hanging_pole.toggled.connect(self.cb_show_hanging_pole_f) l_cb.addWidget(self.cb_show_hanging_pole) @@ -563,6 +606,7 @@ def experiment_thread(self): pass self.looper.start_loop() + while not self.terminate_experiment_or_replay_thread: if self.pause_experiment_or_replay_thread: time.sleep(0.1) @@ -591,6 +635,7 @@ def experiment_thread(self): decimals=2)) self.CartPoleInstance.save_history_csv(csv_name=csv_name, mode='save offline') + prefs.put('last-csv-file',csv_name) self.experiment_or_replay_thread_terminated = True @@ -747,6 +792,7 @@ def start_stop_button(self): else: self.start_thread() self.start_or_stop_action = "STOP!" + self.set_simulation_state('running') # If "START! / STOP!" button in "STOP!" mode... elif self.start_or_stop_action == 'STOP!': @@ -761,7 +807,17 @@ def start_stop_button(self): # It is implemented this way, because thread my terminate not only due "STOP!" button # (e.g. replay thread when whole experiment is replayed) self.start_or_stop_action = "START!" - + self.set_simulation_state('stopped') + + def set_simulation_state(self, state): + """ Used by CartPoleMainWindow to send signals to listeners for simulation state. + A signal is emitted on self.CartPoleSimulationStateSignal + + :param state: the str state + """ + CartPoleMainWindow.CartPoleGuiSimulationState = state + self.CartPoleSimulationStateSignal.emit(CartPoleMainWindow.CartPoleGuiSimulationState) + def pause_unpause_button(self): if self.simulator_mode == 'Physical CP': if self.pause_or_unpause_action == 'PAUSE' and self.start_or_stop_action == 'STOP!': @@ -778,10 +834,14 @@ def pause_unpause_button(self): self.pause_or_unpause_action = 'UNPAUSE' self.pause_experiment_or_replay_thread = True self.bp.setText("UNPAUSE") + self.set_simulation_state('paused') + elif self.pause_or_unpause_action == 'UNPAUSE' and self.start_or_stop_action == 'STOP!': self.pause_or_unpause_action = 'PAUSE' self.pause_experiment_or_replay_thread = False self.bp.setText("PAUSE") + self.set_simulation_state('running') + # Run thread. works for all simulator modes. def start_thread(self): @@ -804,9 +864,16 @@ def start_thread(self): # Set user-provided initial values for state (or its part) of the CartPole # Search implementation for more detail - # The following line is important as it let the user to set with the slider the starting target position + # The following line is important as it let the user set with the slider the starting target position # After the slider was reset at the end of last experiment # With the small sliders he can also adjust starting initial_state + self.initial_state = create_cartpole_state() + # don't reset the slider on restart experiment + # self.initial_position_slider.setValue(0) + # self.initial_angle_slider.setValue(0) + # reset state to slider values on restart + self.initial_state[ANGLE_IDX] = self.initial_angle_slider.value() / 100. + self.initial_state[POSITION_IDX] = self.initial_position_slider.value() / 1000. self.reset_variables(2, s=np.copy(self.initial_state), target_position=self.CartPoleInstance.target_position) if self.simulator_mode == 'Random Experiment': @@ -833,8 +900,10 @@ def start_thread(self): elif self.simulator_mode == 'Slider-Controlled Experiment' or self.simulator_mode == 'Random Experiment': worker = Worker(self.experiment_thread) worker.signals.finished.connect(self.finish_thread) + # Execute self.threadpool.start(worker) + # worker.my_worker_signal.emit('start') # finish_threads works for all simulation modes # Some lines mya be redundant for replay, @@ -845,10 +914,15 @@ def finish_thread(self): self.CartPoleInstance.use_pregenerated_target_position = False self.initial_state = create_cartpole_state() - self.initial_position_slider.setValue(0) - self.initial_angle_slider.setValue(0) + # don't reset the slider on restart experiment + # self.initial_position_slider.setValue(0) + # self.initial_angle_slider.setValue(0) + # reset state to slider values on restart + self.initial_state[ANGLE_IDX] = self.initial_angle_slider.value() / 100. + self.initial_state[POSITION_IDX] = self.initial_position_slider.value() / 1000. self.CartPoleInstance.s = self.initial_state + # Some controllers may collect they own statistics about their usage and print it after experiment terminated if self.simulator_mode != 'Replay': try: @@ -940,9 +1014,10 @@ def set_labels_thread(self): mean_dt_real = np.mean(self.looper.circ_buffer_dt_real) if mean_dt_real > 0: + self.speedup_measured=self.CartPoleInstance.dt_simulation / mean_dt_real self.labSpeedUp.setText('Speed-up (measured): x{:.2f}' - .format(self.CartPoleInstance.dt_simulation / mean_dt_real)) - sleep(0.1) + .format(self.speedup_measured)) + sleep(0.5) # rate of label update is 2Hz which is fast enough # Function to measure the time of simulation as experienced by user # It corresponds to the time of simulation according to equations only if real time mode is on @@ -1236,9 +1311,11 @@ def open_additional_noise_widget(self): def update_initial_position(self, value: str): self.initial_state[POSITION_IDX] = float(value) / 1000.0 + prefs.put('initial-position',value) def update_initial_angle(self, value: str): self.initial_state[ANGLE_IDX] = float(value) / 100.0 + prefs.put('initial-angle',value) # endregion @@ -1260,6 +1337,7 @@ def kick_pole(self): elif self.sender().text() == "Right": self.CartPoleInstance.s[ANGLED_IDX] -= .6 + # endregion diff --git a/GUI/_CartPoleGUI_worker_template.py b/GUI/_CartPoleGUI_worker_template.py index c8cf8637..a284ccb7 100644 --- a/GUI/_CartPoleGUI_worker_template.py +++ b/GUI/_CartPoleGUI_worker_template.py @@ -14,6 +14,10 @@ class WorkerSignals(QObject): class Worker(QRunnable): + # register to this signal to be updated about simulation start/stop/pause + my_worker_signal=pyqtSignal(str) + + def __init__(self, fn, *args, **kwargs): super(Worker, self).__init__() diff --git a/GUI/_ControllerGUI_MPPIOptionsWindow.py b/GUI/_ControllerGUI_MPPIOptionsWindow.py index 780bd8a2..e8d92c4f 100644 --- a/GUI/_ControllerGUI_MPPIOptionsWindow.py +++ b/GUI/_ControllerGUI_MPPIOptionsWindow.py @@ -34,7 +34,7 @@ def __init__(self): super(MPPIOptionsWindow, self).__init__() self.horizon_steps = controller_mppi_cartpole.mpc_horizon - self.num_rollouts = controller_mppi_cartpole.num_rollouts + self.batch_size = controller_mppi_cartpole.batch_size self.dd_weight = controller_mppi_cartpole.dd_weight self.ep_weight = controller_mppi_cartpole.ep_weight self.ekp_weight = controller_mppi_cartpole.ekp_weight * 1.0e1 @@ -73,13 +73,13 @@ def __init__(self): slider = QSlider(orientation=Qt.Orientation.Horizontal) slider.setRange(10, 3000) - slider.setValue(self.num_rollouts) + slider.setValue(self.batch_size) slider.setTickPosition(QSlider.TickPosition.TicksBelow) slider.setTickInterval(10) slider.setSingleStep(10) rollouts_options_layout.addWidget(slider) - slider.valueChanged.connect(self.num_rollouts_changed) + slider.valueChanged.connect(self.batch_size_changed) ### Set Cost Weights cost_weight_layout = QVBoxLayout() @@ -261,9 +261,9 @@ def horizon_length_changed(self, val: int): controller_mppi_cartpole.mpc_horizon = self.horizon_steps self.update_slider_labels() - def num_rollouts_changed(self, val: int): - self.num_rollouts = val - controller_mppi_cartpole.num_rollouts = self.num_rollouts + def batch_size_changed(self, val: int): + self.batch_size = val + controller_mppi_cartpole.batch_size = self.batch_size self.update_slider_labels() def dd_weight_changed(self, val: int): @@ -330,7 +330,7 @@ def update_slider_labels(self): f"Horizon: {self.horizon_steps} steps = {round(self.horizon_steps * controller_mppi_cartpole.dt, 2)} s" ) self.rollouts_label.setText( - f"Rollouts: {self.num_rollouts}" + f"Rollouts: {self.batch_size}" ) self.dd_weight_label.setText( f"Distance difference cost weight: {round(self.dd_weight, 2)}" diff --git a/GUI/__init__.py b/GUI/__init__.py index 15d475a4..a133189b 100644 --- a/GUI/__init__.py +++ b/GUI/__init__.py @@ -18,7 +18,7 @@ os.chdir("Driver") # Import custom made elements of GUI -from GUI._CartPoleGUI_MainWindow import MainWindow +from GUI._CartPoleGUI_MainWindow import CartPoleMainWindow # This piece of code gives a custom ID to our application # It is essential for packaging @@ -48,7 +48,7 @@ def run_gui(): # Set the default icon to use for all the windows of our application app.setWindowIcon(QIcon('GUI/gui_icon.ico')) # GUI Icon, feel free to change # Create an instance of the GUI window. - window = MainWindow() + window = CartPoleMainWindow() window.show() # Next line hands the control over to Python GUI app.exec() diff --git a/GUI/gui_default_params.py b/GUI/gui_default_params.py index e5f30f85..1d0f085a 100644 --- a/GUI/gui_default_params.py +++ b/GUI/gui_default_params.py @@ -19,9 +19,9 @@ # controller_init = 'custom-mpc-scipy' # Defines which controller is loaded at the start of the program import os if os.getcwd().split(os.sep)[-1] == 'Driver': - controller_init = 'mpc' # Load as default if loaded as part of physical-cartpole + controller_init = 'energy' # TODO shreyan controller CCNW2023 Load as default if loaded as part of physical-cartpole else: - controller_init = 'mpc' # Load as default if loaded as cartpole simulator stand alone + controller_init = 'energy' # TODO shreyan controller CCNW2023 # Load as default if loaded as cartpole simulator stand alone """ Possible choices for CartPole controller: @@ -51,7 +51,7 @@ # Action toggling between showing the ground level and above # and showing above and below ground level the length of the pole # Second option is good for visualizing swing-up -show_hanging_pole_init = False +show_hanging_pole_init = True # Variables for random trace generation - GUI only # Data Generator sets these parameters independently diff --git a/GUI/loop_timer.py b/GUI/loop_timer.py index 66be15bc..73e75a4c 100644 --- a/GUI/loop_timer.py +++ b/GUI/loop_timer.py @@ -10,10 +10,11 @@ class loop_timer(): """ Simple game loop timer that sleeps for leftover time (if any) at end of each iteration""" LOG_INTERVAL_SEC = 10 - NUM_SAMPLES = 1000 + NUM_SAMPLES = 200 # last few seconds def __init__(self, rate_hz: float = None, dt_target: float = None, do_diagnostics: bool = False) -> None: - """ Make a new loop_timer, specifying the target frame rate in Hz or time interval dt_target in seconds + """ Make a new loop_timer, specifying the target frame rate in Hz or time interval dt_target in seconds. + The actual average rate can be obtained from np.mean(self.looper.circ_buffer_dt_real). :param rate_hz: the target loop rate in Hz. The rate can be changed anytime by modifying rate_hz. :param dt_target: the time interval dt in seconds. It can be changed anytime by modifying dt. @@ -40,7 +41,7 @@ def __init__(self, rate_hz: float = None, dt_target: float = None, do_diagnostic self.last_log_time = 0 self.circ_buffer_dt = deque(iterable=np.zeros(self.NUM_SAMPLES), maxlen=self.NUM_SAMPLES) self.circ_buffer_leftover = deque(iterable=np.zeros(self.NUM_SAMPLES), maxlen=self.NUM_SAMPLES) - self.circ_buffer_dt_real = deque(iterable=np.zeros(50), maxlen=50) + self.circ_buffer_dt_real = deque(iterable=np.zeros(self.NUM_SAMPLES), maxlen=self.NUM_SAMPLES) @property def rate_hz(self): @@ -52,6 +53,9 @@ def dt_target(self): @rate_hz.setter def rate_hz(self, new_rate): + """ Sets the target loop rate. + :param new_rate: the target rate in Hz + """ if new_rate is None: self._rate_hz = None elif new_rate > 0.0: @@ -62,6 +66,9 @@ def rate_hz(self, new_rate): @dt_target.setter def dt_target(self, new_dt): + """ Set the target delta time + :param new_dt: the new target delta time in seconds for each loop iteration + """ if new_dt is None: self._dt_target = None elif new_dt > 0.0: diff --git a/GymlikeCartPole/CartPoleEnv_LTC.py b/GymlikeCartPole/CartPoleEnv_LTC.py index 08ecf403..ff89cf60 100644 --- a/GymlikeCartPole/CartPoleEnv_LTC.py +++ b/GymlikeCartPole/CartPoleEnv_LTC.py @@ -9,10 +9,10 @@ import numpy as np -from typing import Optional, Tuple, Union +from typing import Optional, Tuple -from others.globals_and_utils import load_config, my_logger -logger = my_logger(__name__) +from Control_Toolkit.others.get_logger import get_logger +log = get_logger(__name__) # FIXME: Set reset properly # FIXME: set random position - pregenerate diff --git a/GymlikeCartPole/play_gym.py b/GymlikeCartPole/play_gym.py index cba9016a..ce1b926b 100644 --- a/GymlikeCartPole/play_gym.py +++ b/GymlikeCartPole/play_gym.py @@ -1,6 +1,6 @@ from time import sleep -from others.globals_and_utils import load_config, my_logger +from others.globals_and_utils import load_config from Control_Toolkit.others.globals_and_utils import import_controller_by_name from GymlikeCartPole.CartPoleEnv_LTC import CartPoleEnv_LTC @@ -11,7 +11,8 @@ config = load_config("config.yml") -logger = my_logger(__name__) +from Control_Toolkit.others.get_logger import get_logger +log = get_logger(__name__) env = CartPoleEnv_LTC() controller = Controller(env.CartPoleInstance, **{**config["controller"][controller_name], **{"num_control_inputs": config["cartpole"]["num_control_inputs"]}}) @@ -23,7 +24,7 @@ action = controller.step(state) # action = env.action_space.sample() # take a random action state, reward, done, _ = env.step(action) - logger.info(f"Iteration {k}: Reward = {reward}") + log.info(f"Iteration {k}: Reward = {reward}") if done: break env.close() diff --git a/README.md b/README.md index 822033c1..23ffe56a 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,17 @@ on macOS with `pip list --format=freeze > requirements.txt`. It required some minor manual corrections (deleting the package versions) to make the re-installation smooth. It should be also possible to create this file with `conda list -e > requirements.txt`. +### Notes +**ASF** stands for Application Specific Files. There are ASF for controllers [Control_Toolkit_ASF](Control_Toolkit_ASF/) and System Identification [SI_Toolkit](SI_Toolkit/). + ## Basic Operation 1. **Run GUI:** Run `python run_cartpole_gui.py` from top-level path. -2. **Run a single experiment:** Open `run_data_generator.py`. In the marked section, you can define your experiment. For a single run, set `number_of_experiments = 1`. Then open `./config.yml` to modify controller-related parameters. For example, you can choose there whether MPPI should run with the true model ('predictor_ODE' == Euler Integrator) or with a neural network ('predictor_autoregressive_neural'). Once set, run `python -m run_data_generator`. It will create a new folder `./Experiment_Recordings/` and store a csv log file in it. + If tensoflow JIT compilation fails, turn it off in [SI_Toolkit_ASF/__init__.py](SI_Toolkit_ASF/__init__.py) by setting USE_TENSORFLOW_EAGER_MODE=True until you can sort out tensorflow woes. +2. **Run a single experiment:** Open `run_data_generator.py`. In the marked section, you can define your experiment. For a single run, set `number_of_experiments = 1`. Then open [config.yml](./config.yml) to modify controller-related parameters. For example, you can choose there whether MPPI should run with the true model ('predictor_ODE' == Euler Integrator) or with a neural network ('predictor_autoregressive_neural'). Once set, run `python -m run_data_generator`. Or you can use the pycharm target `Run Cartpole GUI`. It will create a new folder `./Experiment_Recordings/` and store a csv log file in it. + +### Getting Tensorflow Just In Time (JIT) compilation to work +You might need to install the CUDA toolkit and DNN toolkit if you want to accelerate (a LOT) the arithmetic. + See https://www.tensorflow.org/install/pip . Note that this installation can be INSIDE your virtual conda environment, not on the host computer, although you might need to use an NVIDIA graphic driver to give the tools access to the physical GPU. ## Run a Machine Learning Pipeline @@ -95,9 +103,10 @@ The "CSV file name" text box is used for naming a file to be saved or to be load Files regions are folded with #region #endregion syntax For Pycharm default, for Atom install -## Parameter exploration with NNI +## Parameter exploration with Neural Network Intelligence (NNI) + -For intelligent parameter space exploration with NNI, we have 2 special files : +For AutoML intelligent parameter space exploration with [NNI](https://nni.readthedocs.io/en/stable/), we have 2 special files : 1. modeling/rnn_tf/search_space.json : Search space for parameter search 2. config.yml : Configuring the NNI experiments. diff --git a/SI_Toolkit b/SI_Toolkit index 8f460926..89f4ead9 160000 --- a/SI_Toolkit +++ b/SI_Toolkit @@ -1 +1 @@ -Subproject commit 8f460926721c0ba7ff7acb7b91b6ca861f444015 +Subproject commit 89f4ead943a975fb5ce549b0462a234308febde7 diff --git a/SI_Toolkit_ASF/DataSelector.py b/SI_Toolkit_ASF/DataSelector.py index f0276116..f57ee4c0 100644 --- a/SI_Toolkit_ASF/DataSelector.py +++ b/SI_Toolkit_ASF/DataSelector.py @@ -131,8 +131,8 @@ def return_dataset_for_training(self, raw=False ): - if batch_size is None and self.args.batch_size is not None: - batch_size = self.args.batch_size + if batch_size is None and self.args.num_rollouts is not None: + batch_size = self.args.num_rollouts if inputs is None and self.args.inputs is not None: inputs = self.args.inputs diff --git a/SI_Toolkit_ASF/__init__.py b/SI_Toolkit_ASF/__init__.py index 51fa92b6..71b1ca42 100644 --- a/SI_Toolkit_ASF/__init__.py +++ b/SI_Toolkit_ASF/__init__.py @@ -1,2 +1,14 @@ -GLOBALLY_DISABLE_COMPILATION = False # Set to False to use tf.function -USE_JIT_COMPILATION = True # XLA ignores random seeds. Set to False for reproducibility +### Choose whether to run TensorFlow in eager mode (slow, interpreted) or graph mode (fast, compiled) +# Set `USE_TENSORFLOW_EAGER_MODE=False` to: +# 1. decorate functions in optimizers and predictors with `@tf.function`. +# 2. and thereby enable TensorFlow graph mode. This is orders of magnitude faster than the standard eager mode. +# Use USE_TENSORFLOW_EAGER_MODE=True to enable debugging +USE_TENSORFLOW_EAGER_MODE = False + + +### Choose whether to use TensorFlow Accelerated Linear Algebra (XLA). +# XLA uses machine-specific conversions to speed up the compiled TensorFlow graph. +# Set USE_TENSORFLOW_XLA to True to accelerate the execution (for real-time). +# If `USE_TENSORFLOW_XLA=True`, this adds `jit_compile=True` to the `tf.function` decorator. +# However, XLA ignores random seeds. Set to False for guaranteed reproducibility, such as for simulations. +USE_TENSORFLOW_XLA = True diff --git a/__init__.py b/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/config.yml b/config.yml index 3ec15478..343de2cc 100644 --- a/config.yml +++ b/config.yml @@ -1,3 +1,5 @@ +# the root config that specifies basic physical parameters of the cartpole. +# However, for physical-cartpole, we still use the legacy p_globals.py which overrides these parameters. cartpole: actuator_noise: 0.1 # Noise on top of the calculated control input by +/- this value, we usually set 0.5 to explore various controllers while collecting data for training, 0.1 to test an not-ideal case mode: stabilization @@ -5,7 +7,7 @@ cartpole: PATH_TO_EXPERIMENT_RECORDINGS_DEFAULT: './Experiment_Recordings/' # Where to save experiment recording per default m_pole: 0.087 # mass of pole, kg # Checked by Antonio & Tobi m_cart: 0.230 # mass of cart, kg # Checked by Antonio - L: "0.395/2.0" # HALF (!!!) length of pend, m # Checked by Antonio & Tobi + L: 0.199 # HALF (!!!) length of pole, full length is .398m, m # Checked by Antonio & Tobi u_max: 2.62 # max force produced by the motor, N # Checked by Marcin M_fric: 4.77 # cart friction on track, N/m/s # Checked by Marcin J_fric: 2.5e-4 # friction coefficient on angular velocity in pole joint, Nm/rad/s # Checked by Marcin @@ -15,7 +17,8 @@ cartpole: controlDisturbance: 0.0 # disturbance, as factor of u_max # I used 0.2-0.5 for data collection controlBias: 0.0 # bias of control input g: 9.81 # absolute value of gravity acceleration, m/s^2 - k: "1.0/3.0" # Dimensionless factor of moment of inertia of the pole with length 2L: I: (1/3)*m*(2L)^2 = (4/3)*m*(L)^2 + k: 0.33333333333 # 1/3: Dimensionless factor of moment of inertia of the pole with length 2L: I: (1/3)*m*(2L)^2 = (4/3)*m*(L)^2 + cart_bounce_factor: .8 # fraction of cart speed after bounce from end of track, tobi guesstimated, not yet measured latency: 0.0 # s noise: noise_mode: 'OFF' diff --git a/config_data_gen.yml b/config_data_gen.yml index 7fda622c..d5327d7f 100644 --- a/config_data_gen.yml +++ b/config_data_gen.yml @@ -1,7 +1,7 @@ controller: mpc PATH_TO_EXPERIMENT_RECORDINGS_DEFAULT: './Experiment_Recordings/' # Where to save experiment recording per default -seed: 1998 # If not assign random seed based on datetime is used -length_of_experiment: 3600 # Length of each experiment in s: +seed: 1998 # If not assigned (i.e. commented out) random seed based on datetime is used +length_of_experiment: 60 # Length of each experiment in s: split: [0.8, 0.1] # Train/Val/(Test) split - only matters if you run it in ML Pipeline mode random_initial_state: # Unassigned variables will be randomly initialized (see below) @@ -33,4 +33,4 @@ save_mode: 'online' # It was intended to save memory usage, but it doesn't seem # Show popup window in the end with summary of experiment? show_summary_plots: False show_controller_report: False -number_of_experiments: 10 # How many experiments will be generated \ No newline at end of file +number_of_experiments: 1 # How many experiments will be generated \ No newline at end of file diff --git a/others/Controllers-out-of-use/controller_mpc_opti.py b/others/Controllers-out-of-use/controller_mpc_opti.py index a35852e5..bc8942d1 100644 --- a/others/Controllers-out-of-use/controller_mpc_opti.py +++ b/others/Controllers-out-of-use/controller_mpc_opti.py @@ -130,9 +130,9 @@ def cost_function(self, Q_hat): return cost - def step(self, s, target_position, time=None): + def step(self, state, target_position, time=None): - self.s = s + self.s = state self.target_position = target_position opti = casadi.Opti() diff --git a/others/Controllers-out-of-use/controller_pid.py b/others/Controllers-out-of-use/controller_pid.py index 9d2d69ec..627842e6 100644 --- a/others/Controllers-out-of-use/controller_pid.py +++ b/others/Controllers-out-of-use/controller_pid.py @@ -24,12 +24,12 @@ def __init__(self): self.I_position = config["controller"]["pid"]["I_position"] self.D_position = config["controller"]["pid"]["D_position"] - def step(self, s: np.ndarray, target_position: np.ndarray, time=None): + def step(self, state: np.ndarray, target_position: np.ndarray, time=None): error = -np.array([ - [s[cartpole_state_varname_to_index('position')] - target_position], - [s[cartpole_state_varname_to_index('positionD')]], - [s[cartpole_state_varname_to_index('angle')]], - [s[cartpole_state_varname_to_index('angleD')]] + [state[cartpole_state_varname_to_index('position')] - target_position], + [state[cartpole_state_varname_to_index('positionD')]], + [state[cartpole_state_varname_to_index('angle')]], + [state[cartpole_state_varname_to_index('angleD')]] ]) positionCMD = self.P_position * error[0, :] + self.D_position * error[1] diff --git a/others/Media/01 Sookie_ Sookie.mp3 b/others/Media/01 Sookie_ Sookie.mp3 new file mode 100644 index 00000000..2e8ed82a Binary files /dev/null and b/others/Media/01 Sookie_ Sookie.mp3 differ diff --git a/others/Media/Rolling_Stones_Satisfaction.mp3 b/others/Media/Rolling_Stones_Satisfaction.mp3 new file mode 100644 index 00000000..3c624ed5 Binary files /dev/null and b/others/Media/Rolling_Stones_Satisfaction.mp3 differ diff --git a/others/PycharmRunConfigurations/Generate Data.run.xml b/others/PycharmRunConfigurations/Generate Data.run.xml index 2f40dabd..18d452d5 100644 --- a/others/PycharmRunConfigurations/Generate Data.run.xml +++ b/others/PycharmRunConfigurations/Generate Data.run.xml @@ -1,14 +1,15 @@ - +