diff --git a/.gitignore b/.gitignore index 56d5d8b..e12b846 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,8 @@ __pycache__ *.swp venv ipxe +node_modules +/test-results/ +/playwright-report/ +/playwright/.cache/ +settings.yaml diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8bf65d1..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -ansible -requests diff --git a/src/requirements.txt b/src/requirements.txt index 38841a2..198f420 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,7 +1,8 @@ Flask SQLAlchemy gunicorn - PyMySQL coloredlogs python-dotenv +ansible +strictyaml diff --git a/src/web-ui/README.md b/src/web-ui/README.md index bb0a6b8..1fd3019 100644 --- a/src/web-ui/README.md +++ b/src/web-ui/README.md @@ -13,6 +13,14 @@ pip install -r requirements.txt ``` ./run.sh ``` + +# Running from shell +``` +source ../../venv/bin/activate +sudo -E env PATH=$PATH python # See https://askubuntu.com/a/1342154 +``` +``` + # Visit http://127.0.0.1:5000/ diff --git a/src/web-ui/app.py b/src/web-ui/app.py index 0b6a56f..5f6e189 100644 --- a/src/web-ui/app.py +++ b/src/web-ui/app.py @@ -7,12 +7,30 @@ import json from time import sleep import platform +from settings import load_settings +from tenacity import retry, wait_exponential, before_log, stop_after_attempt +import logging +import sys +import time +from functools import wraps + + +logging.basicConfig(stream=sys.stderr, level=logging.DEBUG) +log = logging.getLogger(__name__) + app = APIFlask(__name__) app.config.update(TESTING=True, SECRET_KEY=os.getenv("SECRET_KEY")) +settings = load_settings() IDRAC_HOST = None # noqa: F841 IDRAC_USERNAME = None # noqa: F841 IDRAC_PASSWORD = None # noqa: F841 +HOST_HEALTHCHECK_POLL_IP = None +DEFAULT_HTTP_REQ_TIMEOUT = os.getenv("DEFAULT_HTTP_REQ_TIMEOUT", 60) +IDRAC_HTTP_REQ_TIMEOUT = os.getenv("IDRAC_HTTP_REQ_TIMEOUT", 15) +IDRAC_SLEEP_AFTER_RESET_REQUEST_REQ = os.getenv( + "IDRAC_SLEEP_AFTER_RESET_REQUEST_REQ", 3 +) 
playwright_working_dir = os.getenv( "PLAYWRIGHT_WORKING_DIR", "../playwright-boostrap/" @@ -30,16 +48,158 @@ session_requests.verify = False +def countdown(seconds): + log.info(f"Sleeping for {seconds} seconds") + for remaining in range(seconds, 0, -1): + sys.stdout.write(f"\rTime left: {remaining} seconds") + sys.stdout.flush() + time.sleep(1) + # Clear the line after countdown ends + sys.stdout.write("\rCountdown finished!\n") + + +def ConnectToVPN(): + """ + Attempt to connect to VPN + Assumptions: + - Any existing VPN connection will be torn down + - Credentials for VPN will be fetched using secret(s) + required to fetch them + - VPN tunnel (wireguard) will be started + """ + log.info( + "Tear down any existing VPN connection " + "(assumes wg-quick is used for WireGuard" + ) + subprocess.run(["wg-quick", "down", "wg0"], check=False) + + log.info("Download the psonoci tool") + subprocess.run( + [ + "curl", + "https://get.psono.com/psono/psono-ci/x86_64-linux/psonoci", + "--output", + "./psonoci", + ], + check=True, + ) + + log.info("Mark psonoci as executable") + subprocess.run(["chmod", "+x", "./psonoci"], check=True) + + # Fetch credentials using the psonoci tool + PSONO_CI_VPN_SECRET_NOTE_ID = settings.get( + "PSONO_CI_VPN_SECRET_NOTE_ID" + ).value # noqa: E501 + + try: + os.environ["PSONO_CI_API_KEY_ID"] = settings.get( + "PSONO_CI_API_KEY_ID" + ).value # noqa: E501 + os.environ["PSONO_CI_API_SECRET_KEY_HEX"] = settings.get( + "PSONO_CI_API_SECRET_KEY_HEX" + ).value + os.environ["PSONO_CI_SERVER_URL"] = settings.get( + "PSONO_CI_SERVER_URL" + ).value # noqa: E501 + result = subprocess.run( + [ + "./psonoci", + "secret", + "get", + PSONO_CI_VPN_SECRET_NOTE_ID, + "notes", + ], # noqa: E501 + check=True, + capture_output=True, + text=True, + env=os.environ, + ) + log.info(result) + except Exception as e: + log.error(e) + + vpn_config = result.stdout.strip() + + log.debug("Write the VPN configuration to /etc/wireguard/wg0.conf") + with 
open("/etc/wireguard/wg0.conf", "w") as vpn_file: + vpn_file.write(vpn_config) + + try: + log.debug("Start the VPN tunnel") + sleep(3) + subprocess.run(["wg-quick", "up", "wg0"], check=False) + print("VPN connected successfully.") + + except subprocess.CalledProcessError as e: + print(f"An error occurred while executing a command: {e}") + except Exception as e: + print(f"An unexpected error occurred: {e}") + + +def recover_from_error_vpn_not_active(retry_state): + """Attempt to recover from error VPN + not active. + """ + log.debug(retry_state) + ConnectToVPN() + + +@retry( + wait=wait_exponential(multiplier=1, min=5, max=10), + before=before_log(log, logging.DEBUG), + stop=stop_after_attempt(4), + retry_error_callback=recover_from_error_vpn_not_active, +) +def vpn_must_be_up(f): + """ + Checks for a route to the IDRAC_HOST + The/a valid VPN connection + must be up for the majority of the server + bootstrap process to work. + + If the VPN is *up*, then the IDRAC_HOST + will be reachable (there will be a route to + that host/IP). + If the VPN is *down* then the IDRAC_HOST will + likely be 'no route to host'. + """ + + @wraps(f) + def wrapper(*args, **kwds): + log.info("Calling wrapper vpn_must_be_up") + try: + url = f"https://{settings.get('IDRAC_HOST')}/start.html" + log.info(f"Contacting: {url}") + requests.get(url, verify=False, timeout=DEFAULT_HTTP_REQ_TIMEOUT) + except Exception as e: + log.error(f"Verify VPN connection is up & functioning. 
{e}") + log.debug("Attempting reconnect of VPN") + ConnectToVPN() + return f(*args, **kwds) + + return wrapper + + def api_response(req): + try: + resp = req.json() + except Exception: + resp = req.text return ( - jsonify({"resp": req.text, "status_code": req.status_code}), + jsonify({"resp": resp, "status_code": req.status_code}), req.status_code, ) -def api_call(path=None, method=None, payload=None, raw_payload=False): +def api_call( + path=None, method=None, payload=None, raw_payload=False, timeout=60 +): # noqa: E501 assert method is not None - url = f"https://{os.getenv('IDRAC_HOST')}/redfish/v1/{path}" + if "redfish" not in path and "http" not in path: + url = f"https://{os.getenv('IDRAC_HOST')}/redfish/v1/{path}" + if "redfish" in path: + url = f"https://{os.getenv('IDRAC_HOST')}/{path}" authHeaders = HTTPBasicAuth( os.getenv("IDRAC_USERNAME"), os.getenv("IDRAC_PASSWORD") ) # noqa: E501 @@ -50,6 +210,7 @@ def api_call(path=None, method=None, payload=None, raw_payload=False): url, auth=authHeaders, verify=False, + timeout=timeout, # noqa: E501 ) elif method == "POST": if raw_payload: @@ -68,6 +229,14 @@ def api_call(path=None, method=None, payload=None, raw_payload=False): json=payload, headers={"Content-Type": "application/json"}, ) + elif method == "PATCH": + req = requests.patch( + url, + auth=authHeaders, + verify=False, + json=payload, + headers={"Content-Type": "application/json"}, + ) return req @@ -88,7 +257,7 @@ def load_idrac_settings(): exec(f"{setting}=None") # Check if setting is set in environment variable if os.getenv(setting, None): - exec(f"{setting}={os.getenv(setting)}") + exec(f"{setting}='{os.getenv(setting)}'") # Check if setting is set in url get parameter if request.args.get(setting, None): session[setting] = request.args.get(setting) @@ -153,8 +322,8 @@ def ping(ip): attempt += 1 print( f"Attempt {attempt}/{max_attempts}: " - "No response from {ip}. " - "Retrying in {interval} seconds..." + f"No response from {ip}. 
" + f"Retrying in {interval} seconds..." ) sleep(interval) @@ -165,7 +334,7 @@ def ping(ip): return False -def justKeepRedeploying(max_repeated_deploys=-1, delayBetweenRedeploy=10): +def justKeepRedeploying(max_repeated_deploys=-1, delayBetweenRedeploy=180): print("Starting justKeepRedeploying") execute_redfish_command("Bootstrap") @@ -173,8 +342,7 @@ def justKeepRedeploying(max_repeated_deploys=-1, delayBetweenRedeploy=10): while deploy_count < max_repeated_deploys or max_repeated_deploys == -1: print(f"Deployment #{deploy_count + 1}") - print(f"Sleeping for {delayBetweenRedeploy} seconds") - sleep(delayBetweenRedeploy) + countdown(delayBetweenRedeploy) execute_redfish_command("Bootstrap") @@ -185,21 +353,20 @@ def justKeepRedeploying(max_repeated_deploys=-1, delayBetweenRedeploy=10): print("Deployment loop finished") -def execute_redfish_command(action): +def execute_redfish_command(action, redfish_uri=None): if action == "Bootstrap": + VerifyVPNAccess() VerifyiDRACAccess() ForceOff() sleepSecconds = 15 - print(f"Sleeping for {sleepSecconds}") - sleep(sleepSecconds) + countdown(sleepSecconds) # iDRACSetVirtualTerminalHTML5() UnmountISO() MountISO() SetBootFromVirtualMedia() GetPowerState() sleepSecconds = 10 - print(f"Sleeping for {sleepSecconds}") - sleep(sleepSecconds) + countdown(sleepSecconds) PowerOn() # Setup host disks # Run install @@ -223,6 +390,12 @@ def execute_redfish_command(action): command = f"python {IDRAC_SCRIPTS_BASE_PATH}ChangeBiosBootOrderREDFISH.py -ip {IDRAC_HOST} -u {IDRAC_USERNAME} -p {IDRAC_PASSWORD} --get" # noqa: E501 result = subprocess.run(command, capture_output=True, shell=True) + if action == "RawRequest": + req = api_call( + path=redfish_uri, + method="GET", + ) + return req else: command = f"python {IDRAC_SCRIPTS_BASE_PATH}GetSetPowerStateREDFISH.py -ip {IDRAC_HOST} -u {IDRAC_USERNAME} -p {IDRAC_PASSWORD} --set {action}" # noqa: E501 result = subprocess.run(command, capture_output=True, shell=True) @@ -242,8 +415,39 @@ def 
bootstrap(): return execute_redfish_command("Bootstrap") +@vpn_must_be_up +def VerifyVPNAccess(): + pass + + +# NOTE "max" does not mean what you may think it +# means - you probably want "stop_after_attempt" +# See: +# https://tenacity.readthedocs.io/en/latest/index.html?highlight=max_attempts +@retry( + wait=wait_exponential(multiplier=1, min=4, max=20), + before=before_log(log, logging.DEBUG), + stop=stop_after_attempt(20), +) +@vpn_must_be_up def VerifyiDRACAccess(): - req = api_call(path="Systems/", method="GET") + try: + req = api_call( + path="Systems/", method="GET", timeout=IDRAC_HTTP_REQ_TIMEOUT + ) # noqa: E501 + if req.status_code == 200: + log.info(f"iDRACAccess is OK. Got status code {req.status_code}") + if req.status_code == 401: + msg = f"VerifyiDRACAccess returned 401 {req.text}" + log.error(msg) + raise Exception(msg) + + except requests.exceptions.ConnectionError as e: + log.error(f"Connection error occurred: {e}") + raise + except requests.exceptions.RequestException as e: + log.error(f"An HTTP error occurred: {e}") + raise return req @@ -253,9 +457,49 @@ def route_VerifyiDRACAccess(): return api_response(req) -@app.route("/api/v1/ResetiDRAC", methods=["POST"]) def ResetiDRAC(): - return api_response("Not implemented") + """ + Soft reset the iDRAC. + Since Dell iDRACs can easily get into a 'bad' state + the iDRAC reset often overcomes these bad states. + + For example, mounting virtual media, even after unmounting + media and terminating sessions can leave the iDRAC in an + 'operations busy' state with no way to mount new media. + + Dell documents this behaviour as: + "Sometimes, iDRAC may become unresponsive due to various reasons." 
+ https://www.dell.com/support/kbdoc/en-uk/000126703/how-to-reset-the-internal-dell-remote-access-controller-idrac-on-a-poweredge-server + + See also Rigor: https://oxide.computer/principles + + """ + print("ResetiDRAC") + data = {"ResetType": "GracefulRestart"} + req = api_call( + path="Managers/iDRAC.Embedded.1/Actions/Manager.Reset/", + method="POST", # noqa: E501 + payload=data, + ) + msg = ( + f"Sleeping {IDRAC_SLEEP_AFTER_RESET_REQUEST_REQ} " + "secconds to give iDRAC time to commence the reset " + "without sleeping, subsequent iDRAC api request may appear to " + "succeed before the iDRAC actually starts performing it's reset, " + "causing confusing.\n" + "Note this sleep has nothing to do with verifying the iDRAC has " + "completed it's reset. For that, calls to VerifyiDRACAccess may be " + "made." + ) + log.info(msg) + sleep(IDRAC_SLEEP_AFTER_RESET_REQUEST_REQ) + return req + + +@app.route("/api/v1/ResetiDRAC", methods=["POST"]) +def route_ResetiDRAC(): + req = ResetiDRAC() + return req def iDRACSetVirtualTerminalHTML5(): @@ -359,10 +603,11 @@ def set_power_graceful_shutdown(): return execute_redfish_command("GracefulShutdown") +@vpn_must_be_up def GetPowerState(): print("GetPowerState") req = api_call(path="Systems/System.Embedded.1", method="GET") - + req.raise_for_status() if req.status_code == 200: power_state = req.json().get("PowerState") print(f"PowerState is {power_state}") @@ -380,6 +625,7 @@ def get_bios_boot_order(): return execute_redfish_command("ChangeBiosBootOrderREDFISH") +@vpn_must_be_up def MountISO(): print("MountISO") data = {"Image": "http://138.201.59.208/sites/default/files/ipxe.iso"} @@ -400,8 +646,12 @@ def MountISO(): ): # noqa: E501 print("The Virtual Media image server is already connected.") else: - print(f"req.status_code: {req.status_code}") - pass + log.error(f"req.status_code: {req.status_code}") + log.error( + "Performing ResetiDRAC to attempt VirtualMedia state fix" + ) # noqa: E501 + ResetiDRAC() + VerifyiDRACAccess() 
except Exception as e: print(e) elif req.status_code == 204: @@ -415,6 +665,7 @@ def route_MountISO(): return api_response(req) +@vpn_must_be_up def UnmountISO(): print(UnmountISO) req = api_call( @@ -431,6 +682,23 @@ def route_UnmountISO(): return api_response(req) +@vpn_must_be_up +def EnableHostWatchdogTimer(): + req = api_call( + path="/redfish/v1/Systems/System.Embedded.1", # noqa: E501 + method="PATCH", + payload={"HostWatchdogTimer": {"FunctionEnabled": True}}, + ) + return req + + +@app.route("/api/v1/EnableHostWatchdogTimer", methods=["POST"]) +def route_EnableHostWatchdogTimer(): + req = EnableHostWatchdogTimer() + return api_response(req) + + +@vpn_must_be_up def SetBootFromVirtualMedia(): payload = { "ShareParameters": {"Target": "ALL"}, @@ -457,3 +725,21 @@ def route_SetBootFromVirtualMedia(): @app.route("/api/v1/GetOnetimeBootValue", methods=["POST"]) def get_current_onetime_boot_order(): return execute_redfish_command("GetOnetimeBootValue") + + +@vpn_must_be_up +def RawRequest(redfish_uri: str): + """ + Pass any valid Redfish api url and the response is returned + e.g. 
+ https://192.168.1.1/redfish/v1/Systems/System.Embedded.1?$select=BootProgress/LastState + + """ + return execute_redfish_command("RawRequest", redfish_uri=redfish_uri) + + +@app.route("/api/v1/RawRequest", methods=["POST"]) +def route_RawRequest(): + redfish_uri = request.json.get("data") + req = RawRequest(redfish_uri) + return api_response(req) diff --git a/src/web-ui/requirements.txt b/src/web-ui/requirements.txt index f941d23..c289f97 100644 --- a/src/web-ui/requirements.txt +++ b/src/web-ui/requirements.txt @@ -1,3 +1,4 @@ flask gunicorn requests +tenacity diff --git a/src/web-ui/settings.yaml.example b/src/web-ui/settings.yaml.example new file mode 100644 index 0000000..d15837d --- /dev/null +++ b/src/web-ui/settings.yaml.example @@ -0,0 +1,12 @@ +--- +IDRAC_HOST: "https://192.168.1.1" +IDRAC_USERNAME: root +IDRAC_PASSWORD: calvin +HOST_HEALTHCHECK_POLL_IP: 10.0.0.1 +DEFAULT_HTTP_REQ_TIMEOUT: 20 +IDRAC_SLEEP_AFTER_RESET_REQUEST_REQ: 3 + +PSONO_CI_API_KEY_ID: changeme +PSONO_CI_API_SECRET_KEY_HEX: changeme +PSONO_CI_SERVER_URL: https://psono.example.com +PSONO_CI_VPN_SECRET_NOTE_ID: changeme diff --git a/src/web-ui/templates/index.html b/src/web-ui/templates/index.html index f1bd42b..b6d6d85 100644 --- a/src/web-ui/templates/index.html +++ b/src/web-ui/templates/index.html @@ -99,20 +99,31 @@ } } + // Define GetPowerState + function GetPowerState() { + console.log("Checking GetPowerState"); + const timeNow = new Date().getTime(); + fetch('{{ url_for('route_GetPowerState') }}', {method: 'POST'}) + .then(response => response.json()) + .then(api => { + console.log("Got GetPowerState response"); + const timeSince = timeAgo(timeNow); + console.log(api); + let PowerState = api.resp['PowerState']; + document.querySelector("#SystemUUIDOutput").textContent = api.resp['UUID']; + GetPowerStateOutput.textContent= `${PowerState} (${timeSince})`; + }); + } + + // GetPowerState as soon as DOMContentLoaded + document.addEventListener("DOMContentLoaded", (event) => { + 
console.log("DOM fully loaded and parsed"); + GetPowerState(); + }); + + // GetPowerState every x interval - window.setInterval( function() { - console.log("Logging.."); - console.log("Checking GetPowerState"); - const timeNow = new Date().getTime(); - fetch('/api/v1/GetPowerState', {method: 'POST'}) - .then(response => response.json()) - .then(api => { - console.log("Got GetPowerState response"); - const timeSince = timeAgo(timeNow); - console.log(api); - GetPowerStateOutput.textContent= `${api['resp']['PowerState']} (${timeSince})`; - }); - }, 15000) + window.setInterval( GetPowerState, 15000); function displayError(error) { const errorDiv = document.getElementById('error'); @@ -132,7 +143,17 @@ } button.classList.add('disabled'); button.classList.add('loading'); - fetch(`/api/v1/${button.textContent}`, {method: 'POST'}) + let postData = ''; + if (button.textContent == "RawRequest") { + postData = document.querySelector("#RawRequestURL").value; + } + fetch(`/api/v1/${button.textContent}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({data: postData}) + }) .then(response => response.json()) .then(api => { console.log("reply"); @@ -146,13 +167,17 @@ document.getElementById("output").appendChild(errorContainer); } else { let output = document.getElementById('output') - output.innerHTML = api.resp.split('\n').map(line => `
${line}
`).join(''); - console.log(api); - } + if (typeof(api) == 'object') { + output.innerHTML = "
" + JSON.stringify(api.resp, null, 2) + "
"; + } else { + output.innerHTML = api.resp.split('\n').map(line => `
${line}
`).join(''); + } + console.log(api); + } }) .catch(error => console.error(error)) .finally(() => { - const buttons = document.getElementsByClassName('button'); + const buttons = document.querySelectorAll('.button, .rawRequestBtn'); for (let i = 0; i < buttons.length; i++) { buttons[i].removeAttribute('disabled'); buttons[i].classList.remove('disabled'); @@ -168,7 +193,10 @@

Web Scale Console

Control the server via the browser

Last known Server State

Loaded IDRAC settings:

+

Make a raw request

+ +

Api responses


...
Bootstrap
@@ -186,6 +217,7 @@

Api responses

ForceRestart
MountISO
UnmountISO
+
EnableHostWatchdogTimer
SetBootFromVirtualMedia
GetPowerState
PowerOn