From b97c9fe49bb079a6d7b3cb06ecb5a993a47a063b Mon Sep 17 00:00:00 2001 From: Sam Tannous Date: Mon, 18 Jul 2022 14:41:03 -0400 Subject: [PATCH 1/2] Added nv-mlnxipcfg.py --- README.md | 91 +++++++++++- nv-mlnxipcfg.py | 363 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 453 insertions(+), 1 deletion(-) create mode 100755 nv-mlnxipcfg.py diff --git a/README.md b/README.md index 35962bd..11e77d3 100644 --- a/README.md +++ b/README.md @@ -9,4 +9,93 @@ pages and provided as a reference for recipe implementation It is recommended for use during bring-up and that you implement only required components for deployment in production environments -use "-h" for more details \ No newline at end of file +use "-h" for more details + + +# nv-mlnxipcfg.py + +nv-mlnxipcfg.py configures and sets persistency behavior for IP addresses, IP rules, IP route, and ARP settings + +Requires: Python 3.x and python3-click +``` +nv-mlnxipcfg.py --help + +Usage: nv-mlnxipcfg.py [OPTIONS] + +Options: + -v, --verbose verbose logging + -i, --ipaddr TEXT Starting IP address with netmask (for example: + 192.168.1.1/24 or 2001::66/64) [required] + -f, --flush Flush IP addresses before adding new ones + -r, --dryrun Dry run. Do not actually assign IP addresses or add + netplan cfg + -d, --devices TEXT comma separated device list (for example: + enp225s0f0,enp225s0f1) if '-d' not provided, tool will + configure all found Ethernet devices + --help Show this message and exit. +``` +Debian/Ubuntu Based OS +====================== +1. A required configuration of a starting IP address (IPv4 or IPv6) with netmask (e.g. 192.168.1.1/24) is provided. +2. It then looks at all the InfiniBand interfaces it can find (under /sys/class/infiniband/*) +3. It then checks to make sure they are "Ethernet" link_type by checking /sys/class/infiniband/{}/ports/*/link_layer +4. If the flush option is provided (-f or --flush) it will first remove all the IP addresses on the interface (ip addr flush ...) +5. 
Now for all the Ethernet type interfaces, it adds an IP address starting with the one provided. +6. It will then run 3 ip route and one ip rule commands using the source IP address and a default gateway (Note: I assume the default gateway is one IP address less than the broadcast address for the network provided): + a. ip route add 0.0.0.0/1 via {} dev {} table {} proto static metric {} + b. ip route add {} dev {} table {} proto static scope link src {} metric {} + c. ip route add 128.0.0.0/1 via {} dev {} table {} proto static metric {} + d. ip rule add from {} table {} priority 32761 +7. It will then run the sysctl command and set the following for each interface + a. net.ipv4.conf.{}.arp_accept = 1 + b. net.ipv4.conf.{}.arp_announce = 1 + c. net.ipv4.conf.{}.arp_filter = 0 + d. net.ipv4.conf.{}.rp_filter = 2 + e. net.ipv4.conf.{}.arp_ignore = 1 +8. After that, it will create a netplan configuration with the IP addresses, routes, and route-policy (as shown in the docs I was provided), using the same config as the ip addr add, ip route and ip rule commands. The file it will write to maintain persistence is called /etc/netplan/55-nvidia-autoconfig.yaml. +9. The sysctl configuration file is constructed with the same values for each interface for the 5 ARP and RP settings and written to /etc/sysctl.d/55-nvidia-arpdefaults.conf + +RHEL Based OS +============= +To support RHEL and CENTOS servers, the ifcfg and route scripts are written to + + /etc/sysconfig/network-scripts/ + +These are needed for persistence upon reboot. + +In addition to the previous configs (ip addr add, ip route, ip rule, sysctl ARP settings) for RHEL servers, +this script adds an ifcfg and route config file for each interface on RHEL and CENTOS servers instead of a NETPLAN config file. 
+ +For example, for the single interface enp225s0f0 with address 66.66.66.66/24: + +[lab@cl1-fair-01 ~]$ cat /etc/sysconfig/network-scripts/ifcfg-enp225s0f0 +BOOTPROTO=none +NAME=enp225s0f0 +DEVICE=enp225s0f0 +ONBOOT=yes +IPADDR=66.66.66.66 +PREFIX=24 +DEFROUTE=yes +GATEWAY=66.66.66.254 +ROUTING_RULE="priority 32761 from 66.66.66.66 table 101" +IPV4_FAILURE_FATAL=no +IPV6INIT=yes +IPV6_AUTOCONF=yes +IPV6_DEFROUTE=yes +IPV6_FAILURE_FATAL=no + +[lab@cl1-fair-01 ~]$ cat /etc/sysconfig/network-scripts/route-enp225s0f0 +ADDRESS0=0.0.0.0 +NETMASK0=128.0.0.0 +GATEWAY0=66.66.66.254 +METRIC0=101 +OPTIONS0="table 101" +ADDRESS1=128.0.0.0 +NETMASK1=128.0.0.0 +GATEWAY1=66.66.66.254 +METRIC1=101 +OPTIONS1="table 101" +ADDRESS2=66.66.66.0 +NETMASK2=255.255.255.0 +METRIC2=101 +OPTIONS2="onlink src 66.66.66.66 table 101" diff --git a/nv-mlnxipcfg.py b/nv-mlnxipcfg.py new file mode 100755 index 0000000..55e84b8 --- /dev/null +++ b/nv-mlnxipcfg.py @@ -0,0 +1,363 @@ +#!/usr/bin/env python3 + +# The MIT License (MIT) +# +# Copyright (c) 2020, NVIDIA CORPORATION +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import click +import glob +import ipaddress +import os +import re +import platform +import sys +import subprocess +import yaml +import pprint + +@click.command() +#@click.argument('name') +@click.option('--verbose', '-v', help="verbose logging", is_flag=True) +@click.option('--ipaddr', '-i', required=True, help="Starting IP address with netmask (for example: 192.168.1.1/24 or 2001::66/64)") +@click.option('--flush', '-f', help="Flush IP addresses before adding new ones", is_flag=True) +@click.option('--dryrun', '-r', help="Dry run. Do not actually assign IP addresses or add netplan cfg", is_flag=True) +@click.option('--devices', '-d', help="""comma separated device list (for example: enp225s0f0,enp225s0f1) if '-d' not provided, tool will configure all found Ethernet devices""") +def main(ipaddr, devices, verbose, flush, dryrun): + if devices is None: + devices = get_mlnx_ethernet_ifnames() + else: + devices = devices.split(',') + + if verbose: + print("devices are: {}".format(devices)) + if devices == []: + print("Error: missing network devices to configure.") + print(" Script could not find devices or -d") + print(" was not provided. 
Please specify devices to work on.") + sys.exit(1) + + try: + ipaddr = ipaddress.ip_interface(ipaddr) + network = ipaddr.network + default = str(network.broadcast_address - 1) + prefixlen = network.prefixlen + network = str(network) + subnet = str(ipaddr.network.network_address) + netmask = str(ipaddr.netmask) + except ValueError: + print("Error: address/netmask {} is invalid:".format(ipaddr)) + sys.exit(1) + + osid = get_osid() + count = 0 + netplanname = "/etc/netplan/55-nvidia-autoconfig.yaml" + sysctlname = "/etc/sysctl.d/55-nvidia-arpdefaults.conf" + netplancfg = {'network': {'version': 2, 'renderer': 'networkd', 'ethernets': {}}} + sysctl_buf = "" + currentip = ipaddr.ip + tableid = 101 + for dev in devices: + newip = str(currentip) + if flush: + flush_ip(dev, verbose) + config_ip(dev, newip, prefixlen, default, network, tableid, verbose, dryrun) + config_arp(dev, verbose, dryrun) + config_netplan(dev, newip, prefixlen, default, network, tableid, netplancfg) + if osid == 'rhel' or osid == 'centos': + write_networkscripts(dev, newip, prefixlen, default, subnet, netmask, tableid, verbose, dryrun) + sysctl_buf = sysctl_buf + get_sysctl(dev) + + count += 1 + currentip = currentip + 1 + tableid += 1 + + if osid == 'ubuntu': + write_netplan(netplancfg, netplanname, verbose, dryrun) + write_sysctl(sysctl_buf, sysctlname, verbose, dryrun) + +def get_osid(): + # Get the OS ID (rhel, ubuntu, or other)" + release = '/etc/os-release' + osid = "" + if os.path.exists(release): + with open('/etc/os-release') as f: + read_data = f.read() + osid = re.findall('(\nID=)"?(\w+)"?\n', read_data, re.M) + # a tuple is returned with ID,value + if len(osid) == 1: + id,osid = osid[0] + else: + osid = "" + return osid + +def write_networkscripts(dev, newip, prefixlen, default, subnet, netmask, tableid, verbose, dryrun): + """This function handles rhel network-scripts for each device""" + ifcfgname = '/etc/sysconfig/network-scripts/ifcfg-{}'.format(dev) + ifcfgbuf = """ +BOOTPROTO=none 
+NAME={} +DEVICE={} +ONBOOT=yes +IPADDR={} +PREFIX={} +DEFROUTE=yes +GATEWAY={} +ROUTING_RULE="priority 32761 from {} table {}" +IPV4_FAILURE_FATAL=no +IPV6INIT=yes +IPV6_AUTOCONF=yes +IPV6_DEFROUTE=yes +IPV6_FAILURE_FATAL=no +""".format(dev, dev, newip, prefixlen, default, newip, tableid) + if verbose: + pprint.pprint("ifcfg cfg\n-----------\n{}".format(ifcfgbuf)) + try: + if not dryrun: + with open(ifcfgname, "w") as ifcfg: + ifcfg.write(ifcfgbuf) + print("wrote: ifcfg file {}".format(ifcfgname)) + except: + print("Error: could not write ifcfg file {}".format(ifcfgname)) + + routename = '/etc/sysconfig/network-scripts/route-{}'.format(dev) + routebuf = """ +ADDRESS0=0.0.0.0 +NETMASK0=128.0.0.0 +GATEWAY0={} +METRIC0={} +OPTIONS0="table {}" +ADDRESS1=128.0.0.0 +NETMASK1=128.0.0.0 +GATEWAY1={} +METRIC1={} +OPTIONS1="table {}" +ADDRESS2={} +NETMASK2={} +METRIC2={} +OPTIONS2="onlink src {} table {}" + +""".format(default, tableid, tableid, default, tableid, tableid, subnet, netmask, tableid, newip, tableid) + if verbose: + pprint.pprint("route cfg\n-----------\n{}".format(routebuf)) + try: + if not dryrun: + with open(routename, "w") as route: + route.write(routebuf) + print("wrote: route file {}".format(routename)) + except: + print("Error: could not write route file {}".format(routename)) + + return + +def write_netplan(netplancfg, netplanname, verbose, dryrun): + if verbose: + pprint.pprint("netplan cfg\n-----------\n{}".format(yaml.safe_dump(netplancfg))) + try: + if not dryrun: + with open(netplanname, "w") as netplan: + netplan.write(yaml.safe_dump(netplancfg)) + except: + print("Error: could not write netplan file {}".format(netplanname)) + return + + print("nv-mlnsipcfg: wrote persistent config {}".format(netplanname)) + return + +def write_sysctl(sysctl_buf, sysctlname, verbose, dryrun): + if verbose: + pprint.pprint("sysctl ARP settings\n-----------\n{}".format(sysctl_buf)) + try: + if not dryrun: + with open(sysctlname, "w") as sysctl: + 
sysctl.write(sysctl_buf) + except: + print("Error: could not write sysctl ARP config file {}".format(sysctlname)) + return + + print("nv-mlnsipcfg: configured sysctl ARP settings") + return + +def config_netplan(dev, currentip, prefixlen, default, network, tableid, netplancfg): + hostprefix = "{}/{}".format(currentip, prefixlen) + netplancfg['network']['ethernets'][dev] = {} + netplancfg['network']['ethernets'][dev]['addresses'] = [hostprefix] + netplancfg['network']['ethernets'][dev]['routes'] = [ + {'metric': tableid, 'table': tableid, 'to': '0.0.0.0/1', 'via': default}, + {'metric': tableid, 'table': tableid, 'to': '128.0.0.0/1', 'via': default}, + {'from': currentip, 'metric': tableid, 'scope': 'link', 'table': tableid, 'to': network}] + netplancfg['network']['ethernets'][dev]['routing-policy'] = [ + {'from': currentip, 'priority': 32761, 'table': tableid}] + return + +def get_sysctl(dev): + return(""" +net.ipv4.conf.{}.arp_accept = 1 +net.ipv4.conf.{}.arp_announce = 1 +net.ipv4.conf.{}.arp_filter = 0 +net.ipv4.conf.{}.rp_filter = 2 +net.ipv4.conf.{}.arp_ignore = 1 +""".format(dev, dev, dev, dev, dev)) + + +def config_arp(dev, verbose, dryrun): + commandlist = [] + + cmd = "/usr/sbin/sysctl -w net.ipv4.conf.{}.arp_accept=1".format(dev) + commandlist.append(cmd) + + cmd = "/usr/sbin/sysctl -w net.ipv4.conf.{}.arp_announce=1".format(dev) + commandlist.append(cmd) + + cmd = "/usr/sbin/sysctl -w net.ipv4.conf.{}.arp_filter=0".format(dev) + commandlist.append(cmd) + + cmd = "/usr/sbin/sysctl -w net.ipv4.conf.{}.rp_filter=2".format(dev) + commandlist.append(cmd) + + cmd = "/usr/sbin/sysctl -w net.ipv4.conf.{}.arp_ignore=1".format(dev) + commandlist.append(cmd) + + for cmd in commandlist: + try: + if verbose: + print(cmd) + if not dryrun: + rc = subprocess.check_output(cmd.split()) + except: + print("Error: could not run command {}".format(cmd)) + print(" Continuing.") + + print("Configured sysctl ARP settings") + return + + +def config_ip(dev, currentip, prefixlen, 
default, network, tableid, verbose, dryrun): + commandlist = [] + hostprefix = "{}/{}".format(currentip, prefixlen) + + cmd = "/usr/sbin/ip addr add {} dev {}".format(hostprefix, dev) + commandlist.append(cmd) + + cmd = "/usr/sbin/ip route add 0.0.0.0/1 via {} dev {} table {} proto static metric {}".format(default, dev, tableid, tableid) + commandlist.append(cmd) + + cmd = "/usr/sbin/ip route add {} dev {} table {} proto static scope link src {} metric {}".format(network, dev, tableid, currentip, tableid) + commandlist.append(cmd) + + cmd = "/usr/sbin/ip route add 128.0.0.0/1 via {} dev {} table {} proto static metric {}".format(default, dev, tableid, tableid) + commandlist.append(cmd) + + cmd = "/usr/sbin/ip rule add from {} table {} priority 32761".format(currentip, tableid) + commandlist.append(cmd) + + for cmd in commandlist: + try: + if verbose: + print(cmd) + if not dryrun: + rc = subprocess.check_output(cmd.split()) + except: + print("Error: could not run command {}".format(cmd)) + print(" Continuing.") + + print("Configured ip addr, ip route, and ip rule settings") + return + + +def config_ip_rhel(dev, currentip, prefixlen, default, network, tableid, verbose): + """This is not used but kept here for reference.""" + commandlist = [] + hostprefix = "{}/{}".format(currentip, prefixlen) + + cmd = "/usr/bin/nmcli connection mod {} +ipv4.addresses {}".format(dev, hostprefix) + commandlist.append(cmd) + + cmd = '/usr/bin/nmcli connection mod {} +ipv4.routes "0.0.0.0/1 {} {} table={}"'.format(dev, default, tableid, tableid) + commandlist.append(cmd) + + cmd = '/usr/bin/nmcli connection mod {} +ipv4.routes "128.0.0.0/1 {} {} table={}"'.format(dev, default, tableid, tableid) + commandlist.append(cmd) + + cmd = '/usr/bin/nmcli connection mod {} +ipv4.routes "{} {} table={} src={} onlink=true"'.format(dev, network, tableid, tableid, currentip) + commandlist.append(cmd) + + cmd = '/usr/bin/nmcli connection mod {} +ipv4.routing-rules "priority 32761 from {} table 
{}"'.format(dev, currentip, tableid) + commandlist.append(cmd) + + cmd = "/usr/bin/nmcli connection up {}".format(dev) + commandlist.append(cmd) + + for cmd in commandlist: + try: + if verbose: + print(cmd) + rc = subprocess.check_output(cmd.split()) + except: + print("Error: could not run command {}".format(cmd)) + print(" Continuing.") + return + + +def flush_ip(dev, verbose): + flushcmd = "/usr/sbin/ip addr flush dev {}".format(dev) + if verbose: + print(flushcmd) + + try: + fc = subprocess.check_output(flushcmd.split()) + except: + print("Error: could not run command {}".format(flushcmd)) + print(" Continuing.") + return + +def get_mlnx_ifname(device): + # get the ifname from the device + netpath = "/sys/class/infiniband/{}/device/net/*".format(device) + try: + ifname = os.path.basename(glob.glob(netpath)[0]) + except: + ifname = None + return ifname + +def is_device_ethernet(device): + # Return True if the link_layer of the device is ethernet + linkpath = "/sys/class/infiniband/{}/ports/*/link_layer".format(device) + try: + with open(glob.glob(linkpath)[0], 'r') as reader: + linktype = reader.read() + if "ethernet" in linktype.lower(): + return True + else: + return False + except: + return False + return False + +def get_mlnx_ethernet_ifnames(): + # We return a list of all device ifnames + ifnames = [] + for device in [os.path.basename(x) for x in glob.glob("/sys/class/infiniband/*")]: + if is_device_ethernet(device) and get_mlnx_ifname(device): + ifnames.append(get_mlnx_ifname(device)) + return ifnames + +if __name__ == "__main__": + main() + From cf6e53bd1b75ff09690d882eff0cd4c4a8289ae3 Mon Sep 17 00:00:00 2001 From: Sam Tannous Date: Mon, 18 Jul 2022 15:58:49 -0400 Subject: [PATCH 2/2] Fixed copyright date. 
--- nv-mlnxipcfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nv-mlnxipcfg.py b/nv-mlnxipcfg.py index 55e84b8..d789076 100755 --- a/nv-mlnxipcfg.py +++ b/nv-mlnxipcfg.py @@ -2,7 +2,7 @@ # The MIT License (MIT) # -# Copyright (c) 2020, NVIDIA CORPORATION +# Copyright (c) 2022, NVIDIA CORPORATION # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in