diff --git a/Documentation/cmdref/cilium-operator-azure.md b/Documentation/cmdref/cilium-operator-azure.md index 5b764d70dd0b5..e3a6140746f99 100644 --- a/Documentation/cmdref/cilium-operator-azure.md +++ b/Documentation/cmdref/cilium-operator-azure.md @@ -12,6 +12,7 @@ cilium-operator-azure [flags] ``` --auto-create-cilium-pod-ip-pools map Automatically create CiliumPodIPPool resources on startup. Specify pools in the form of =ipv4-cidrs:,[...];ipv4-mask-size: (multiple pools can also be passed by repeating the CLI flag) + --azure-release-excess-ips Enable releasing excess free IP addresses from Azure network interfaces. --azure-resource-group string Resource group to use for Azure IPAM --azure-subscription-id string Subscription ID to access Azure API --azure-use-primary-address Use Azure IP address from interface's primary IPConfigurations @@ -65,6 +66,7 @@ cilium-operator-azure [flags] --enable-policy-secrets-sync Enables fan-in TLS secrets sync from multiple namespaces to singular namespace (specified by policy-secrets-namespace flag) --enable-ztunnel Use zTunnel as Cilium's encryption infrastructure --enforce-ingress-https Enforces https for host having matching TLS host in Ingress. Incoming traffic to http listener will return 308 http error code with respective location in header. (default true) + --excess-ip-release-delay int Number of seconds operator would wait before it releases an IP previously marked as excess (default 180) --gateway-api-hostnetwork-enabled Exposes Gateway listeners on the host network. --gateway-api-hostnetwork-nodelabelselector string Label selector that matches the nodes where the gateway listeners should be exposed. It's a list of comma-separated key-value label pairs. e.g. 'kubernetes.io/os=linux,kubernetes.io/hostname=kind-worker' --gateway-api-secrets-namespace string Namespace having tls secrets used by CEC for Gateway API (default "cilium-secrets") diff --git a/Documentation/cmdref/cilium-operator.md b/Documentation/cmdref/cilium-operator.md index a582a59c607ae..224288df29926 100644 --- a/Documentation/cmdref/cilium-operator.md +++ b/Documentation/cmdref/cilium-operator.md @@ -17,6 +17,7 @@ cilium-operator [flags] --aws-max-results-per-call int32 Maximum results per AWS API call for DescribeNetworkInterfaces and DescribeSecurityGroups. Set to 0 to let AWS determine optimal page size (default). If set to 0 and AWS returns OperationNotPermitted errors, automatically switches to 1000 for all future requests --aws-release-excess-ips Enable releasing excess free IP addresses from AWS ENI. --aws-use-primary-address Allows for using primary address of the ENI for allocations on the node + --azure-release-excess-ips Enable releasing excess free IP addresses from Azure network interfaces. --azure-resource-group string Resource group to use for Azure IPAM --azure-subscription-id string Subscription ID to access Azure API --azure-use-primary-address Use Azure IP address from interface's primary IPConfigurations diff --git a/operator/cmd/provider_azure_flags.go b/operator/cmd/provider_azure_flags.go index a9a66e4e0fc66..188b4af7c5e80 100644 --- a/operator/cmd/provider_azure_flags.go +++ b/operator/cmd/provider_azure_flags.go @@ -34,5 +34,11 @@ func (hook *azureFlagsHooks) RegisterProviderFlag(cmd *cobra.Command, vp *viper. flags.Bool(operatorOption.AzureUsePrimaryAddress, false, "Use Azure IP address from interface's primary IPConfigurations") option.BindEnvWithLegacyEnvFallback(vp, operatorOption.AzureUsePrimaryAddress, "AZURE_USE_PRIMARY_ADDRESS") + flags.Bool(operatorOption.AzureReleaseExcessIPs, false, "Enable releasing excess free IP addresses from Azure network interfaces.") + option.BindEnv(vp, operatorOption.AzureReleaseExcessIPs) + + flags.Int(operatorOption.ExcessIPReleaseDelay, 180, "Number of seconds operator would wait before it releases an IP previously marked as excess") + option.BindEnv(vp, operatorOption.ExcessIPReleaseDelay) + vp.BindPFlags(flags) } diff --git a/operator/option/config.go b/operator/option/config.go index 9929b382b1da1..e15de724d6dbc 100644 --- a/operator/option/config.go +++ b/operator/option/config.go @@ -148,6 +148,11 @@ const ( // primary IPConfiguration AzureUsePrimaryAddress = "azure-use-primary-address" + // AzureReleaseExcessIPs allows releasing excess free IP addresses from + // Azure network interfaces. Enabling this option reduces waste of IP + // addresses but may increase the number of API calls to Azure. + AzureReleaseExcessIPs = "azure-release-excess-ips" + // LeaderElectionLeaseDuration is the duration that non-leader candidates will wait to // force acquire leadership LeaderElectionLeaseDuration = "leader-election-lease-duration" @@ -349,6 +354,10 @@ type OperatorConfig struct { // primary IPConfiguration AzureUsePrimaryAddress bool + // AzureReleaseExcessIPs allows releasing excess free IP addresses from + // Azure network interfaces. + AzureReleaseExcessIPs bool + // AlibabaCloud options // AlibabaCloudVPCID allow user to specific vpc @@ -465,6 +474,7 @@ func (c *OperatorConfig) Populate(logger *slog.Logger, vp *viper.Viper) { c.AzureSubscriptionID = vp.GetString(AzureSubscriptionID) c.AzureResourceGroup = vp.GetString(AzureResourceGroup) c.AzureUsePrimaryAddress = vp.GetBool(AzureUsePrimaryAddress) + c.AzureReleaseExcessIPs = vp.GetBool(AzureReleaseExcessIPs) c.AzureUserAssignedIdentityID = vp.GetString(AzureUserAssignedIdentityID) // AlibabaCloud options diff --git a/pkg/azure/api/api.go b/pkg/azure/api/api.go index 7f657918ce8c3..0b1b1d0b53c98 100644 --- a/pkg/azure/api/api.go +++ b/pkg/azure/api/api.go @@ -342,6 +342,14 @@ func parseInterface(iface *armnetwork.Interface, subnets ipamTypes.SubnetMap, us State: strings.ToLower(string(*ip.Properties.ProvisioningState)), } + if ip.Name != nil { + addr.SetIPConfigName(*ip.Name) + } + + if ip.Properties.Primary != nil { + addr.SetPrimary(*ip.Properties.Primary) + } + if ip.Properties.Subnet != nil { addr.Subnet = *ip.Properties.Subnet.ID if subnet, ok := subnets[addr.Subnet]; ok { @@ -837,6 +845,216 @@ func (c *Client) AssignPrivateIpAddressesVM(ctx context.Context, subnetID, inter return nil } +// PrimaryReleaseError is returned by Unassign* when the requested release set +// would drop one or more primary IPConfigurations from a NIC. Azure ARM +// rejects such updates with an opaque NetworkingInternalOperationError, so +// we detect the case pre-flight and refuse to issue the update. +// +// Returning this error keeps the IPAM framework from marking the IPs as +// released in CiliumNode.Status.IPAM.ReleaseIPs, leaving the CRD in sync +// with the live NIC state. +type PrimaryReleaseError struct { + // InterfaceName is the NIC the primary IPConfiguration belongs to. + InterfaceName string + // Items is either the IP addresses (VM path) or IPConfiguration names + // (VMSS path) of the primaries that were requested for release. + Items []string +} + +func (e *PrimaryReleaseError) Error() string { + return fmt.Sprintf("interface %s: refusing to release primary IPConfiguration(s) %v", e.InterfaceName, e.Items) +} + +// dropMatchingIPConfigsVM partitions ipConfigs into those to keep and those +// to drop based on releaseSet (set of IP addresses). Primary IPConfigurations +// are always retained even if their IP appears in releaseSet; their IPs are +// returned via primaryBlocked so the caller can refuse the update entirely. +func dropMatchingIPConfigsVM( + ipConfigs []*armnetwork.InterfaceIPConfiguration, + releaseSet map[string]struct{}, +) (kept []*armnetwork.InterfaceIPConfiguration, dropped int, primaryBlocked []string) { + kept = make([]*armnetwork.InterfaceIPConfiguration, 0, len(ipConfigs)) + for _, c := range ipConfigs { + if c == nil || c.Properties == nil || c.Properties.PrivateIPAddress == nil { + kept = append(kept, c) + continue + } + ip := *c.Properties.PrivateIPAddress + _, requested := releaseSet[ip] + isPrimary := c.Properties.Primary != nil && *c.Properties.Primary + switch { + case requested && isPrimary: + primaryBlocked = append(primaryBlocked, ip) + kept = append(kept, c) + case requested: + dropped++ + default: + kept = append(kept, c) + } + } + return +} + +// dropMatchingIPConfigsVMSS partitions ipConfigs into those to keep and those +// to drop based on releaseNames (set of IPConfiguration resource names). The +// VMSS compute model only carries names and the Primary flag — IPs live in +// the network model — so the caller passes names. Primary IPConfigurations +// are always retained. +func dropMatchingIPConfigsVMSS( + ipConfigs []*armcompute.VirtualMachineScaleSetIPConfiguration, + releaseNames map[string]struct{}, +) (kept []*armcompute.VirtualMachineScaleSetIPConfiguration, dropped int, primaryBlocked []string) { + kept = make([]*armcompute.VirtualMachineScaleSetIPConfiguration, 0, len(ipConfigs)) + for _, c := range ipConfigs { + if c == nil || c.Name == nil { + kept = append(kept, c) + continue + } + name := *c.Name + _, requested := releaseNames[name] + isPrimary := c.Properties != nil && c.Properties.Primary != nil && *c.Properties.Primary + switch { + case requested && isPrimary: + primaryBlocked = append(primaryBlocked, name) + kept = append(kept, c) + case requested: + dropped++ + default: + kept = append(kept, c) + } + } + return +} + +// UnassignPrivateIpAddressesVM unassigns the given private IP addresses from +// the named NIC of a standalone VM. +// +// The Azure network model carries privateIPAddress on each IPConfiguration, +// so matching by IP is straightforward. If any requested IP backs a primary +// IPConfiguration the function returns *PrimaryReleaseError without issuing +// the update. +func (c *Client) UnassignPrivateIpAddressesVM(ctx context.Context, interfaceName string, addresses []string) error { + if len(addresses) == 0 { + return nil + } + + c.limiter.Limit(ctx, interfacesGet) + sinceStart := spanstat.Start() + + iface, err := c.interfaces.Get(ctx, c.resourceGroup, interfaceName, nil) + c.metricsAPI.ObserveAPICall(interfacesGet, deriveStatus(err), sinceStart.Seconds()) + if err != nil { + return fmt.Errorf("failed to get standalone instance's interface %s: %w", interfaceName, err) + } + + releaseSet := make(map[string]struct{}, len(addresses)) + for _, ip := range addresses { + releaseSet[ip] = struct{}{} + } + + kept, dropped, primaryBlocked := dropMatchingIPConfigsVM(iface.Properties.IPConfigurations, releaseSet) + if len(primaryBlocked) > 0 { + return &PrimaryReleaseError{InterfaceName: interfaceName, Items: primaryBlocked} + } + if dropped == 0 { + return nil + } + iface.Properties.IPConfigurations = kept + + c.limiter.Limit(ctx, interfacesCreateOrUpdate) + sinceStart = spanstat.Start() + + poller, err := c.interfaces.BeginCreateOrUpdate(ctx, c.resourceGroup, interfaceName, iface.Interface, nil) + defer func() { + c.metricsAPI.ObserveAPICall(interfacesCreateOrUpdate, deriveStatus(err), sinceStart.Seconds()) + }() + if err != nil { + return fmt.Errorf("unable to update interface %s: %w", interfaceName, err) + } + if _, err := poller.PollUntilDone(ctx, nil); err != nil { + return fmt.Errorf("error while waiting for interface CreateOrUpdate to complete for %s: %w", interfaceName, err) + } + + return nil +} + +// UnassignPrivateIpAddressesVMSS unassigns the IPConfigurations identified by +// ipConfigNames from the named NIC of a VMSS instance. +// +// The Azure compute model exposes IPConfiguration name and Primary but not +// privateIPAddress, so the caller must translate IPs to IPConfiguration names +// using the in-memory mapping populated by parseInterface. If any requested +// name backs a primary IPConfiguration the function returns +// *PrimaryReleaseError without issuing the update. +func (c *Client) UnassignPrivateIpAddressesVMSS(ctx context.Context, instanceID, vmssName, interfaceName string, ipConfigNames []string) error { + if len(ipConfigNames) == 0 { + return nil + } + + vmssGetOptions := &armcompute.VirtualMachineScaleSetVMsClientGetOptions{ + Expand: to.Ptr(armcompute.InstanceViewTypesInstanceView), + } + + c.limiter.Limit(ctx, virtualMachineScaleSetVMsGet) + sinceStart := spanstat.Start() + + result, err := c.virtualMachineScaleSetVMs.Get(ctx, c.resourceGroup, vmssName, instanceID, vmssGetOptions) + c.metricsAPI.ObserveAPICall(virtualMachineScaleSetVMsGet, deriveStatus(err), sinceStart.Seconds()) + if err != nil { + return fmt.Errorf("failed to get VM %s from VMSS %s: %w", instanceID, vmssName, err) + } + + var netIfConfig *armcompute.VirtualMachineScaleSetNetworkConfiguration + if result.Properties.NetworkProfileConfiguration != nil { + for _, nic := range result.Properties.NetworkProfileConfiguration.NetworkInterfaceConfigurations { + if nic.Name != nil && *nic.Name == interfaceName { + netIfConfig = nic + break + } + } + } + if netIfConfig == nil { + return fmt.Errorf("interface %s does not exist in VM %s", interfaceName, instanceID) + } + + releaseNames := make(map[string]struct{}, len(ipConfigNames)) + for _, name := range ipConfigNames { + releaseNames[name] = struct{}{} + } + + kept, dropped, primaryBlocked := dropMatchingIPConfigsVMSS(netIfConfig.Properties.IPConfigurations, releaseNames) + if len(primaryBlocked) > 0 { + return &PrimaryReleaseError{InterfaceName: interfaceName, Items: primaryBlocked} + } + if dropped == 0 { + return nil + } + netIfConfig.Properties.IPConfigurations = kept + + // Unset imageReference for the same reason as AssignPrivateIpAddressesVMSS: + // preserves a possibly Azure-Compute-Gallery image reference on update. + // See https://github.com/Azure/AKS/issues/1819. + if result.Properties.StorageProfile != nil { + result.Properties.StorageProfile.ImageReference = nil + } + + c.limiter.Limit(ctx, virtualMachineScaleSetVMsUpdate) + sinceStart = spanstat.Start() + + poller, err := c.virtualMachineScaleSetVMs.BeginUpdate(ctx, c.resourceGroup, vmssName, instanceID, result.VirtualMachineScaleSetVM, nil) + defer func() { + c.metricsAPI.ObserveAPICall(virtualMachineScaleSetVMsUpdate, deriveStatus(err), sinceStart.Seconds()) + }() + if err != nil { + return fmt.Errorf("unable to update virtualMachineScaleSetVMs: %w", err) + } + if _, err := poller.PollUntilDone(ctx, nil); err != nil { + return fmt.Errorf("error while waiting for virtualMachineScaleSetVMs Update to complete: %w", err) + } + + return nil +} + // AssignPublicIPAddressesVMSS assigns a public IP to a VMSS instance. // The public IP is allocated from a Public IP Prefix matching publicIpTags func (c *Client) AssignPublicIPAddressesVMSS(ctx context.Context, instanceID, vmssName string, publicIpTags ipamTypes.Tags) (string, error) { diff --git a/pkg/azure/api/api_test.go b/pkg/azure/api/api_test.go index 2b53d98cda7f5..27882d05997a2 100644 --- a/pkg/azure/api/api_test.go +++ b/pkg/azure/api/api_test.go @@ -230,3 +230,145 @@ func TestParseSubnetID(t *testing.T) { }) } } + +func TestDropMatchingIPConfigsVM(t *testing.T) { + primary := func(ip string) *armnetwork.InterfaceIPConfiguration { + return &armnetwork.InterfaceIPConfiguration{ + Name: to.Ptr("primary-cfg"), + Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{ + PrivateIPAddress: to.Ptr(ip), + Primary: to.Ptr(true), + }, + } + } + secondary := func(name, ip string) *armnetwork.InterfaceIPConfiguration { + return &armnetwork.InterfaceIPConfiguration{ + Name: to.Ptr(name), + Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{ + PrivateIPAddress: to.Ptr(ip), + Primary: to.Ptr(false), + }, + } + } + + t.Run("drops requested non-primary IPs only", func(t *testing.T) { + input := []*armnetwork.InterfaceIPConfiguration{ + primary("10.0.0.1"), + secondary("a", "10.0.0.2"), + secondary("b", "10.0.0.3"), + secondary("c", "10.0.0.4"), + } + releaseSet := map[string]struct{}{"10.0.0.2": {}, "10.0.0.4": {}} + kept, dropped, blocked := dropMatchingIPConfigsVM(input, releaseSet) + require.Equal(t, 2, dropped) + require.Empty(t, blocked) + require.Len(t, kept, 2) + require.Equal(t, "10.0.0.1", *kept[0].Properties.PrivateIPAddress) + require.Equal(t, "10.0.0.3", *kept[1].Properties.PrivateIPAddress) + }) + + t.Run("primary in release set is reported and retained", func(t *testing.T) { + input := []*armnetwork.InterfaceIPConfiguration{ + primary("10.0.0.1"), + secondary("a", "10.0.0.2"), + } + releaseSet := map[string]struct{}{"10.0.0.1": {}, "10.0.0.2": {}} + kept, dropped, blocked := dropMatchingIPConfigsVM(input, releaseSet) + require.Equal(t, []string{"10.0.0.1"}, blocked) + require.Equal(t, 1, dropped) + require.Len(t, kept, 1) + require.Equal(t, "10.0.0.1", *kept[0].Properties.PrivateIPAddress) + }) + + t.Run("no overlap drops nothing", func(t *testing.T) { + input := []*armnetwork.InterfaceIPConfiguration{ + primary("10.0.0.1"), + secondary("a", "10.0.0.2"), + } + releaseSet := map[string]struct{}{"10.0.0.99": {}} + kept, dropped, blocked := dropMatchingIPConfigsVM(input, releaseSet) + require.Equal(t, 0, dropped) + require.Empty(t, blocked) + require.Len(t, kept, 2) + }) + + t.Run("nil properties retained", func(t *testing.T) { + input := []*armnetwork.InterfaceIPConfiguration{ + {Name: to.Ptr("nilprops")}, + secondary("a", "10.0.0.2"), + } + releaseSet := map[string]struct{}{"10.0.0.2": {}} + kept, dropped, blocked := dropMatchingIPConfigsVM(input, releaseSet) + require.Equal(t, 1, dropped) + require.Empty(t, blocked) + require.Len(t, kept, 1) + require.Equal(t, "nilprops", *kept[0].Name) + }) +} + +func TestDropMatchingIPConfigsVMSS(t *testing.T) { + primary := func(name string) *armcompute.VirtualMachineScaleSetIPConfiguration { + return &armcompute.VirtualMachineScaleSetIPConfiguration{ + Name: to.Ptr(name), + Properties: &armcompute.VirtualMachineScaleSetIPConfigurationProperties{ + Primary: to.Ptr(true), + }, + } + } + secondary := func(name string) *armcompute.VirtualMachineScaleSetIPConfiguration { + return &armcompute.VirtualMachineScaleSetIPConfiguration{ + Name: to.Ptr(name), + Properties: &armcompute.VirtualMachineScaleSetIPConfigurationProperties{ + Primary: to.Ptr(false), + }, + } + } + + t.Run("drops requested non-primary names only", func(t *testing.T) { + input := []*armcompute.VirtualMachineScaleSetIPConfiguration{ + primary("pods"), + secondary("pod-01"), + secondary("pod-02"), + secondary("pod-03"), + } + releaseNames := map[string]struct{}{"pod-01": {}, "pod-03": {}} + kept, dropped, blocked := dropMatchingIPConfigsVMSS(input, releaseNames) + require.Equal(t, 2, dropped) + require.Empty(t, blocked) + require.Len(t, kept, 2) + require.Equal(t, "pods", *kept[0].Name) + require.Equal(t, "pod-02", *kept[1].Name) + }) + + t.Run("primary name in release set is reported and retained", func(t *testing.T) { + input := []*armcompute.VirtualMachineScaleSetIPConfiguration{ + primary("pods"), + secondary("pod-01"), + } + releaseNames := map[string]struct{}{"pods": {}, "pod-01": {}} + kept, dropped, blocked := dropMatchingIPConfigsVMSS(input, releaseNames) + require.Equal(t, []string{"pods"}, blocked) + require.Equal(t, 1, dropped) + require.Len(t, kept, 1) + require.Equal(t, "pods", *kept[0].Name) + }) + + t.Run("nil name retained", func(t *testing.T) { + input := []*armcompute.VirtualMachineScaleSetIPConfiguration{ + {}, + secondary("pod-01"), + } + releaseNames := map[string]struct{}{"pod-01": {}} + kept, dropped, blocked := dropMatchingIPConfigsVMSS(input, releaseNames) + require.Equal(t, 1, dropped) + require.Empty(t, blocked) + require.Len(t, kept, 1) + require.Nil(t, kept[0].Name) + }) +} + +func TestPrimaryReleaseError(t *testing.T) { + err := &PrimaryReleaseError{InterfaceName: "pods", Items: []string{"pods", "pod-99"}} + require.Contains(t, err.Error(), "pods") + require.Contains(t, err.Error(), "pod-99") +} diff --git a/pkg/azure/api/mock/mock.go b/pkg/azure/api/mock/mock.go index d57379c267c93..ce1775356292b 100644 --- a/pkg/azure/api/mock/mock.go +++ b/pkg/azure/api/mock/mock.go @@ -13,6 +13,7 @@ import ( "golang.org/x/time/rate" "github.com/cilium/cilium/pkg/api/helpers" + azureAPI "github.com/cilium/cilium/pkg/azure/api" "github.com/cilium/cilium/pkg/azure/types" "github.com/cilium/cilium/pkg/ipam/service/ipallocator" ipamTypes "github.com/cilium/cilium/pkg/ipam/types" @@ -29,6 +30,8 @@ const ( GetVpcsAndSubnets GetSubnetsByIDs AssignPrivateIpAddressesVMSS + UnassignPrivateIpAddressesVM + UnassignPrivateIpAddressesVMSS MaxOperation ) @@ -45,15 +48,21 @@ type API struct { errors map[Operation]error delaySim *helpers.DelaySimulator limiter *rate.Limiter + // primaryIPs records IPs (or IPConfig names) that the mock should treat + // as primary IPConfigurations. Keyed by interface ID. When the release + // set intersects this set, the mock returns *api.PrimaryReleaseError to + // emulate the real client's pre-flight guard. + primaryIPs map[string]map[string]struct{} } func NewAPI(subnets []*ipamTypes.Subnet, vnets []*ipamTypes.VirtualNetwork) *API { api := &API{ - instances: ipamTypes.NewInstanceMap(), - subnets: map[string]*subnet{}, - vnets: map[string]*ipamTypes.VirtualNetwork{}, - errors: map[Operation]error{}, - delaySim: helpers.NewDelaySimulator(), + instances: ipamTypes.NewInstanceMap(), + subnets: map[string]*subnet{}, + vnets: map[string]*ipamTypes.VirtualNetwork{}, + errors: map[Operation]error{}, + delaySim: helpers.NewDelaySimulator(), + primaryIPs: map[string]map[string]struct{}{}, } api.UpdateSubnets(subnets) @@ -285,6 +294,187 @@ func (a *API) AssignPrivateIpAddressesVMSS(ctx context.Context, vmName, vmssName return nil } +// SetPrimaryIPs marks the given identifiers (IPs for the VM path or +// IPConfig names for the VMSS path) as primary on the named interface. +// Calls to UnassignPrivateIpAddresses* that intersect this set return +// *api.PrimaryReleaseError without mutating the mock state, mirroring the +// pre-flight guard in the real client. +func (a *API) SetPrimaryIPs(interfaceID string, items ...string) { + a.mutex.Lock() + defer a.mutex.Unlock() + if a.primaryIPs == nil { + a.primaryIPs = map[string]map[string]struct{}{} + } + set, ok := a.primaryIPs[interfaceID] + if !ok { + set = map[string]struct{}{} + a.primaryIPs[interfaceID] = set + } + for _, item := range items { + set[item] = struct{}{} + } +} + +// findPrimaryBlocked returns the subset of items that are recorded as +// primary on interfaceID via SetPrimaryIPs. +// Caller must hold a.mutex. +func (a *API) findPrimaryBlocked(interfaceID string, items []string) []string { + set, ok := a.primaryIPs[interfaceID] + if !ok { + return nil + } + var blocked []string + for _, item := range items { + if _, isPrimary := set[item]; isPrimary { + blocked = append(blocked, item) + } + } + return blocked +} + +func (a *API) UnassignPrivateIpAddressesVM(ctx context.Context, interfaceName string, addresses []string) error { + a.rateLimit() + a.delaySim.Delay(UnassignPrivateIpAddressesVM) + + a.mutex.Lock() + defer a.mutex.Unlock() + + if err, ok := a.errors[UnassignPrivateIpAddressesVM]; ok { + return err + } + if len(addresses) == 0 { + return nil + } + + if blocked := a.findPrimaryBlocked(interfaceName, addresses); len(blocked) > 0 { + return &azureAPI.PrimaryReleaseError{InterfaceName: interfaceName, Items: blocked} + } + + releaseSet := make(map[string]struct{}, len(addresses)) + for _, ip := range addresses { + releaseSet[ip] = struct{}{} + } + + instances := a.instances.DeepCopy() + foundInterface := false + err := instances.ForeachInterface("", func(_, _ string, iface ipamTypes.InterfaceRevision) error { + intf, ok := iface.Resource.(*types.AzureInterface) + if !ok { + return fmt.Errorf("invalid interface object") + } + if intf.Name != interfaceName || intf.GetVMScaleSetName() != "" { + return nil + } + foundInterface = true + intf.Addresses = a.dropAddressesByIP(intf, releaseSet) + return nil + }) + if err != nil { + return err + } + if !foundInterface { + return fmt.Errorf("interface %s not found", interfaceName) + } + + a.updateInstancesLocked(instances) + return nil +} + +func (a *API) UnassignPrivateIpAddressesVMSS(ctx context.Context, instanceID, vmssName, interfaceName string, ipConfigNames []string) error { + a.rateLimit() + a.delaySim.Delay(UnassignPrivateIpAddressesVMSS) + + a.mutex.Lock() + defer a.mutex.Unlock() + + if err, ok := a.errors[UnassignPrivateIpAddressesVMSS]; ok { + return err + } + if len(ipConfigNames) == 0 { + return nil + } + + releaseNames := make(map[string]struct{}, len(ipConfigNames)) + for _, name := range ipConfigNames { + releaseNames[name] = struct{}{} + } + + instances := a.instances.DeepCopy() + foundInterface := false + var ifaceID string + err := instances.ForeachInterface("", func(_, _ string, iface ipamTypes.InterfaceRevision) error { + intf, ok := iface.Resource.(*types.AzureInterface) + if !ok { + return fmt.Errorf("invalid interface object") + } + if intf.Name != interfaceName || intf.GetVMID() != instanceID || intf.GetVMScaleSetName() != vmssName { + return nil + } + foundInterface = true + ifaceID = intf.ID + intf.Addresses = a.dropAddressesByIPConfigName(intf, releaseNames) + return nil + }) + if err != nil { + return err + } + if !foundInterface { + return fmt.Errorf("interface %s not found on VM %s in VMSS %s", interfaceName, instanceID, vmssName) + } + + if blocked := a.findPrimaryBlocked(ifaceID, ipConfigNames); len(blocked) > 0 { + return &azureAPI.PrimaryReleaseError{InterfaceName: interfaceName, Items: blocked} + } + + a.updateInstancesLocked(instances) + return nil +} + +// dropAddressesByIP returns intf.Addresses with addresses in releaseSet +// removed, freeing their IPs back to the subnet allocator. +// Caller must hold a.mutex. +func (a *API) dropAddressesByIP(intf *types.AzureInterface, releaseSet map[string]struct{}) []types.AzureAddress { + kept := make([]types.AzureAddress, 0, len(intf.Addresses)) + for _, addr := range intf.Addresses { + if _, drop := releaseSet[addr.IP]; drop { + a.releaseToSubnetAllocator(addr) + continue + } + kept = append(kept, addr) + } + return kept +} + +// dropAddressesByIPConfigName returns intf.Addresses with addresses whose +// IPConfigName is in releaseNames removed, freeing their IPs back to the +// subnet allocator. +// Caller must hold a.mutex. +func (a *API) dropAddressesByIPConfigName(intf *types.AzureInterface, releaseNames map[string]struct{}) []types.AzureAddress { + kept := make([]types.AzureAddress, 0, len(intf.Addresses)) + for _, addr := range intf.Addresses { + if _, drop := releaseNames[addr.IPConfigName()]; drop { + a.releaseToSubnetAllocator(addr) + continue + } + kept = append(kept, addr) + } + return kept +} + +// releaseToSubnetAllocator returns addr.IP to the subnet allocator if known. +// Caller must hold a.mutex. +func (a *API) releaseToSubnetAllocator(addr types.AzureAddress) { + s, ok := a.subnets[addr.Subnet] + if !ok { + return + } + parsed := net.ParseIP(addr.IP) + if parsed == nil { + return + } + s.allocator.Release(parsed) +} + func (a *API) AssignPublicIPAddressesVMSS(ctx context.Context, instanceID, vmssName string, publicIpTags ipamTypes.Tags) (string, error) { a.rateLimit() return "mock-public-ip-prefix-id", nil diff --git a/pkg/azure/api/mock/mock_test.go b/pkg/azure/api/mock/mock_test.go index 0ca4e63a0f839..a6ada9784d551 100644 --- a/pkg/azure/api/mock/mock_test.go +++ b/pkg/azure/api/mock/mock_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/require" + azureAPI "github.com/cilium/cilium/pkg/azure/api" "github.com/cilium/cilium/pkg/azure/types" ipamTypes "github.com/cilium/cilium/pkg/ipam/types" ) @@ -109,3 +110,116 @@ func TestSetLimiter(t *testing.T) { _, err := api.GetInstances(t.Context(), ipamTypes.SubnetMap{}) require.NoError(t, err) } + +// addrWithName builds an AzureAddress with both IP and IPConfig name set. +// Helper for unassign tests. +func addrWithName(ip, name, subnet string) types.AzureAddress { + addr := types.AzureAddress{IP: ip, Subnet: subnet, State: types.StateSucceeded} + addr.SetIPConfigName(name) + return addr +} + +func TestUnassignPrivateIpAddressesVMSS(t *testing.T) { + cidr := netip.MustParsePrefix("10.0.0.0/16") + subnet := &ipamTypes.Subnet{ID: "s-1", CIDR: cidr, AvailableAddresses: 65534} + api := NewAPI([]*ipamTypes.Subnet{subnet}, []*ipamTypes.VirtualNetwork{{ID: "v-1"}}) + + const vmFullID = "/subscriptions/xxx/resourceGroups/g1/providers/Microsoft.Compute/virtualMachineScaleSets/vmss1/virtualMachines/0" + const ifaceID = vmFullID + "/networkInterfaces/pods" + // Per AzureInterface.extractIDs, GetVMID returns just the instance index. + const vmIndex = "0" + + resource := &types.AzureInterface{ + Name: "pods", + State: types.StateSucceeded, + Addresses: []types.AzureAddress{ + addrWithName("10.0.0.1", "pods", "s-1"), + addrWithName("10.0.0.2", "pod-01", "s-1"), + addrWithName("10.0.0.3", "pod-02", "s-1"), + }, + } + resource.SetID(ifaceID) + + instances := ipamTypes.NewInstanceMap() + instances.Update(vmFullID, ipamTypes.InterfaceRevision{Resource: resource.DeepCopy()}) + api.UpdateInstances(instances) + + t.Run("removes requested IPConfigurations", func(t *testing.T) { + err := api.UnassignPrivateIpAddressesVMSS(t.Context(), vmIndex, "vmss1", "pods", []string{"pod-02"}) + require.NoError(t, err) + + got, err := api.GetInstances(t.Context(), ipamTypes.SubnetMap{}) + require.NoError(t, err) + _ = got.ForeachInterface("", func(_, _ string, rev ipamTypes.InterfaceRevision) error { + intf := rev.Resource.(*types.AzureInterface) + require.Len(t, intf.Addresses, 2) + for _, a := range intf.Addresses { + require.NotEqual(t, "pod-02", a.IPConfigName()) + } + return nil + }) + }) + + t.Run("primary block returns PrimaryReleaseError without mutating", func(t *testing.T) { + api.SetPrimaryIPs(ifaceID, "pods") + err := api.UnassignPrivateIpAddressesVMSS(t.Context(), vmIndex, "vmss1", "pods", []string{"pods"}) + var pErr *azureAPI.PrimaryReleaseError + require.ErrorAs(t, err, &pErr) + require.Equal(t, "pods", pErr.InterfaceName) + require.Contains(t, pErr.Items, "pods") + + got, err := api.GetInstances(t.Context(), ipamTypes.SubnetMap{}) + require.NoError(t, err) + _ = got.ForeachInterface("", func(_, _ string, rev ipamTypes.InterfaceRevision) error { + intf := rev.Resource.(*types.AzureInterface) + names := make([]string, 0, len(intf.Addresses)) + for _, a := range intf.Addresses { + names = append(names, a.IPConfigName()) + } + require.Contains(t, names, "pods", "primary IPConfig must remain on the NIC") + return nil + }) + }) +} + +func TestUnassignPrivateIpAddressesVM(t *testing.T) { + cidr := netip.MustParsePrefix("10.0.0.0/16") + subnet := &ipamTypes.Subnet{ID: "s-1", CIDR: cidr, AvailableAddresses: 65534} + api := NewAPI([]*ipamTypes.Subnet{subnet}, []*ipamTypes.VirtualNetwork{{ID: "v-1"}}) + + const vmIfaceID = "/subscriptions/xxx/resourceGroups/g1/providers/Microsoft.Network/networkInterfaces/vm-if" + + resource := &types.AzureInterface{ + Name: "vm-if", + State: types.StateSucceeded, + Addresses: []types.AzureAddress{ + addrWithName("10.0.0.5", "primary", "s-1"), + addrWithName("10.0.0.6", "secondary", "s-1"), + }, + } + resource.SetID(vmIfaceID) + + instances := ipamTypes.NewInstanceMap() + instances.Update("vm-instance", ipamTypes.InterfaceRevision{Resource: resource.DeepCopy()}) + api.UpdateInstances(instances) + + t.Run("removes requested IP", func(t *testing.T) { + err := api.UnassignPrivateIpAddressesVM(t.Context(), "vm-if", []string{"10.0.0.6"}) + require.NoError(t, err) + got, err := api.GetInstances(t.Context(), ipamTypes.SubnetMap{}) + require.NoError(t, err) + _ = got.ForeachInterface("", func(_, _ string, rev ipamTypes.InterfaceRevision) error { + intf := rev.Resource.(*types.AzureInterface) + require.Len(t, intf.Addresses, 1) + require.Equal(t, "10.0.0.5", intf.Addresses[0].IP) + return nil + }) + }) + + t.Run("primary block returns error without mutation", func(t *testing.T) { + api.SetPrimaryIPs("vm-if", "10.0.0.5") + err := api.UnassignPrivateIpAddressesVM(t.Context(), "vm-if", []string{"10.0.0.5"}) + var pErr *azureAPI.PrimaryReleaseError + require.ErrorAs(t, err, &pErr) + }) +} diff --git a/pkg/azure/ipam/instances.go b/pkg/azure/ipam/instances.go index 3887c0e93bd81..8be67d324e930 100644 --- a/pkg/azure/ipam/instances.go +++ b/pkg/azure/ipam/instances.go @@ -27,6 +27,8 @@ type AzureAPI interface { GetSubnetsByIDs(ctx context.Context, nodeSubnetIDs []string) (ipamTypes.SubnetMap, error) AssignPrivateIpAddressesVM(ctx context.Context, subnetID, interfaceName string, addresses int) error AssignPrivateIpAddressesVMSS(ctx context.Context, instanceID, vmssName, subnetID, interfaceName string, addresses int) error + UnassignPrivateIpAddressesVM(ctx context.Context, interfaceName string, addresses []string) error + UnassignPrivateIpAddressesVMSS(ctx context.Context, instanceID, vmssName, interfaceName string, ipConfigNames []string) error AssignPublicIPAddressesVM(ctx context.Context, instanceID string, publicIpTags ipamTypes.Tags) (string, error) AssignPublicIPAddressesVMSS(ctx context.Context, instanceID, vmssName string, publicIpTags ipamTypes.Tags) (string, error) // New methods for optimization: fetch network interfaces once and parse multiple times diff --git a/pkg/azure/ipam/node.go b/pkg/azure/ipam/node.go index 73f59fe18a708..fba6daa069f7c 100644 --- a/pkg/azure/ipam/node.go +++ b/pkg/azure/ipam/node.go @@ -57,9 +57,55 @@ func (n *Node) PopulateStatusFields(k8sObj *v2.CiliumNode) { }) } -// PrepareIPRelease prepares the release of IPs +// PrepareIPRelease selects up to excessIPs free IP addresses on a single +// interface attached to this node and returns them as a ReleaseAction. An +// IP is considered "free" if it is in Succeeded state, not the NIC's +// primary IPConfiguration, and not present in CiliumNode.Status.IPAM.Used. +// +// Primary IPConfigurations are excluded here because Azure ARM rejects +// updates that drop them with an opaque NetworkingInternalOperationError +// 500 — and the failure is atomic for the entire batch, so a primary in +// the release set jams every secondary selected alongside it. The +// SDK-level Unassign* methods keep an additional pre-flight check as +// defense-in-depth in case a stale cache slips a primary through. func (n *Node) PrepareIPRelease(excessIPs int, scopedLog *slog.Logger) *ipam.ReleaseAction { - return &ipam.ReleaseAction{} + r := &ipam.ReleaseAction{} + requiredIfaceName := n.k8sObj.Spec.Azure.InterfaceName + usedIPs := n.k8sObj.Status.IPAM.Used + + n.manager.mutex.RLock() + defer n.manager.mutex.RUnlock() + + _ = n.manager.instances.ForeachInterface(n.node.InstanceID(), + func(_, _ string, ifaceObj ipamTypes.InterfaceRevision) error { + iface, ok := ifaceObj.Resource.(*types.AzureInterface) + if !ok { + return nil + } + if requiredIfaceName != "" && iface.Name != requiredIfaceName { + return nil + } + + free := freeIPsOnInterface(iface, usedIPs) + if len(free) == 0 { + return nil + } + maxRelease := min(len(free), excessIPs) + // Pick the interface with the most releasable IPs (matches AWS). + if r.IPsToRelease == nil || maxRelease > len(r.IPsToRelease) { + r.InterfaceID = iface.ID + r.PoolID = ipamTypes.PoolID(primarySubnetForRelease(iface, free[:maxRelease])) + r.IPsToRelease = free[:maxRelease] + scopedLog.Debug( + "Interface has unused IPs that can be released", + logfields.ID, iface.ID, + logfields.ExcessIPs, excessIPs, + logfields.IPAddrs, r.IPsToRelease, + ) + } + return nil + }) + return r } // ReleaseIPPrefixes is a no-op on Azure since Azure ENIs don't @@ -69,9 +115,113 @@ func (n *Node) ReleaseIPPrefixes(ctx context.Context, r *ipam.ReleaseAction) err return nil } -// ReleaseIPs performs the IP release operation +// ReleaseIPs performs the IP release operation. For VM (non-VMSS) +// interfaces, the network model has the IP on each IPConfiguration so we +// pass IPs to the SDK directly. For VMSS, the compute model only has +// IPConfiguration names, so we translate IPs to names using the in-memory +// mapping populated by parseInterface and pass the names to the SDK. func (n *Node) ReleaseIPs(ctx context.Context, r *ipam.ReleaseAction) error { - return fmt.Errorf("not implemented") + if len(r.IPsToRelease) == 0 { + return nil + } + + iface, err := n.findInterface(r.InterfaceID) + if err != nil { + return err + } + + if iface.GetVMScaleSetName() == "" { + return n.manager.api.UnassignPrivateIpAddressesVM(ctx, iface.Name, r.IPsToRelease) + } + + names, missing := ipsToConfigNames(iface, r.IPsToRelease) + if len(missing) > 0 { + return fmt.Errorf("interface %s: missing IPConfig name mapping for IPs %v (cache out of sync, will retry after next resync)", iface.Name, missing) + } + return n.manager.api.UnassignPrivateIpAddressesVMSS(ctx, iface.GetVMID(), iface.GetVMScaleSetName(), iface.Name, names) +} + +// freeIPsOnInterface returns the IPs on iface that are in Succeeded state, +// not the NIC's primary IPConfiguration, and absent from used. Order is the +// order returned by parseInterface, which is the order Azure listed the +// IPConfigurations. +func freeIPsOnInterface(iface *types.AzureInterface, used ipamTypes.AllocationMap) []string { + free := make([]string, 0, len(iface.Addresses)) + for _, a := range iface.Addresses { + if a.State != types.StateSucceeded { + continue + } + if a.Primary() { + continue + } + if _, inUse := used[a.IP]; inUse { + continue + } + free = append(free, a.IP) + } + return free +} + +// primarySubnetForRelease returns the subnet ID associated with the first +// IP in toRelease that has a known subnet, or "" if none. The IPAM +// framework uses PoolID for metric labelling and subnet pool accounting +// only — it does not affect correctness of the release. +func primarySubnetForRelease(iface *types.AzureInterface, toRelease []string) string { + wanted := make(map[string]struct{}, len(toRelease)) + for _, ip := range toRelease { + wanted[ip] = struct{}{} + } + for _, a := range iface.Addresses { + if _, ok := wanted[a.IP]; !ok { + continue + } + if a.Subnet != "" { + return a.Subnet + } + } + return "" +} + +// findInterface returns the AzureInterface with the given ID on this node, +// or an error if not found. Caller does not need to hold the manager mutex. +func (n *Node) findInterface(interfaceID string) (*types.AzureInterface, error) { + var found *types.AzureInterface + n.manager.mutex.RLock() + defer n.manager.mutex.RUnlock() + _ = n.manager.instances.ForeachInterface(n.node.InstanceID(), + func(_, _ string, ifaceObj ipamTypes.InterfaceRevision) error { + iface, ok := ifaceObj.Resource.(*types.AzureInterface) + if !ok { + return nil + } + if iface.ID == interfaceID { + found = iface + } + return nil + }) + if found == nil { + return nil, fmt.Errorf("interface %s not found on instance %s", interfaceID, n.node.InstanceID()) + } + return found, nil +} + +// ipsToConfigNames maps each IP in ips to its IPConfiguration name on iface. +// IPs without a known mapping are returned in missing. +func ipsToConfigNames(iface *types.AzureInterface, ips []string) (names, missing []string) { + byIP := make(map[string]string, len(iface.Addresses)) + for _, a := range iface.Addresses { + if name := a.IPConfigName(); name != "" { + byIP[a.IP] = name + } + } + for _, ip := range ips { + if name, ok := byIP[ip]; ok { + names = append(names, name) + } else { + missing = append(missing, ip) + } + } + return } // PrepareIPAllocation returns the number of IPs that can be allocated/created. diff --git a/pkg/azure/ipam/node_test.go b/pkg/azure/ipam/node_test.go index da0318d7bd793..4c75cc7d473ab 100644 --- a/pkg/azure/ipam/node_test.go +++ b/pkg/azure/ipam/node_test.go @@ -6,12 +6,160 @@ package ipam import ( "testing" + "github.com/cilium/hive/hivetest" "github.com/stretchr/testify/require" "github.com/cilium/cilium/pkg/azure/types" + ipamTypes "github.com/cilium/cilium/pkg/ipam/types" + v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" ) func TestGetMaximumAllocatableIPv4(t *testing.T) { n := &Node{} require.Equal(t, types.InterfaceAddressLimit, n.GetMaximumAllocatableIPv4()) } + +// fakeIpamNode is a minimal ipamNodeActions stub used by PrepareIPRelease tests. +type fakeIpamNode struct { + id string +} + +func (f *fakeIpamNode) InstanceID() string { return f.id } + +// addrWithName builds an AzureAddress with both IP and IPConfig name set. +func addrWithName(ip, name, subnet string) types.AzureAddress { + addr := types.AzureAddress{IP: ip, Subnet: subnet, State: types.StateSucceeded} + addr.SetIPConfigName(name) + return addr +} + +// addrPrimary builds a primary AzureAddress with IP and IPConfig name set. +func addrPrimary(ip, name, subnet string) types.AzureAddress { + addr := addrWithName(ip, name, subnet) + addr.SetPrimary(true) + return addr +} + +func TestPrepareIPRelease(t *testing.T) { + const instanceID = "/subscriptions/sub/resourceGroups/g/providers/Microsoft.Compute/virtualMachineScaleSets/vmss/virtualMachines/0" + + mkIface := func(name string, addresses []types.AzureAddress) *types.AzureInterface { + iface := &types.AzureInterface{Name: name, State: types.StateSucceeded, Addresses: addresses} + iface.SetID("/subscriptions/sub/resourceGroups/g/providers/Microsoft.Compute/virtualMachineScaleSets/vmss/virtualMachines/0/networkInterfaces/" + name) + return iface + } + mkNode := func(t *testing.T, ifaceFilter string, used ipamTypes.AllocationMap, ifaces ...*types.AzureInterface) *Node { + t.Helper() + mgr := &InstancesManager{instances: ipamTypes.NewInstanceMap()} + for _, iface := range ifaces { + mgr.instances.Update(instanceID, ipamTypes.InterfaceRevision{Resource: iface}) + } + k8sObj := &v2.CiliumNode{ + Spec: v2.NodeSpec{Azure: types.AzureSpec{InterfaceName: ifaceFilter}}, + Status: v2.NodeStatus{IPAM: ipamTypes.IPAMStatus{Used: used}}, + } + return &Node{k8sObj: k8sObj, manager: mgr, node: &fakeIpamNode{id: instanceID}} + } + log := hivetest.Logger(t) + + t.Run("includes free IPs and excludes used", func(t *testing.T) { + used := ipamTypes.AllocationMap{"10.0.0.3": ipamTypes.AllocationIP{}} + iface := mkIface("pods", []types.AzureAddress{ + addrWithName("10.0.0.1", "pods", "subnet-a"), + addrWithName("10.0.0.2", "pod-01", "subnet-a"), + addrWithName("10.0.0.3", "pod-02", "subnet-a"), + addrWithName("10.0.0.4", "pod-03", "subnet-a"), + }) + n := mkNode(t, "", used, iface) + r := n.PrepareIPRelease(2, log) + require.Equal(t, iface.ID, r.InterfaceID) + require.Equal(t, ipamTypes.PoolID("subnet-a"), r.PoolID) + require.Equal(t, []string{"10.0.0.1", "10.0.0.2"}, r.IPsToRelease) + }) + + t.Run("respects excessIPs cap", func(t *testing.T) { + used := ipamTypes.AllocationMap{} + iface := mkIface("pods", []types.AzureAddress{ + addrWithName("10.0.0.1", "pods", "subnet-a"), + addrWithName("10.0.0.2", "pod-01", "subnet-a"), + addrWithName("10.0.0.3", "pod-02", "subnet-a"), + addrWithName("10.0.0.4", "pod-03", "subnet-a"), + }) + n := mkNode(t, "", used, iface) + r := n.PrepareIPRelease(1, log) + require.Len(t, r.IPsToRelease, 1) + }) + + t.Run("excludes primary IPConfigurations to avoid jamming the batch", func(t *testing.T) { + // Mirrors the live arbok layout: primary first, free, on the + // filter-matched interface. Must not be selected. + used := ipamTypes.AllocationMap{} + iface := mkIface("pods", []types.AzureAddress{ + addrPrimary("10.0.0.1", "pods", "subnet-a"), + addrWithName("10.0.0.2", "pod-01", "subnet-a"), + addrWithName("10.0.0.3", "pod-02", "subnet-a"), + }) + n := mkNode(t, "", used, iface) + r := n.PrepareIPRelease(5, log) + require.NotContains(t, r.IPsToRelease, "10.0.0.1") + require.ElementsMatch(t, []string{"10.0.0.2", "10.0.0.3"}, r.IPsToRelease) + }) + + t.Run("Spec.Azure.InterfaceName filters interfaces", func(t *testing.T) { + used := ipamTypes.AllocationMap{} + host := mkIface("primary", []types.AzureAddress{ + addrWithName("10.0.0.1", "hosts", "subnet-host"), + }) + pods := mkIface("pods", []types.AzureAddress{ + addrWithName("10.1.0.1", "pods", "subnet-pods"), + addrWithName("10.1.0.2", "pod-01", "subnet-pods"), + }) + n := mkNode(t, "pods", used, host, pods) + r := n.PrepareIPRelease(5, log) + require.Equal(t, pods.ID, r.InterfaceID) + for _, ip := range r.IPsToRelease { + require.NotEqual(t, "10.0.0.1", ip, "must not pick host IP") + } + }) + + t.Run("picks interface with the most freeable IPs", func(t *testing.T) { + used := ipamTypes.AllocationMap{} + few := mkIface("few", []types.AzureAddress{ + addrWithName("10.0.0.1", "few-1", "subnet-a"), + }) + many := mkIface("many", []types.AzureAddress{ + addrWithName("10.1.0.1", "many-1", "subnet-b"), + addrWithName("10.1.0.2", "many-2", "subnet-b"), + addrWithName("10.1.0.3", "many-3", "subnet-b"), + }) + n := mkNode(t, "", used, few, many) + r := n.PrepareIPRelease(5, log) + require.Equal(t, many.ID, r.InterfaceID) + require.Len(t, r.IPsToRelease, 3) + }) + + t.Run("excludes addresses not in Succeeded state", func(t *testing.T) { + used := ipamTypes.AllocationMap{} + bad := types.AzureAddress{IP: "10.0.0.99", Subnet: "subnet-a", State: "failed"} + bad.SetIPConfigName("broken") + iface := mkIface("pods", []types.AzureAddress{ + addrWithName("10.0.0.1", "pods", "subnet-a"), + bad, + }) + n := mkNode(t, "", used, iface) + r := n.PrepareIPRelease(5, log) + require.NotContains(t, r.IPsToRelease, "10.0.0.99") + }) +} + +func TestIPsToConfigNames(t *testing.T) { + iface := &types.AzureInterface{ + Addresses: []types.AzureAddress{ + addrWithName("10.0.0.1", "pods", "s"), + addrWithName("10.0.0.2", "pod-01", "s"), + }, + } + names, missing := ipsToConfigNames(iface, []string{"10.0.0.1", "10.0.0.99"}) + require.Equal(t, []string{"pods"}, names) + require.Equal(t, []string{"10.0.0.99"}, missing) +} diff --git a/pkg/azure/types/types.go b/pkg/azure/types/types.go index cb5d7b8ebc242..3dcdb9148b24e 100644 --- a/pkg/azure/types/types.go +++ b/pkg/azure/types/types.go @@ -69,6 +69,47 @@ type AzureAddress struct { // State is the provisioning state of the address State string `json:"state,omitempty"` + + // ipConfigName is the Azure IPConfiguration resource name (e.g. "pods", + // "pod-01", or "Cilium-AB12CD34"). Required to drop IPConfigurations from + // VMSS compute models, which expose name + primary but not + // privateIPAddress. Populated at parse time from the network model and + // preserved across DeepCopy via the slice-element value copy. Not + // serialized to the CRD. + ipConfigName string `json:"-"` + + // primary mirrors IPConfiguration.Properties.Primary on the live Azure + // NIC. When --azure-use-primary-address=true the primary IP is added to + // the IPAM pool, so PrepareIPRelease has to filter it out — Azure ARM + // rejects updates that drop the primary IPConfiguration with an opaque + // 500 error, and that error is atomic for the entire batch (jamming any + // secondaries selected alongside it). Populated at parse time from the + // network model. Not serialized to the CRD. + primary bool `json:"-"` +} + +// SetIPConfigName records the Azure IPConfiguration resource name backing +// this address. +func (a *AzureAddress) SetIPConfigName(name string) { + a.ipConfigName = name +} + +// IPConfigName returns the Azure IPConfiguration resource name backing this +// address, or "" if unknown. +func (a AzureAddress) IPConfigName() string { + return a.ipConfigName +} + +// SetPrimary records whether this address backs the NIC's primary +// IPConfiguration. +func (a *AzureAddress) SetPrimary(primary bool) { + a.primary = primary +} + +// Primary returns whether this address backs the NIC's primary +// IPConfiguration. +func (a AzureAddress) Primary() bool { + return a.primary } // AzureInterface represents an Azure Interface diff --git a/pkg/azure/types/zz_generated.deepequal.go b/pkg/azure/types/zz_generated.deepequal.go index 8761f19495136..6c3ebed6284bd 100644 --- a/pkg/azure/types/zz_generated.deepequal.go +++ b/pkg/azure/types/zz_generated.deepequal.go @@ -24,6 +24,12 @@ func (in *AzureAddress) DeepEqual(other *AzureAddress) bool { if in.State != other.State { return false } + if in.ipConfigName != other.ipConfigName { + return false + } + if in.primary != other.primary { + return false + } return true } diff --git a/pkg/ipam/allocator/azure/azure.go b/pkg/ipam/allocator/azure/azure.go index 67453ec6c85a4..1bd905e5a2c36 100644 --- a/pkg/ipam/allocator/azure/azure.go +++ b/pkg/ipam/allocator/azure/azure.go @@ -83,7 +83,7 @@ func (a *AllocatorAzure) Start(ctx context.Context, getterUpdater ipam.CiliumNod return nil, fmt.Errorf("unable to create Azure client: %w", err) } instances := azureIPAM.NewInstancesManager(a.rootLogger, azureClient) - nodeManager, err := ipam.NewNodeManager(a.logger, instances, getterUpdater, iMetrics, operatorOption.Config.ParallelAllocWorkers, false, false) + nodeManager, err := ipam.NewNodeManager(a.logger, instances, getterUpdater, iMetrics, operatorOption.Config.ParallelAllocWorkers, operatorOption.Config.AzureReleaseExcessIPs, false) if err != nil { return nil, fmt.Errorf("unable to initialize Azure node manager: %w", err) }