Upgrade 3.3.x with latest from main (#672)

sebrandon1 · edcdavid · greyerof · web-flow · commit c52a77418743 · 2022-04-19T14:42:22.000-05:00
* Fix timeout (#663) * Bug fix for the NodeSelector handler. (#664) + Some traces fixed in the networking ts. * Diagnostic mode fix: close debug oc sessions before removing node lab… (#666) * Diagnostic mode fix: close debug oc sessions before removing node labels. This commit tries to fix the crash that happens when running the container in diagnostic mode: race condition when the program finishes, as the oc debug session were not closed before the labels are being removed from the nodes. * Revert "Fix timeout (#663)" This reverts commit 8710512. Co-authored-by: Salaheddine Hamadi <shamadi@redhat.com> * Timeout for debug pods increased to 5mins. (#669) + Retry interval also increased to 5 secs. * Container testing log level set to trace. (#665) Co-authored-by: Brandon Palm <bpalm@redhat.com> * lifecycle's terminationGracePeriod TC removed. (#667) Due to the issues on both the original and current implementation, the terminationGracePeriod TC will be removed. A new and improved version of this TC should be done, checking that after the configured (or defaulted) value, the pods were removed without any error code. Co-authored-by: Brandon Palm <bpalm@redhat.com> * Solution for issue on Nokia (#671) * Solution for issue on Nokia * Fixed the error Co-authored-by: Your Name <you@example.com> * Update Go to 1.17.9 (#668) * Fix missed version changes (#670) Co-authored-by: edcdavid <86730676+edcdavid@users.noreply.github.com> Co-authored-by: Gonzalo Reyero Ferreras <87083379+greyerof@users.noreply.github.com> Co-authored-by: Salaheddine Hamadi <shamadi@redhat.com> Co-authored-by: Shimrit Peretz <34240686+shimritproj@users.noreply.github.com> Co-authored-by: Your Name <you@example.com>
diff --git a/.github/workflows/merge.yaml b/.github/workflows/merge.yaml
@@ -15,7 +15,7 @@ jobs:
       - name: Set up Go 1.17
         uses: actions/setup-go@v2
         with:
-          go-version: 1.17.8
+          go-version: 1.17.9
 
       - name: Check out code into the Go module directory
         uses: actions/checkout@v2
diff --git a/.github/workflows/pre-main.yaml b/.github/workflows/pre-main.yaml
@@ -18,6 +18,7 @@ env:
   TNF_OUTPUT_DIR: /tmp/tnf/output
   TNF_SRC_URL: 'https://github.com/${{ github.repository }}'
   TESTING_CMD_PARAMS: '-n host -i ${REGISTRY_LOCAL}/${IMAGE_NAME}:${IMAGE_TAG} -t ${TNF_CONFIG_DIR} -o ${TNF_OUTPUT_DIR}'
+  CONTAINER_DIAGNOSTIC_LOG_LEVEL: trace
   TNF_PARTNER_DIR: '/usr/tnf-partner'
   TNF_PARTNER_SRC_DIR: '${TNF_PARTNER_DIR}/src'
   TERM: xterm-color
@@ -31,7 +32,7 @@ jobs:
       - name: Set up Go 1.17
         uses: actions/setup-go@v2
         with:
-          go-version: 1.17.8
+          go-version: 1.17.9
 
       - name: Disable default go problem matcher
         run: echo "::remove-matcher owner=go::"
@@ -82,7 +83,7 @@ jobs:
       - name: Set up Go 1.17
         uses: actions/setup-go@v2
         with:
-          go-version: 1.17.8
+          go-version: 1.17.9
 
       - name: Disable default go problem matcher
         run: echo "::remove-matcher owner=go::"
@@ -127,7 +128,7 @@ jobs:
       - name: Set up Go 1.17
         uses: actions/setup-go@v2
         with:
-          go-version: 1.17.8
+          go-version: 1.17.9
 
       - name: Disable default go problem matcher
         run: echo "::remove-matcher owner=go::"
@@ -141,7 +142,7 @@ jobs:
         run: go install github.com/golang/mock/mockgen@v1.6.0 && make mocks
 
       - name: Install ginkgo
-        run: go install github.com/onsi/ginkgo/ginkgo@v1.16.5
+        run: go install github.com/onsi/ginkgo/v2/ginkgo@v2.1.3
 
       - name: Execute `make build`
         run: make build
@@ -199,7 +200,7 @@ jobs:
         shell: bash
 
       - name: 'Test: Run without any TS, just get diagnostic information'
-        run: ./run-tnf-container.sh ${{ env.TESTING_CMD_PARAMS }}
+        run: LOG_LEVEL=${CONTAINER_DIAGNOSTIC_LOG_LEVEL} ./run-tnf-container.sh ${{ env.TESTING_CMD_PARAMS }}
           
       - name: 'Test: Run generic test suite in a TNF container'
         run: ./run-tnf-container.sh ${{ env.TESTING_CMD_PARAMS }} -f access-control lifecycle platform observability networking affiliated-certification operator
diff --git a/CATALOG.md b/CATALOG.md
@@ -221,18 +221,6 @@ Description|http://test-network-function.com/testcases/lifecycle/pod-scheduling
 Result Type|informative
 Suggested Remediation|In most cases, Pod's should not specify their host Nodes through nodeSelector or nodeAffinity.  However, there are cases in which CNFs require specialized hardware specific to a particular class of Node.  As such, this test is purely informative, and will not prevent a CNF from being certified. However, one should have an appropriate justification as to why nodeSelector and/or nodeAffinity is utilized by a CNF.
 Best Practice Reference|[CNF Best Practice V1.2](https://connect.redhat.com/sites/default/files/2021-03/Cloud%20Native%20Network%20Function%20Requirements.pdf) Section 6.2
-#### pod-termination-grace-period
-
-Property|Description
----|---
-Test Case Name|pod-termination-grace-period
-Test Case Label|lifecycle-pod-termination-grace-period
-Unique ID|http://test-network-function.com/testcases/lifecycle/pod-termination-grace-period
-Version|v1.0.0
-Description|http://test-network-function.com/testcases/lifecycle/pod-termination-grace-period tests whether the terminationGracePeriod is CNF-specific, or if the default (30s) is utilized.  This test is informative, and will not affect CNF Certification.  In many cases, the default terminationGracePeriod is perfectly acceptable for a CNF.
-Result Type|informative
-Suggested Remediation|Choose a terminationGracePeriod that is appropriate for your given CNF.  If the default (30s) is appropriate, then feel free to ignore this informative message.  This test is meant to raise awareness around how Pods are terminated, and to suggest that a CNF is configured based on its requirements.  In addition to a terminationGracePeriod, consider utilizing a termination hook in the case that your application requires special shutdown instructions.
-Best Practice Reference|[CNF Best Practice V1.2](https://connect.redhat.com/sites/default/files/2021-03/Cloud%20Native%20Network%20Function%20Requirements.pdf) Section 6.2
 #### readiness
 
 Property|Description
diff --git a/Dockerfile b/Dockerfile
@@ -13,12 +13,12 @@ ENV TEMP_DIR=/tmp
 
 # Install dependencies
 RUN yum install -y gcc git jq make wget
-RUN wget https://get.helm.sh/helm-v3.8.1-linux-amd64.tar.gz && \
-    tar -xvf helm-v3.8.1-linux-amd64.tar.gz && \
+RUN wget https://get.helm.sh/helm-v3.8.2-linux-amd64.tar.gz && \
+    tar -xvf helm-v3.8.2-linux-amd64.tar.gz && \
     cp linux-amd64/helm /usr/bin/helm
 # Install Go binary
 ENV GO_DL_URL="https://golang.org/dl"
-ENV GO_BIN_TAR="go1.17.8.linux-amd64.tar.gz"
+ENV GO_BIN_TAR="go1.17.9.linux-amd64.tar.gz"
 ENV GO_BIN_URL_x86_64=${GO_DL_URL}/${GO_BIN_TAR}
 ENV GOPATH="/root/go"
 RUN if [[ "$(uname -m)" -eq "x86_64" ]] ; then \
diff --git a/Makefile b/Makefile
@@ -128,8 +128,8 @@ install-tools:
 	go install github.com/onsi/ginkgo/v2/ginkgo@v2.1.3
 	go install github.com/onsi/gomega
 	go install github.com/golang/mock/mockgen@v1.6.0
-	wget https://get.helm.sh/helm-v3.8.1-linux-amd64.tar.gz && \
-    tar -xvf helm-v3.8.1-linux-amd64.tar.gz && \
+	wget https://get.helm.sh/helm-v3.8.2-linux-amd64.tar.gz && \
+    tar -xvf helm-v3.8.2-linux-amd64.tar.gz && \
     cp linux-amd64/helm /usr/local/bin/helm
 	
 # Install golangci-lint	
diff --git a/pkg/config/autodiscover/autodiscover_debug.go b/pkg/config/autodiscover/autodiscover_debug.go
@@ -31,14 +31,16 @@ import (
 )
 
 const (
-	defaultNamespace   = "default"
-	debugDaemonSet     = "debug"
-	debugLabelName     = "test-network-function.com/app"
-	debugLabelValue    = "debug"
-	nodeLabelName      = "test-network-function.com/node"
-	nodeLabelValue     = "target"
-	addlabelCommand    = "oc label node %s %s=%s --overwrite=true"
-	deletelabelCommand = "oc label node %s %s- --overwrite=true"
+	defaultNamespace    = "default"
+	debugDaemonSet      = "debug"
+	debugLabelName      = "test-network-function.com/app"
+	debugLabelValue     = "debug"
+	nodeLabelName       = "test-network-function.com/node"
+	nodeLabelValue      = "target"
+	addlabelCommand     = "oc label node %s %s=%s --overwrite=true"
+	deletelabelCommand  = "oc label node %s %s- --overwrite=true"
+	dsTimeoutMins       = 5
+	dsRetryIntervalSecs = 5
 )
 
 // FindDebugPods completes a `configsections.TestPartner.ContainersDebugList` from the current state of the cluster,
@@ -81,7 +83,7 @@ func CheckDebugDaemonset(expectedDebugPods int) {
 	gomega.Eventually(func() bool {
 		log.Debug("check debug daemonset status")
 		return checkDebugPodsReadiness(expectedDebugPods)
-	}, 60*time.Second, 2*time.Second).Should(gomega.Equal(true)) //nolint: gomnd
+	}, dsTimeoutMins*time.Minute, dsRetryIntervalSecs*time.Second).Should(gomega.Equal(true))
 }
 
 // checkDebugPodsReadiness helper function that returns true if the daemonset debug is deployed properly
diff --git a/pkg/tnf/handlers/nodeselector/nodeselector.go b/pkg/tnf/handlers/nodeselector/nodeselector.go
@@ -40,7 +40,7 @@ func NewNodeSelector(timeout time.Duration, podName, podNamespace string) *NodeS
 	return &NodeSelector{
 		timeout: timeout,
 		result:  tnf.ERROR,
-		args:    []string{"oc", "-n", podNamespace, "get", "pods", podName, "-o", "custom-columns=nodeselector:.spec.nodeSelector,nodeaffinity:.spec.nodeAffinity"},
+		args:    []string{"oc", "-n", podNamespace, "get", "pods", podName, "-o", "custom-columns=nodeselector:.spec.nodeSelector,nodeaffinity:.spec.affinity.nodeAffinity"},
 	}
 }
 
diff --git a/test-network-function/common/suite.go b/test-network-function/common/suite.go
@@ -31,12 +31,7 @@ func RemoveLabelsFromAllNodes() {
 	}
 }
 
-var _ = ginkgo.BeforeSuite(func() {
-})
-
-var _ = ginkgo.AfterSuite(func() {
-	// clean up added label to nodes
-	log.Info("Clean up added labels to nodes")
+func RemoveDebugPods() {
 	env = configpkg.GetTestEnvironment()
 	env.LoadAndRefresh()
 	for name, node := range env.NodesUnderTest {
@@ -46,4 +41,13 @@ var _ = ginkgo.AfterSuite(func() {
 		node.DebugContainer.CloseOc()
 		autodiscover.DeleteDebugLabel(name)
 	}
+}
+
+var _ = ginkgo.BeforeSuite(func() {
+})
+
+var _ = ginkgo.AfterSuite(func() {
+	// clean up added label to nodes
+	log.Info("Clean up added labels to nodes")
+	RemoveDebugPods()
 })
diff --git a/test-network-function/identifiers/identifiers.go b/test-network-function/identifiers/identifiers.go
@@ -101,11 +101,6 @@ var (
 		Url:     formTestURL(common.AccessControlTestKey, "namespace"),
 		Version: versionOne,
 	}
-	// TestNonDefaultGracePeriodIdentifier tests best grace period practices.
-	TestNonDefaultGracePeriodIdentifier = claim.Identifier{
-		Url:     formTestURL(common.LifecycleTestKey, "pod-termination-grace-period"),
-		Version: versionOne,
-	}
 	// TestNonTaintedNodeKernelsIdentifier is the identifier for the test checking tainted nodes.
 	TestNonTaintedNodeKernelsIdentifier = claim.Identifier{
 		Url:     formTestURL(common.PlatformAlterationTestKey, "tainted-node-kernel"),
@@ -402,20 +397,6 @@ tag. (2) It doesn't have any of the following prefixes: default, openshift-, ist
 		BestPracticeReference: bestPracticeDocV1dot2URL + " Section 6.2, 16.3.8 & 16.3.9",
 	},
 
-	TestNonDefaultGracePeriodIdentifier: {
-		Identifier: TestNonDefaultGracePeriodIdentifier,
-		Type:       informativeResult,
-		Remediation: `Choose a terminationGracePeriod that is appropriate for your given CNF.  If the default (30s) is appropriate, then feel
-free to ignore this informative message.  This test is meant to raise awareness around how Pods are terminated, and to
-suggest that a CNF is configured based on its requirements.  In addition to a terminationGracePeriod, consider utilizing
-a termination hook in the case that your application requires special shutdown instructions.`,
-		Description: formDescription(TestNonDefaultGracePeriodIdentifier,
-			`tests whether the terminationGracePeriod is CNF-specific, or if the default (30s) is utilized.  This test is
-informative, and will not affect CNF Certification.  In many cases, the default terminationGracePeriod is perfectly
-acceptable for a CNF.`),
-		BestPracticeReference: bestPracticeDocV1dot2URL + " Section 6.2",
-	},
-
 	TestNonTaintedNodeKernelsIdentifier: {
 		Identifier: TestNonTaintedNodeKernelsIdentifier,
 		Type:       normativeResult,
diff --git a/test-network-function/lifecycle/suite.go b/test-network-function/lifecycle/suite.go
@@ -17,7 +17,6 @@
 package lifecycle
 
 import (
-	"encoding/json"
 	"fmt"
 	"path"
 	"strings"
@@ -44,12 +43,11 @@ import (
 )
 
 const (
-	defaultTerminationGracePeriod = 30
-	baseNodeDrainTimeout          = 5 * time.Minute
-	maxNodeDrainTimeout           = 30 * time.Minute
-	scalingTimeout                = 1 * time.Minute
-	scalingPollingPeriod          = 1 * time.Second
-	postNodeDrainRecoveryTimeOut  = 2 * time.Minute
+	baseNodeDrainTimeout         = 5 * time.Minute
+	maxNodeDrainTimeout          = 30 * time.Minute
+	scalingTimeout               = 1 * time.Minute
+	scalingPollingPeriod         = 1 * time.Second
+	postNodeDrainRecoveryTimeOut = 2 * time.Minute
 )
 
 var (
@@ -127,8 +125,6 @@ var _ = ginkgo.Describe(common.LifecycleTestKey, func() {
 
 		testNodeSelector(env)
 
-		testGracePeriod(env)
-
 		testShutdown(env)
 
 		testLiveness(env)
@@ -332,124 +328,6 @@ func testNodeSelector(env *config.TestEnvironment) {
 	})
 }
 
-func testTerminationGracePeriodOnPodSet(podsetsUnderTests []configsections.PodSet, context *interactive.Context) []configsections.PodSet {
-	const ocCommandTemplate = "oc get %s %s -n %s -o jsonpath={.metadata.annotations\\.\"kubectl\\.kubernetes\\.io/last-applied-configuration\"}"
-
-	type lastAppliedConfigType struct {
-		Spec struct {
-			Template struct {
-				Spec struct {
-					TerminationGracePeriodSeconds int
-				}
-			}
-		}
-	}
-
-	badPodsets := []configsections.PodSet{}
-	for _, podset := range podsetsUnderTests {
-		ocCommand := fmt.Sprintf(ocCommandTemplate, podset.Type, podset.Name, podset.Namespace)
-		lastAppliedConfigString, err := utils.ExecuteCommand(ocCommand, common.DefaultTimeout, context)
-		if err != nil {
-			ginkgo.Fail(fmt.Sprintf("%s %s (ns %s): failed to get last-applied-configuration field", podset.Type, podset.Name, podset.Namespace))
-		}
-		lastAppliedConfig := lastAppliedConfigType{}
-
-		// Use -1 as default value, in case the param was not set.
-		lastAppliedConfig.Spec.Template.Spec.TerminationGracePeriodSeconds = -1
-
-		err = json.Unmarshal([]byte(lastAppliedConfigString), &lastAppliedConfig)
-		if err != nil {
-			ginkgo.Fail(fmt.Sprintf("%s %s (ns %s): failed to unmarshall last-applied-configuration string (%s)", podset.Type, podset.Name, podset.Namespace, lastAppliedConfigString))
-		}
-
-		if lastAppliedConfig.Spec.Template.Spec.TerminationGracePeriodSeconds == -1 {
-			tnf.ClaimFilePrintf("%s %s (ns %s) template's spec does not have a terminationGracePeriodSeconds value set. Default value (%d) will be used.",
-				podset.Type, podset.Name, podset.Namespace, defaultTerminationGracePeriod)
-			badPodsets = append(badPodsets, podset)
-		} else {
-			log.Infof("%s %s (ns %s) last-applied-configuration's terminationGracePeriodSeconds: %d", podset.Type, podset.Name, podset.Namespace, lastAppliedConfig.Spec.Template.Spec.TerminationGracePeriodSeconds)
-		}
-	}
-
-	return badPodsets
-}
-
-func testTerminationGracePeriodOnPods(pods []*configsections.Pod, context *interactive.Context) []configsections.Pod {
-	const ocCommandTemplate = "oc get pod %s -n %s -o jsonpath={.metadata.annotations\\.\"kubectl\\.kubernetes\\.io/last-applied-configuration\"}"
-
-	type lastAppliedConfigType struct {
-		Spec struct {
-			TerminationGracePeriodSeconds int
-		}
-	}
-
-	badPods := []configsections.Pod{}
-	numUnmanagedPods := 0
-	for _, pod := range pods {
-		// We'll process only "unmanaged" pods (not belonging to any deployment/statefulset) here.
-		if pod.IsManaged {
-			continue
-		}
-
-		numUnmanagedPods++
-
-		ocCommand := fmt.Sprintf(ocCommandTemplate, pod.Name, pod.Namespace)
-		lastAppliedConfigString, err := utils.ExecuteCommand(ocCommand, common.DefaultTimeout, context)
-		if err != nil {
-			ginkgo.Fail(fmt.Sprintf("Pod %s (ns %s): failed to get last-applied-configuration field", pod.Name, pod.Namespace))
-		}
-		lastAppliedConfig := lastAppliedConfigType{}
-
-		// Use -1 as default value, in case the param was not set.
-		lastAppliedConfig.Spec.TerminationGracePeriodSeconds = -1
-
-		err = json.Unmarshal([]byte(lastAppliedConfigString), &lastAppliedConfig)
-		if err != nil {
-			ginkgo.Fail(fmt.Sprintf("Pod %s (ns %s): failed to unmarshall last-applied-configuration string (%s)", pod.Name, pod.Namespace, lastAppliedConfigString))
-		}
-
-		if lastAppliedConfig.Spec.TerminationGracePeriodSeconds == -1 {
-			tnf.ClaimFilePrintf("Pod %s (ns %s) spec does not have a terminationGracePeriodSeconds value set. Default value (%d) will be used.",
-				pod.Name, pod.Namespace, defaultTerminationGracePeriod)
-			badPods = append(badPods, *pod)
-		} else {
-			log.Infof("Pod %s (ns %s) last-applied-configuration's terminationGracePeriodSeconds: %d", pod.Name, pod.Namespace, lastAppliedConfig.Spec.TerminationGracePeriodSeconds)
-		}
-
-		log.Debugf("Number of unamanaged pods processed: %d", numUnmanagedPods)
-	}
-	return badPods
-}
-
-func testGracePeriod(env *config.TestEnvironment) {
-	testID := identifiers.XformToGinkgoItIdentifier(identifiers.TestNonDefaultGracePeriodIdentifier)
-	ginkgo.It(testID, ginkgo.Label(testID), func() {
-		ginkgo.By("Test terminationGracePeriod")
-		context := env.GetLocalShellContext()
-
-		badDeployments := testTerminationGracePeriodOnPodSet(env.DeploymentsUnderTest, context)
-		badStatefulsets := testTerminationGracePeriodOnPodSet(env.StateFulSetUnderTest, context)
-		badPods := testTerminationGracePeriodOnPods(env.PodsUnderTest, context)
-
-		numDeps := len(badDeployments)
-		if numDeps > 0 {
-			log.Debugf("Deployments found without terminationGracePeriodSeconds param set: %+v", badDeployments)
-		}
-		numSts := len(badStatefulsets)
-		if numSts > 0 {
-			log.Debugf("Statefulsets found without terminationGracePeriodSeconds param set: %+v", badStatefulsets)
-		}
-		numPods := len(badPods)
-		if numPods > 0 {
-			log.Debugf("Pods found without terminationGracePeriodSeconds param set: %+v", badPods)
-		}
-
-		if numDeps > 0 || numSts > 0 || numPods > 0 {
-			ginkgo.Fail(fmt.Sprintf("Found %d deployments, %d statefulsets and %d pods without terminationGracePeriodSeconds param set.", numDeps, numSts, numPods))
-		}
-	})
-}
-
 //nolint:dupl
 func testShutdown(env *config.TestEnvironment) {
 	testID := identifiers.XformToGinkgoItIdentifier(identifiers.TestShudtownIdentifier)
diff --git a/test-network-function/networking/suite.go b/test-network-function/networking/suite.go
diff --git a/test-network-function/networking/suite_test.go b/test-network-function/networking/suite_test.go
diff --git a/test-network-function/networking/testdata/test_listening_port.json b/test-network-function/networking/testdata/test_listening_port.json
diff --git a/test-network-function/suite_test.go b/test-network-function/suite_test.go

Original file line number	Diff line number	Diff line change
`@@ -40,7 +40,7 @@ func NewNodeSelector(timeout time.Duration, podName, podNamespace string) *NodeS`
`40`	`40`	`return &NodeSelector{`
`41`	`41`	`timeout: timeout,`
`42`	`42`	`result: tnf.ERROR,`
`43`		`- args: []string{"oc", "-n", podNamespace, "get", "pods", podName, "-o", "custom-columns=nodeselector:.spec.nodeSelector,nodeaffinity:.spec.nodeAffinity"},`
	`43`	`+ args: []string{"oc", "-n", podNamespace, "get", "pods", podName, "-o", "custom-columns=nodeselector:.spec.nodeSelector,nodeaffinity:.spec.affinity.nodeAffinity"},`
`44`	`44`	`}`
`45`	`45`	`}`
`46`	`46`