-
Notifications
You must be signed in to change notification settings - Fork 134
Open
Labels
lifecycle/frozen: Indicates that an issue or PR should not be auto-closed due to staleness.
Description
1 Bug phenomenon:
- The KCM (kube-controller-manager) rootCA is generated by manageServiceAccountCABundle in targetconfigcontroller. This function first gets the kube-apiserver-server-ca ConfigMap and then combines it with the other input ConfigMaps to generate the KCM rootCA.
- Occasionally (very unlikely, but I have hit this once) the kube-apiserver-server-ca ConfigMap is missing. manageServiceAccountCABundle then generates the rootCA without kube-apiserver-server-ca, and the KCM leader ends up holding the wrong rootCA. This leads to the kube-root-ca problem in every pod, and the OCP release will not work until I stop the faulty KCM leader.
2 Bug fix:
2.1. This bug can be resolved by adding a kube-apiserver-server-ca check to manageServiceAccountCABundle in targetconfigcontroller, as follows:
Lines 706 to 720 in 4ca346e
// manageServiceAccountCABundle builds the "serviceaccount-ca" ConfigMap for
// operatorclient.TargetNamespace by combining the CA bundle ConfigMaps listed
// below, then applies it with resourceapply.ApplyConfigMap.
//
// If CombineCABundleConfigMaps fails, the result is (nil, false, err);
// otherwise the three results of ApplyConfigMap are returned unchanged.
//
// NOTE(review): CombineCABundleConfigMaps, as quoted in this issue, skips any
// input the lister reports as not found, so a missing
// "kube-apiserver-server-ca" yields a bundle without it rather than an error.
func manageServiceAccountCABundle(ctx context.Context, lister corev1listers.ConfigMapLister, client corev1client.ConfigMapsGetter, recorder events.Recorder) (*corev1.ConfigMap, bool, error) { | |
requiredConfigMap, err := resourcesynccontroller.CombineCABundleConfigMaps( | |
// destination: the ConfigMap this function produces
resourcesynccontroller.ResourceLocation{Namespace: operatorclient.TargetNamespace, Name: "serviceaccount-ca"}, | |
lister, | |
// include the ca bundle needed to recognize the server | |
resourcesynccontroller.ResourceLocation{Namespace: operatorclient.GlobalMachineSpecifiedConfigNamespace, Name: "kube-apiserver-server-ca"}, | |
// include the ca bundle needed to recognize default | |
// certificates generated by cluster-ingress-operator | |
resourcesynccontroller.ResourceLocation{Namespace: operatorclient.GlobalMachineSpecifiedConfigNamespace, Name: "default-ingress-cert"}, | |
) | |
if err != nil { | |
return nil, false, err | |
} | |
// Hand the combined bundle to ApplyConfigMap and pass its results straight through.
return resourceapply.ApplyConfigMap(ctx, client, recorder, requiredConfigMap) | |
} |

2.2 Maybe like this, but this is not the best way to resolve the problem.

2.3 This can also be resolved by modifying the OpenShift library function CombineCABundleConfigMaps in resourcesynccontroller, as follows:
Lines 17 to 67 in 4ca346e
// CombineCABundleConfigMaps reads the "ca-bundle.crt" key of every input
// ConfigMap through lister, merges the parsed certificates, and returns a new
// ConfigMap named after destinationConfigMap whose only data key is
// "ca-bundle.crt" holding the encoded result.
//
// Inputs that are not found, or whose "ca-bundle.crt" value is empty, are
// skipped without error. Any other lister error is returned as-is, and a
// bundle that fails PEM parsing produces a "malformed" error naming the
// ConfigMap and namespace. The returned ConfigMap is only constructed here;
// this function does not write it anywhere.
func CombineCABundleConfigMaps(destinationConfigMap ResourceLocation, lister corev1listers.ConfigMapLister, inputConfigMaps ...ResourceLocation) (*corev1.ConfigMap, error) { | |
certificates := []*x509.Certificate{} | |
for _, input := range inputConfigMaps { | |
inputConfigMap, err := lister.ConfigMaps(input.Namespace).Get(input.Name) | |
// A missing input is silently skipped rather than reported as an error.
if apierrors.IsNotFound(err) { | |
continue | |
} | |
if err != nil { | |
return nil, err | |
} | |
// configmaps must conform to this | |
inputContent := inputConfigMap.Data["ca-bundle.crt"] | |
// An absent or empty bundle contributes no certificates.
if len(inputContent) == 0 { | |
continue | |
} | |
inputCerts, err := cert.ParseCertsPEM([]byte(inputContent)) | |
if err != nil { | |
return nil, fmt.Errorf("configmap/%s in %q is malformed: %v", input.Name, input.Namespace, err) | |
} | |
certificates = append(certificates, inputCerts...) | |
} | |
// NOTE(review): presumably drops certificates that are past their validity
// period — confirm against crypto.FilterExpiredCerts.
certificates = crypto.FilterExpiredCerts(certificates...) | |
finalCertificates := []*x509.Certificate{} | |
// now check for duplicates. n^2, but super simple | |
// Certificates are compared by their Raw bytes; the first occurrence is kept,
// so the order of the remaining certificates is preserved.
for i := range certificates { | |
found := false | |
for j := range finalCertificates { | |
if reflect.DeepEqual(certificates[i].Raw, finalCertificates[j].Raw) { | |
found = true | |
break | |
} | |
} | |
if !found { | |
finalCertificates = append(finalCertificates, certificates[i]) | |
} | |
} | |
caBytes, err := crypto.EncodeCertificates(finalCertificates...) | |
if err != nil { | |
return nil, err | |
} | |
// Only namespace, name and the single data key are populated on the result.
return &corev1.ConfigMap{ | |
ObjectMeta: metav1.ObjectMeta{Namespace: destinationConfigMap.Namespace, Name: destinationConfigMap.Name}, | |
Data: map[string]string{ | |
"ca-bundle.crt": string(caBytes), | |
}, | |
}, nil | |
} |

3 This is related to openshift/library-go issue #1472 (github.com/openshift/library-go): missing key configmap.
Metadata
Metadata
Assignees
Labels
lifecycle/frozen: Indicates that an issue or PR should not be auto-closed due to staleness.