This repository was archived by the owner on Jan 27, 2021. It is now read-only.
19 changes: 10 additions & 9 deletions README.md
@@ -17,11 +17,11 @@ Various types of Kubernetes resources can be Osiris-enabled using an annotation.
Osiris-enabled pods are automatically instrumented with a __metrics-collecting
proxy__ deployed as a sidecar container.

Osiris-enabled deployments (if _already_ scaled to a configurable minimum number
of replicas-- one by default) automatically have metrics from their pods
continuously scraped and analyzed by the __zeroscaler__ component. When the
aggregated metrics reveal that all of the deployment's pods are idling, the
zeroscaler scales the deployment to zero replicas.
Osiris-enabled deployments or statefulSets (if _already_ scaled to a configurable
minimum number of replicas -- one by default) automatically have metrics from
their pods continuously scraped and analyzed by the __zeroscaler__ component.
When the aggregated metrics reveal that all of the workload's pods are idling,
the zeroscaler scales the deployment or statefulSet to zero replicas.

Under normal circumstances, scaling a deployment to zero replicas poses a
problem: any services that select pods from that deployment (and only that
@@ -178,14 +178,14 @@ spec:

Most of Osiris's configuration is done with Kubernetes annotations, as seen in the Usage section.

#### Deployment Annotations
#### Deployment & StatefulSet Annotations

The following table lists the supported annotations for Kubernetes `Deployments` and their default values.
The following table lists the supported annotations for Kubernetes `Deployments` and `StatefulSets`, and their default values. An illustrative example follows the table.

| Annotation | Description | Default |
| ---------- | ----------- | ------- |
| `osiris.deislabs.io/enabled` | Enable the zeroscaler component to scrape and analyze metrics from the deployment's pods and scale the deployment to zero when idle. Allowed values: `y`, `yes`, `true`, `on`, `1`. | _no value_ (= disabled) |
| `osiris.deislabs.io/minReplicas` | The minimum number of replicas to set on the deployment when Osiris will scale up. If you set `2`, Osiris will scale the deployment from `0` to `2` replicas directly. Osiris won't collect metrics from deployments which have more than `minReplicas` replicas - to avoid useless collections of metrics. | `1` |
| `osiris.deislabs.io/enabled` | Enable the zeroscaler component to scrape and analyze metrics from the deployment's or statefulSet's pods and scale the deployment/statefulSet to zero when idle. Allowed values: `y`, `yes`, `true`, `on`, `1`. | _no value_ (= disabled) |
| `osiris.deislabs.io/minReplicas` | The minimum number of replicas to set on the deployment/statefulSet when Osiris scales it up. If you set `2`, Osiris will scale the deployment/statefulSet from `0` directly to `2` replicas. Osiris won't collect metrics from deployments/statefulSets that have more than `minReplicas` replicas, to avoid unnecessary metrics collection. | `1` |
| `osiris.deislabs.io/metricsCheckInterval` | The interval, in seconds, at which Osiris scrapes the pod's HTTP request metrics. Note that this value overrides the global value defined by the `zeroscaler.metricsCheckInterval` Helm value. | _value of the `zeroscaler.metricsCheckInterval` Helm value_ |
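
For example, a StatefulSet that opts in to zero-scaling could be annotated as follows. This is an illustrative sketch only: the resource names, labels, and image are hypothetical, and only the `osiris.deislabs.io/*` annotations come from the table above.

```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: example-app                                 # hypothetical name
  annotations:
    osiris.deislabs.io/enabled: "true"              # let the zeroscaler manage this workload
    osiris.deislabs.io/minReplicas: "2"             # scale from 0 directly back to 2 replicas
    osiris.deislabs.io/metricsCheckInterval: "60"   # scrape pod metrics every 60 seconds
spec:
  serviceName: example-app                          # hypothetical headless service
  replicas: 2
  selector:
    matchLabels:
      app: example-app
  template:
    metadata:
      labels:
        app: example-app
    spec:
      containers:
      - name: app
        image: example/app:latest                   # hypothetical image
```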

#### Pod Annotations
@@ -205,6 +205,7 @@ The following table lists the supported annotations for Kubernetes `Services` an
| ---------- | ----------- | ------- |
| `osiris.deislabs.io/enabled` | Enable this service's endpoints to be managed by the Osiris endpoints controller. Allowed values: `y`, `yes`, `true`, `on`, `1`. | _no value_ (= disabled) |
| `osiris.deislabs.io/deployment` | Name of the deployment behind this service. This is _required_ to map the service to its deployment, when the service is backed by a deployment. | _no value_ |
| `osiris.deislabs.io/statefulset` | Name of the statefulSet behind this service. This is _required_ to map the service to its statefulSet, when the service is backed by a statefulSet. | _no value_ |
| `osiris.deislabs.io/loadBalancerHostname` | Map requests coming from a specific hostname to this service. Note that if you have multiple hostnames, you can set them with different annotations, using `osiris.deislabs.io/loadBalancerHostname-1`, `osiris.deislabs.io/loadBalancerHostname-2`, ... | _no value_ |
| `osiris.deislabs.io/ingressHostname` | Map requests coming from a specific hostname to this service. If you use an ingress in front of your service, this is required to create a link between the ingress and the service. Note that if you have multiple hostnames, you can set them with different annotations, using `osiris.deislabs.io/ingressHostname-1`, `osiris.deislabs.io/ingressHostname-2`, ... | _no value_ |
| `osiris.deislabs.io/ingressDefaultPort` | Custom service port to use when the request comes from an ingress. By default, if the service has more than one port, Osiris looks for a port named `http` and falls back to port `80`. Set this if you have multiple ports and use a non-standard port with a non-standard name. | _no value_ |
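
As a sketch (reusing the hypothetical `example-app` StatefulSet from above), a Service backed by a statefulSet could be annotated like this; the hostname and ports are illustrative:

```yaml
apiVersion: v1
kind: Service
metadata:
  name: example-app                                  # hypothetical name
  annotations:
    osiris.deislabs.io/enabled: "true"               # let Osiris manage this service's endpoints
    osiris.deislabs.io/statefulset: example-app      # the statefulSet behind this service
    osiris.deislabs.io/ingressHostname: example.com  # hypothetical hostname used by an ingress
spec:
  selector:
    app: example-app
  ports:
  - name: http
    port: 80
    targetPort: 8080
```
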
1 change: 1 addition & 0 deletions chart/osiris/templates/cluster-role.yaml
@@ -32,6 +32,7 @@ rules:
- apps
resources:
- deployments
- statefulsets
verbs:
- get
- list
57 changes: 52 additions & 5 deletions pkg/deployments/activator/activating.go
@@ -12,23 +12,23 @@ import (

func (a *activator) activateDeployment(
app *app,
) (*deploymentActivation, error) {
) (*appActivation, error) {
deploymentsClient := a.kubeClient.AppsV1().Deployments(app.namespace)
deployment, err := deploymentsClient.Get(
app.deploymentName,
app.name,
metav1.GetOptions{},
)
if err != nil {
return nil, err
}
da := &deploymentActivation{
da := &appActivation{
readyAppPodIPs: map[string]struct{}{},
successCh: make(chan struct{}),
timeoutCh: make(chan struct{}),
}
glog.Infof(
"Activating deployment %s in namespace %s",
app.deploymentName,
app.name,
app.namespace,
)
go da.watchForCompletion(
@@ -50,7 +50,54 @@ func (a *activator) activateDeployment(
}}
patchesBytes, _ := json.Marshal(patches)
_, err = deploymentsClient.Patch(
app.deploymentName,
app.name,
k8s_types.JSONPatchType,
patchesBytes,
)
return da, err
}

func (a *activator) activateStatefulSet(
app *app,
) (*appActivation, error) {
statefulSetsClient := a.kubeClient.AppsV1().StatefulSets(app.namespace)
statefulSet, err := statefulSetsClient.Get(
app.name,
metav1.GetOptions{},
)
if err != nil {
return nil, err
}
da := &appActivation{
readyAppPodIPs: map[string]struct{}{},
successCh: make(chan struct{}),
timeoutCh: make(chan struct{}),
}
glog.Infof(
"Activating statefulSet %s in namespace %s",
app.name,
app.namespace,
)
go da.watchForCompletion(
a.kubeClient,
app,
labels.Set(statefulSet.Spec.Selector.MatchLabels).AsSelector(),
)
if statefulSet.Spec.Replicas == nil || *statefulSet.Spec.Replicas > 0 {
// We don't need to do this, as it turns out! Scaling is either already
// in progress-- perhaps initiated by another process-- or may even be
// completed already. Just return da and allow the caller to move on to
// verifying / waiting for this activation to be complete.
return da, nil
}
patches := []kubernetes.PatchOperation{{
Op: "replace",
Path: "/spec/replicas",
Value: kubernetes.GetMinReplicas(statefulSet.Annotations, 1),
}}
patchesBytes, _ := json.Marshal(patches)
_, err = statefulSetsClient.Patch(
app.name,
k8s_types.JSONPatchType,
patchesBytes,
)
10 changes: 5 additions & 5 deletions pkg/deployments/activator/activator.go
@@ -29,8 +29,8 @@ type activator struct {
nodeAddresses map[string]struct{}
appsByHost map[string]*app
indicesLock sync.RWMutex
deploymentActivations map[string]*deploymentActivation
deploymentActivationsLock sync.Mutex
appActivations map[string]*appActivation
appActivationsLock sync.Mutex
dynamicProxyListenAddrStr string
dynamicProxy tcp.DynamicProxy
httpClient *http.Client
@@ -56,7 +56,7 @@ func NewActivator(kubeClient kubernetes.Interface) (Activator, error) {
services: map[string]*corev1.Service{},
nodeAddresses: map[string]struct{}{},
appsByHost: map[string]*app{},
deploymentActivations: map[string]*deploymentActivation{},
appActivations: map[string]*appActivation{},
httpClient: &http.Client{
Timeout: time.Minute * 1,
},
@@ -127,7 +127,7 @@ func (a *activator) syncService(obj interface{}) {
a.indicesLock.Lock()
defer a.indicesLock.Unlock()
svc := obj.(*corev1.Service)
svcKey := getKey(svc.Namespace, svc.Name)
svcKey := getKey(svc.Namespace, "Service", svc.Name)
if k8s.ResourceIsOsirisEnabled(svc.Annotations) {
a.services[svcKey] = svc
} else {
@@ -140,7 +140,7 @@ func (a *activator) syncDeletedService(obj interface{}) {
a.indicesLock.Lock()
defer a.indicesLock.Unlock()
svc := obj.(*corev1.Service)
svcKey := getKey(svc.Namespace, svc.Name)
svcKey := getKey(svc.Namespace, "Service", svc.Name)
delete(a.services, svcKey)
a.updateIndex()
}
18 changes: 13 additions & 5 deletions pkg/deployments/activator/app.go
@@ -1,9 +1,17 @@
package activator

type appKind string

const (
appKindDeployment appKind = "Deployment"
appKindStatefulSet appKind = "StatefulSet"
)

type app struct {
namespace string
serviceName string
deploymentName string
targetHost string
targetPort int
namespace string
serviceName string
name string
kind appKind
targetHost string
targetPort int
}
@@ -14,34 +14,34 @@ import (
"k8s.io/client-go/tools/cache"
)

type deploymentActivation struct {
type appActivation struct {
readyAppPodIPs map[string]struct{}
endpoints *corev1.Endpoints
lock sync.Mutex
successCh chan struct{}
timeoutCh chan struct{}
}

func (d *deploymentActivation) watchForCompletion(
func (a *appActivation) watchForCompletion(
kubeClient kubernetes.Interface,
app *app,
appPodSelector labels.Selector,
) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Watch the pods managed by this deployment
// Watch the pods managed by this deployment/statefulSet
podsInformer := k8s.PodsIndexInformer(
kubeClient,
app.namespace,
nil,
appPodSelector,
)
podsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: d.syncPod,
AddFunc: a.syncPod,
UpdateFunc: func(_, newObj interface{}) {
d.syncPod(newObj)
a.syncPod(newObj)
},
DeleteFunc: d.syncPod,
DeleteFunc: a.syncPod,
})
// Watch the corresponding endpoints resource for this service
endpointsInformer := k8s.EndpointsIndexInformer(
@@ -54,9 +54,9 @@ func (d *deploymentActivation) watchForCompletion(
nil,
)
endpointsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: d.syncEndpoints,
AddFunc: a.syncEndpoints,
UpdateFunc: func(_, newObj interface{}) {
d.syncEndpoints(newObj)
a.syncEndpoints(newObj)
},
})
go podsInformer.Run(ctx.Done())
@@ -65,23 +65,24 @@
defer timer.Stop()
for {
select {
case <-d.successCh:
case <-a.successCh:
return
case <-timer.C:
glog.Errorf(
"Activation of deployment %s in namespace %s timed out",
app.deploymentName,
"Activation of %s %s in namespace %s timed out",
app.kind,
app.name,
app.namespace,
)
close(d.timeoutCh)
close(a.timeoutCh)
return
}
}
}

func (d *deploymentActivation) syncPod(obj interface{}) {
d.lock.Lock()
defer d.lock.Unlock()
func (a *appActivation) syncPod(obj interface{}) {
a.lock.Lock()
defer a.lock.Unlock()
pod := obj.(*corev1.Pod)
var ready bool
for _, condition := range pod.Status.Conditions {
@@ -94,27 +95,27 @@ func (d *deploymentActivation) syncPod(obj interface{}) {
}
// Keep track of which pods are ready
if ready {
d.readyAppPodIPs[pod.Status.PodIP] = struct{}{}
a.readyAppPodIPs[pod.Status.PodIP] = struct{}{}
} else {
delete(d.readyAppPodIPs, pod.Status.PodIP)
delete(a.readyAppPodIPs, pod.Status.PodIP)
}
d.checkActivationComplete()
a.checkActivationComplete()
}

func (d *deploymentActivation) syncEndpoints(obj interface{}) {
d.lock.Lock()
defer d.lock.Unlock()
d.endpoints = obj.(*corev1.Endpoints)
d.checkActivationComplete()
func (a *appActivation) syncEndpoints(obj interface{}) {
a.lock.Lock()
defer a.lock.Unlock()
a.endpoints = obj.(*corev1.Endpoints)
a.checkActivationComplete()
}

func (d *deploymentActivation) checkActivationComplete() {
if d.endpoints != nil {
for _, subset := range d.endpoints.Subsets {
func (a *appActivation) checkActivationComplete() {
if a.endpoints != nil {
for _, subset := range a.endpoints.Subsets {
for _, address := range subset.Addresses {
if _, ok := d.readyAppPodIPs[address.IP]; ok {
if _, ok := a.readyAppPodIPs[address.IP]; ok {
glog.Infof("App pod with ip %s is in service", address.IP)
close(d.successCh)
close(a.successCh)
return
}
}