From 9daa25e05ea667478587c9d31a2c0f7a94afe23d Mon Sep 17 00:00:00 2001 From: Julian Brost Date: Fri, 22 Sep 2023 10:22:11 +0200 Subject: [PATCH 01/65] Initial Event Stream API based Proof of Concept --- cmd/icinga2-notification-source/main.go | 66 +++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 cmd/icinga2-notification-source/main.go diff --git a/cmd/icinga2-notification-source/main.go b/cmd/icinga2-notification-source/main.go new file mode 100644 index 00000000..7f9a5932 --- /dev/null +++ b/cmd/icinga2-notification-source/main.go @@ -0,0 +1,66 @@ +package main + +import ( + "crypto/tls" + "encoding/json" + "io" + "log" + "net/http" + "os" + "strings" +) + +func main() { + req, err := http.NewRequest(http.MethodPost, "https://localhost:5665/v1/events", strings.NewReader(`{"queue":"icinga-notifications","types":["StateChange","AcknowledgementSet","AcknowledgementCleared"]}`)) + if err != nil { + panic(err) + } + + req.SetBasicAuth("root", "icinga") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + + client := &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, + }, + }, + } + + res, err := client.Do(req) + if err != nil { + panic(err) + } + + jsonR, jsonW := io.Pipe() + go func() { + _, err = io.Copy(io.MultiWriter(os.Stdout, jsonW), res.Body) + if err != nil { + panic(err) + } + }() + + dec := json.NewDecoder(jsonR) + for { + var event Icinga2Event + err := dec.Decode(&event) + if err != nil { + panic(err) + } + log.Printf("%#v", &event) + } +} + +type Icinga2Event struct { + Acknowledgement bool `json:"acknowledgement"` + CheckResult struct { + Output string `json:"output"` + } `json:"check_result"` + Host string `json:"host"` + Service string `json:"service"` + State int `json:"state"` + StateType int `json:"state_type"` + Timestamp float64 `json:"timestamp"` + Type string `json:"type"` +} From 
78d01e8d50f37b49c9cc683f237ba612e8bd27f1 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 13 Oct 2023 16:39:56 +0200 Subject: [PATCH 02/65] eventstream: represent of Icinga 2 API objects This initial representation of Icinga 2 API objects in their JSON format aims to use Icinga 2's Event Stream API to get future notifications. The Go struct representations are not complete yet, as it is not yet clear how much information will be needed. --- internal/eventstream/api_responses.go | 193 ++++++++++++++ internal/eventstream/api_responses_test.go | 277 +++++++++++++++++++++ 2 files changed, 470 insertions(+) create mode 100644 internal/eventstream/api_responses.go create mode 100644 internal/eventstream/api_responses_test.go diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go new file mode 100644 index 00000000..b04995ef --- /dev/null +++ b/internal/eventstream/api_responses.go @@ -0,0 +1,193 @@ +package eventstream + +import ( + "encoding/json" + "fmt" +) + +// Comment represents the Icinga 2 API Comment object. +// +// NOTE: An empty Service field indicates a host comment. +// +// https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-comment +type Comment struct { + Host string `json:"host_name"` + Service string `json:"service_name,omitempty"` + Author string `json:"author"` + Text string `json:"text"` +} + +// CheckResult represents the Icinga 2 API CheckResult object. +// +// https://icinga.com/docs/icinga-2/latest/doc/08-advanced-topics/#advanced-value-types-checkresult +type CheckResult struct { + ExitStatus int `json:"exit_status"` + Output string `json:"output"` +} + +// Downtime represents the Icinga 2 API Downtime object. 
+// +// https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-downtime +type Downtime struct { + Host string `json:"host_name"` + Service string `json:"service_name,omitempty"` + Author string `json:"author"` + Comment string `json:"comment"` +} + +// StateChange represents the Icinga 2 API Event Stream StateChange response for host/service state changes. +// +// NOTE: An empty Service field indicates a host service. +// +// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-statechange +type StateChange struct { + Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp + Host string `json:"host"` + Service string `json:"service,omitempty"` + State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) + StateType int `json:"state_type"` // TODO: own type for state types (0 = SOFT, 1 = HARD) + CheckResult CheckResult `json:"check_result"` + DowntimeDepth int `json:"downtime_depth"` + Acknowledgement bool `json:"acknowledgement"` +} + +// AcknowledgementSet represents the Icinga 2 API Event Stream AcknowledgementSet response for acknowledgements set on hosts/services. +// +// NOTE: An empty Service field indicates a host acknowledgement. +// +// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementset +type AcknowledgementSet struct { + Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp + Host string `json:"host"` + Service string `json:"service,omitempty"` + State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) + StateType int `json:"state_type"` // TODO: own type for state types (0 = SOFT, 1 = HARD) + Author string `json:"author"` + Comment string `json:"comment"` +} + +// AcknowledgementCleared represents the Icinga 2 API Event Stream AcknowledgementCleared response for acknowledgements cleared on hosts/services. 
+// +// NOTE: An empty Service field indicates a host acknowledgement. +// +// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementcleared +type AcknowledgementCleared struct { + Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp + Host string `json:"host"` + Service string `json:"service,omitempty"` + State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) + StateType int `json:"state_type"` // TODO: own type for state types (0 = SOFT, 1 = HARD) +} + +// CommentAdded represents the Icinga 2 API Event Stream CommentAdded response for added host/service comments. +// +// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-commentadded +type CommentAdded struct { + Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp + Comment Comment `json:"comment"` +} + +// CommentRemoved represents the Icinga 2 API Event Stream CommentRemoved response for removed host/service comments. +// +// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-commentremoved +type CommentRemoved struct { + Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp + Comment Comment `json:"comment"` +} + +// DowntimeAdded represents the Icinga 2 API Event Stream DowntimeAdded response for added downtimes on host/services. +// +// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-downtimeadded +type DowntimeAdded struct { + Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp + Downtime Downtime `json:"downtime"` +} + +// DowntimeRemoved represents the Icinga 2 API Event Stream DowntimeRemoved response for removed downtimes on host/services. 
+// +// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-commentremoved +type DowntimeRemoved struct { + Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp + Downtime Downtime `json:"downtime"` +} + +// DowntimeStarted represents the Icinga 2 API Event Stream DowntimeStarted response for started downtimes on host/services. +// +// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-downtimestarted +type DowntimeStarted struct { + Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp + Downtime Downtime `json:"downtime"` +} + +// DowntimeTriggered represents the Icinga 2 API Event Stream DowntimeTriggered response for triggered downtimes on host/services. +// +// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-downtimetriggered +type DowntimeTriggered struct { + Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp + Downtime Downtime `json:"downtime"` +} + +// UnmarshalEventStreamResponse unmarshal a JSON response line from the Icinga 2 API Event Stream. +func UnmarshalEventStreamResponse(data []byte) (any, error) { + // Due to the overlapping fields of the different Event Stream response objects, a struct composition with + // decompositions in different variables will result in multiple manual fixes. Thus, a two-way deserialization + // was chosen which selects the target type based on the first parsed type field. 
+ + var responseType string + err := json.Unmarshal(data, &struct { + Type *string `json:"type"` + }{&responseType}) + if err != nil { + return nil, err + } + + switch responseType { + case "StateChange": + resp := StateChange{} + err = json.Unmarshal(data, &resp) + return resp, err + + case "AcknowledgementSet": + resp := AcknowledgementSet{} + err = json.Unmarshal(data, &resp) + return resp, err + + case "AcknowledgementCleared": + resp := AcknowledgementCleared{} + err = json.Unmarshal(data, &resp) + return resp, err + + case "CommentAdded": + resp := CommentAdded{} + err = json.Unmarshal(data, &resp) + return resp, err + + case "CommentRemoved": + resp := CommentRemoved{} + err = json.Unmarshal(data, &resp) + return resp, err + + case "DowntimeAdded": + resp := DowntimeAdded{} + err = json.Unmarshal(data, &resp) + return resp, err + + case "DowntimeRemoved": + resp := DowntimeRemoved{} + err = json.Unmarshal(data, &resp) + return resp, err + + case "DowntimeStarted": + resp := DowntimeStarted{} + err = json.Unmarshal(data, &resp) + return resp, err + + case "DowntimeTriggered": + resp := DowntimeTriggered{} + err = json.Unmarshal(data, &resp) + return resp, err + + default: + return nil, fmt.Errorf("unsupported type %q", responseType) + } +} diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go new file mode 100644 index 00000000..bb19ab89 --- /dev/null +++ b/internal/eventstream/api_responses_test.go @@ -0,0 +1,277 @@ +package eventstream + +import ( + "reflect" + "testing" +) + +func TestApiResponseUnmarshal(t *testing.T) { + tests := []struct { + name string + jsonData string + isError bool + expected any + }{ + { + name: "invalid-json", + jsonData: `{":}"`, + isError: true, + }, + { + name: "unknown-type", + jsonData: `{"type": "ihopethisstringwillneverappearinicinga2asavalidtype"}`, + isError: true, + }, + { + name: "statechange-host-valid", + jsonData: 
`{"acknowledgement":false,"check_result":{"active":true,"check_source":"docker-master","command":["/bin/bash","-c","/usr/games/fortune; exit $0","2"],"execution_end":1697188278.202986,"execution_start":1697188278.194409,"exit_status":2,"output":"If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway","performance_data":[],"previous_hard_state":99,"schedule_end":1697188278.203036,"schedule_start":1697188278.1938322,"scheduling_source":"docker-master","state":2,"ttl":0,"type":"CheckResult","vars_after":{"attempt":2,"reachable":true,"state":2,"state_type":0},"vars_before":{"attempt":1,"reachable":true,"state":2,"state_type":0}},"downtime_depth":0,"host":"dummy-158","state":1,"state_type":0,"timestamp":1697188278.203504,"type":"StateChange"}`, + expected: StateChange{ + Timestamp: 1697188278.203504, + Host: "dummy-158", + State: 1, + StateType: 0, + CheckResult: CheckResult{ + ExitStatus: 2, + Output: "If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway", + }, + DowntimeDepth: 0, + Acknowledgement: false, + }, + }, + { + name: "statechange-service-valid", + jsonData: `{"acknowledgement":false,"check_result":{"active":true,"check_source":"docker-master","command":["/bin/bash","-c","/usr/games/fortune; exit $0","2"],"execution_end":1697184778.611465,"execution_start":1697184778.600973,"exit_status":2,"output":"You're growing out of some of your problems, but there are others that\nyou're growing into.","performance_data":[],"previous_hard_state":0,"schedule_end":1697184778.611557,"schedule_start":1697184778.6,"scheduling_source":"docker-master","state":2,"ttl":0,"type":"CheckResult","vars_after":{"attempt":2,"reachable":false,"state":2,"state_type":0},"vars_before":{"attempt":1,"reachable":false,"state":2,"state_type":0}},"downtime_depth":0,"host":"dummy-280","service":"random fortune","state":2,"state_type":0,"timestamp":1697184778.612108,"type":"StateChange"}`, + expected: StateChange{ + Timestamp: 
1697184778.612108, + Host: "dummy-280", + Service: "random fortune", + State: 2, + StateType: 0, + CheckResult: CheckResult{ + ExitStatus: 2, + Output: "You're growing out of some of your problems, but there are others that\nyou're growing into.", + }, + DowntimeDepth: 0, + Acknowledgement: false, + }, + }, + { + name: "acknowledgementset-host", + jsonData: `{"acknowledgement_type":1,"author":"icingaadmin","comment":"working on it","expiry":0,"host":"dummy-805","notify":true,"persistent":false,"state":1,"state_type":1,"timestamp":1697201074.579106,"type":"AcknowledgementSet"}`, + expected: AcknowledgementSet{ + Timestamp: 1697201074.579106, + Host: "dummy-805", + State: 1, + StateType: 1, + Author: "icingaadmin", + Comment: "working on it", + }, + }, + { + name: "acknowledgementset-service", + jsonData: `{"acknowledgement_type":1,"author":"icingaadmin","comment":"will be fixed soon","expiry":0,"host":"docker-master","notify":true,"persistent":false,"service":"ssh","state":2,"state_type":1,"timestamp":1697201107.64792,"type":"AcknowledgementSet"}`, + expected: AcknowledgementSet{ + Timestamp: 1697201107.64792, + Host: "docker-master", + Service: "ssh", + State: 2, + StateType: 1, + Author: "icingaadmin", + Comment: "will be fixed soon", + }, + }, + { + name: "acknowledgementcleared-host", + jsonData: `{"acknowledgement_type":0,"host":"dummy-805","state":1,"state_type":1,"timestamp":1697201082.440148,"type":"AcknowledgementCleared"}`, + expected: AcknowledgementCleared{ + Timestamp: 1697201082.440148, + Host: "dummy-805", + State: 1, + StateType: 1, + }, + }, + { + name: "acknowledgementcleared-service", + jsonData: `{"acknowledgement_type":0,"host":"docker-master","service":"ssh","state":2,"state_type":1,"timestamp":1697201110.220349,"type":"AcknowledgementCleared"}`, + expected: AcknowledgementCleared{ + Timestamp: 1697201110.220349, + Host: "docker-master", + Service: "ssh", + State: 2, + StateType: 1, + }, + }, + { + name: "commentadded-host", + jsonData: 
`{"comment":{"__name":"dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3","author":"icingaadmin","entry_time":1697191791.097852,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":1,"name":"f653e951-2210-432d-bca6-e3719ea74ca3","package":"_api","persistent":false,"service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3.conf"},"sticky":false,"templates":["f653e951-2210-432d-bca6-e3719ea74ca3"],"text":"oh noes","type":"Comment","version":1697191791.097867,"zone":"master"},"timestamp":1697191791.099201,"type":"CommentAdded"}`, + expected: CommentAdded{ + Timestamp: 1697191791.099201, + Comment: Comment{ + Host: "dummy-912", + Author: "icingaadmin", + Text: "oh noes", + }, + }, + }, + { + name: "commentadded-service", + jsonData: `{"comment":{"__name":"dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","author":"icingaadmin","entry_time":1697197990.035889,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":8,"name":"8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","package":"_api","persistent":false,"service_name":"ping4","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0.conf"},"sticky":false,"templates":["8c00fb6a-5948-4249-a9d5-d1b6eb8945d0"],"text":"if in doubt, check ticket #23","type":"Comment","version":1697197990.035905,"zone":"master"},"timestamp":1697197990.037244,"type":"CommentAdded"}`, + expected: CommentAdded{ + Timestamp: 1697197990.037244, + Comment: Comment{ + Host: "dummy-912", + Service: "ping4", + Author: "icingaadmin", + Text: "if in doubt, check ticket #23", + }, + }, + }, + { + name: "commentremoved-host", + jsonData: 
`{"comment":{"__name":"dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3","author":"icingaadmin","entry_time":1697191791.097852,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":1,"name":"f653e951-2210-432d-bca6-e3719ea74ca3","package":"_api","persistent":false,"service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3.conf"},"sticky":false,"templates":["f653e951-2210-432d-bca6-e3719ea74ca3"],"text":"oh noes","type":"Comment","version":1697191791.097867,"zone":"master"},"timestamp":1697191807.910093,"type":"CommentRemoved"}`, + expected: CommentRemoved{ + Timestamp: 1697191807.910093, + Comment: Comment{ + Host: "dummy-912", + Author: "icingaadmin", + Text: "oh noes", + }, + }, + }, + { + name: "commentremoved-service", + jsonData: `{"comment":{"__name":"dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","author":"icingaadmin","entry_time":1697197990.035889,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":8,"name":"8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","package":"_api","persistent":false,"service_name":"ping4","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0.conf"},"sticky":false,"templates":["8c00fb6a-5948-4249-a9d5-d1b6eb8945d0"],"text":"if in doubt, check ticket #23","type":"Comment","version":1697197990.035905,"zone":"master"},"timestamp":1697197996.584392,"type":"CommentRemoved"}`, + expected: CommentRemoved{ + Timestamp: 1697197996.584392, + Comment: Comment{ + Host: "dummy-912", + Service: "ping4", + Author: "icingaadmin", + Text: "if in doubt, check ticket #23", + }, + }, + }, + { + name: "downtimeadded-host", + jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511293,"type":"DowntimeAdded"}`, + expected: DowntimeAdded{ + Timestamp: 1697207050.511293, + Downtime: Downtime{ + Host: "dummy-157", + Author: "icingaadmin", + Comment: "updates", + }, + }, + }, + { + name: "downtimeadded-service", + jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217425,"type":"DowntimeAdded"}`, + expected: DowntimeAdded{ + Timestamp: 1697207141.217425, + Downtime: Downtime{ + Host: "docker-master", + Service: "http", + Author: "icingaadmin", + Comment: "broken until Monday", + }, + }, + }, + { + name: "downtimestarted-host", + jsonData: `{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511378,"type":"DowntimeStarted"}`, + expected: DowntimeStarted{ + Timestamp: 1697207050.511378, + 
Downtime: Downtime{ + Host: "dummy-157", + Author: "icingaadmin", + Comment: "updates", + }, + }, + }, + { + name: "downtimestarted-service", + jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217507,"type":"DowntimeStarted"}`, + expected: DowntimeStarted{ + Timestamp: 1697207141.217507, + Downtime: Downtime{ + Host: "docker-master", + Service: "http", + Author: "icingaadmin", + Comment: "broken until Monday", + }, + }, + }, + { + name: "downtimetriggered-host", + jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":1697207050.509957,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511608,"type":"DowntimeTriggered"}`, + expected: DowntimeTriggered{ + Timestamp: 1697207050.511608, + Downtime: Downtime{ + Host: "dummy-157", + Author: "icingaadmin", + Comment: "updates", + }, + }, + }, + { + name: "downtimetriggered-service", + jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":1697207141.216009,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217726,"type":"DowntimeTriggered"}`, + expected: DowntimeTriggered{ + Timestamp: 1697207141.217726, + Downtime: Downtime{ + Host: "docker-master", + Service: "http", + Author: "icingaadmin", + Comment: "broken until Monday", + }, + }, + }, + { + name: "downtimeremoved-host", + jsonData: `{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":1697207096.187718,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":1697207050.509957,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207096.187866,"type":"DowntimeRemoved"}`, + 
expected: DowntimeRemoved{ + Timestamp: 1697207096.187866, + Downtime: Downtime{ + Host: "dummy-157", + Author: "icingaadmin", + Comment: "updates", + }, + }, + }, + { + name: "downtimeremoved-service", + jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":1697207144.746117,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":1697207141.216009,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207144.746333,"type":"DowntimeRemoved"}`, + expected: DowntimeRemoved{ + Timestamp: 1697207144.746333, + Downtime: Downtime{ + Host: "docker-master", + Service: "http", + Author: "icingaadmin", + Comment: "broken until Monday", + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + resp, err := UnmarshalEventStreamResponse([]byte(test.jsonData)) + if (err != nil) != test.isError { + t.Errorf("unexpected error state; got error: %t, expected: %t", err != nil, test.isError) + return + } + if err != nil { + if !test.isError { + t.Error(err) + } + return + } + + if !reflect.DeepEqual(resp, test.expected) { + t.Logf("got: %#v", resp) + t.Logf("expected: %#v", test.expected) + t.Error("unexpected response") + } + }) + } +} From c472f8d3f425e5d3f5d97049655b74627b16d500 Mon Sep 17 00:00:00 2001 From: Alvar Penning 
Date: Mon, 16 Oct 2023 09:42:01 +0200 Subject: [PATCH 03/65] eventstream: custom Icinga2Time to parse time --- internal/eventstream/api_responses.go | 68 +++++++++----- internal/eventstream/api_responses_test.go | 101 +++++++++++++++++---- 2 files changed, 126 insertions(+), 43 deletions(-) diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go index b04995ef..bd33c564 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/eventstream/api_responses.go @@ -3,8 +3,26 @@ package eventstream import ( "encoding/json" "fmt" + "strconv" + "time" ) +// Icinga2Time is a custom time.Time type for JSON unmarshalling from Icinga 2's unix timestamp type. +type Icinga2Time struct { + time.Time +} + +func (iciTime *Icinga2Time) UnmarshalJSON(data []byte) error { + unixTs, err := strconv.ParseFloat(string(data), 64) + if err != nil { + return err + } + + unixMicro := int64(unixTs * 1_000_000) + iciTime.Time = time.UnixMicro(unixMicro) + return nil +} + // Comment represents the Icinga 2 API Comment object. // // NOTE: An empty Service field indicates a host comment. 
@@ -41,7 +59,7 @@ type Downtime struct { // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-statechange type StateChange struct { - Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp + Timestamp Icinga2Time `json:"timestamp"` Host string `json:"host"` Service string `json:"service,omitempty"` State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) @@ -57,13 +75,13 @@ type StateChange struct { // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementset type AcknowledgementSet struct { - Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp - Host string `json:"host"` - Service string `json:"service,omitempty"` - State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) - StateType int `json:"state_type"` // TODO: own type for state types (0 = SOFT, 1 = HARD) - Author string `json:"author"` - Comment string `json:"comment"` + Timestamp Icinga2Time `json:"timestamp"` + Host string `json:"host"` + Service string `json:"service,omitempty"` + State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) + StateType int `json:"state_type"` // TODO: own type for state types (0 = SOFT, 1 = HARD) + Author string `json:"author"` + Comment string `json:"comment"` } // AcknowledgementCleared represents the Icinga 2 API Event Stream AcknowledgementCleared response for acknowledgements cleared on hosts/services. 
@@ -72,59 +90,59 @@ type AcknowledgementSet struct { // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementcleared type AcknowledgementCleared struct { - Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp - Host string `json:"host"` - Service string `json:"service,omitempty"` - State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) - StateType int `json:"state_type"` // TODO: own type for state types (0 = SOFT, 1 = HARD) + Timestamp Icinga2Time `json:"timestamp"` + Host string `json:"host"` + Service string `json:"service,omitempty"` + State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) + StateType int `json:"state_type"` // TODO: own type for state types (0 = SOFT, 1 = HARD) } // CommentAdded represents the Icinga 2 API Event Stream CommentAdded response for added host/service comments. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-commentadded type CommentAdded struct { - Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp - Comment Comment `json:"comment"` + Timestamp Icinga2Time `json:"timestamp"` + Comment Comment `json:"comment"` } // CommentRemoved represents the Icinga 2 API Event Stream CommentRemoved response for removed host/service comments. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-commentremoved type CommentRemoved struct { - Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp - Comment Comment `json:"comment"` + Timestamp Icinga2Time `json:"timestamp"` + Comment Comment `json:"comment"` } // DowntimeAdded represents the Icinga 2 API Event Stream DowntimeAdded response for added downtimes on host/services. 
// // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-downtimeadded type DowntimeAdded struct { - Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp - Downtime Downtime `json:"downtime"` + Timestamp Icinga2Time `json:"timestamp"` + Downtime Downtime `json:"downtime"` } // DowntimeRemoved represents the Icinga 2 API Event Stream DowntimeRemoved response for removed downtimes on host/services. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-downtimeremoved type DowntimeRemoved struct { - Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp - Downtime Downtime `json:"downtime"` + Timestamp Icinga2Time `json:"timestamp"` + Downtime Downtime `json:"downtime"` } // DowntimeStarted represents the Icinga 2 API Event Stream DowntimeStarted response for started downtimes on host/services. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-downtimestarted type DowntimeStarted struct { - Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp - Downtime Downtime `json:"downtime"` + Timestamp Icinga2Time `json:"timestamp"` + Downtime Downtime `json:"downtime"` } // DowntimeTriggered represents the Icinga 2 API Event Stream DowntimeTriggered response for triggered downtimes on host/services. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-downtimetriggered type DowntimeTriggered struct { - Timestamp float64 `json:"timestamp"` // TODO: own type for float64 UNIX time stamp - Downtime Downtime `json:"downtime"` + Timestamp Icinga2Time `json:"timestamp"` + Downtime Downtime `json:"downtime"` } // UnmarshalEventStreamResponse unmarshals a JSON response line from the Icinga 2 API Event Stream. 
diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go index bb19ab89..1974cffe 100644 --- a/internal/eventstream/api_responses_test.go +++ b/internal/eventstream/api_responses_test.go @@ -1,8 +1,10 @@ package eventstream import ( + "encoding/json" "reflect" "testing" + "time" ) func TestApiResponseUnmarshal(t *testing.T) { @@ -26,7 +28,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "statechange-host-valid", jsonData: `{"acknowledgement":false,"check_result":{"active":true,"check_source":"docker-master","command":["/bin/bash","-c","/usr/games/fortune; exit $0","2"],"execution_end":1697188278.202986,"execution_start":1697188278.194409,"exit_status":2,"output":"If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway","performance_data":[],"previous_hard_state":99,"schedule_end":1697188278.203036,"schedule_start":1697188278.1938322,"scheduling_source":"docker-master","state":2,"ttl":0,"type":"CheckResult","vars_after":{"attempt":2,"reachable":true,"state":2,"state_type":0},"vars_before":{"attempt":1,"reachable":true,"state":2,"state_type":0}},"downtime_depth":0,"host":"dummy-158","state":1,"state_type":0,"timestamp":1697188278.203504,"type":"StateChange"}`, expected: StateChange{ - Timestamp: 1697188278.203504, + Timestamp: Icinga2Time{time.UnixMicro(1697188278203504)}, Host: "dummy-158", State: 1, StateType: 0, @@ -42,7 +44,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "statechange-service-valid", jsonData: `{"acknowledgement":false,"check_result":{"active":true,"check_source":"docker-master","command":["/bin/bash","-c","/usr/games/fortune; exit $0","2"],"execution_end":1697184778.611465,"execution_start":1697184778.600973,"exit_status":2,"output":"You're growing out of some of your problems, but there are others that\nyou're growing 
into.","performance_data":[],"previous_hard_state":0,"schedule_end":1697184778.611557,"schedule_start":1697184778.6,"scheduling_source":"docker-master","state":2,"ttl":0,"type":"CheckResult","vars_after":{"attempt":2,"reachable":false,"state":2,"state_type":0},"vars_before":{"attempt":1,"reachable":false,"state":2,"state_type":0}},"downtime_depth":0,"host":"dummy-280","service":"random fortune","state":2,"state_type":0,"timestamp":1697184778.612108,"type":"StateChange"}`, expected: StateChange{ - Timestamp: 1697184778.612108, + Timestamp: Icinga2Time{time.UnixMicro(1697184778612108)}, Host: "dummy-280", Service: "random fortune", State: 2, @@ -59,7 +61,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "acknowledgementset-host", jsonData: `{"acknowledgement_type":1,"author":"icingaadmin","comment":"working on it","expiry":0,"host":"dummy-805","notify":true,"persistent":false,"state":1,"state_type":1,"timestamp":1697201074.579106,"type":"AcknowledgementSet"}`, expected: AcknowledgementSet{ - Timestamp: 1697201074.579106, + Timestamp: Icinga2Time{time.UnixMicro(1697201074579106)}, Host: "dummy-805", State: 1, StateType: 1, @@ -71,7 +73,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "acknowledgementset-service", jsonData: `{"acknowledgement_type":1,"author":"icingaadmin","comment":"will be fixed soon","expiry":0,"host":"docker-master","notify":true,"persistent":false,"service":"ssh","state":2,"state_type":1,"timestamp":1697201107.64792,"type":"AcknowledgementSet"}`, expected: AcknowledgementSet{ - Timestamp: 1697201107.64792, + Timestamp: Icinga2Time{time.UnixMicro(1697201107647920)}, Host: "docker-master", Service: "ssh", State: 2, @@ -84,7 +86,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "acknowledgementcleared-host", jsonData: `{"acknowledgement_type":0,"host":"dummy-805","state":1,"state_type":1,"timestamp":1697201082.440148,"type":"AcknowledgementCleared"}`, expected: AcknowledgementCleared{ - Timestamp: 1697201082.440148, + Timestamp: 
Icinga2Time{time.UnixMicro(1697201082440148)}, Host: "dummy-805", State: 1, StateType: 1, @@ -94,7 +96,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "acknowledgementcleared-service", jsonData: `{"acknowledgement_type":0,"host":"docker-master","service":"ssh","state":2,"state_type":1,"timestamp":1697201110.220349,"type":"AcknowledgementCleared"}`, expected: AcknowledgementCleared{ - Timestamp: 1697201110.220349, + Timestamp: Icinga2Time{time.UnixMicro(1697201110220349)}, Host: "docker-master", Service: "ssh", State: 2, @@ -105,7 +107,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "commentadded-host", jsonData: `{"comment":{"__name":"dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3","author":"icingaadmin","entry_time":1697191791.097852,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":1,"name":"f653e951-2210-432d-bca6-e3719ea74ca3","package":"_api","persistent":false,"service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3.conf"},"sticky":false,"templates":["f653e951-2210-432d-bca6-e3719ea74ca3"],"text":"oh noes","type":"Comment","version":1697191791.097867,"zone":"master"},"timestamp":1697191791.099201,"type":"CommentAdded"}`, expected: CommentAdded{ - Timestamp: 1697191791.099201, + Timestamp: Icinga2Time{time.UnixMicro(1697191791099201)}, Comment: Comment{ Host: "dummy-912", Author: "icingaadmin", @@ -117,7 +119,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "commentadded-service", jsonData: 
`{"comment":{"__name":"dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","author":"icingaadmin","entry_time":1697197990.035889,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":8,"name":"8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","package":"_api","persistent":false,"service_name":"ping4","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0.conf"},"sticky":false,"templates":["8c00fb6a-5948-4249-a9d5-d1b6eb8945d0"],"text":"if in doubt, check ticket #23","type":"Comment","version":1697197990.035905,"zone":"master"},"timestamp":1697197990.037244,"type":"CommentAdded"}`, expected: CommentAdded{ - Timestamp: 1697197990.037244, + Timestamp: Icinga2Time{time.UnixMicro(1697197990037244)}, Comment: Comment{ Host: "dummy-912", Service: "ping4", @@ -130,7 +132,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "commentremoved-host", jsonData: `{"comment":{"__name":"dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3","author":"icingaadmin","entry_time":1697191791.097852,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":1,"name":"f653e951-2210-432d-bca6-e3719ea74ca3","package":"_api","persistent":false,"service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3.conf"},"sticky":false,"templates":["f653e951-2210-432d-bca6-e3719ea74ca3"],"text":"oh noes","type":"Comment","version":1697191791.097867,"zone":"master"},"timestamp":1697191807.910093,"type":"CommentRemoved"}`, expected: CommentRemoved{ - Timestamp: 1697191807.910093, + Timestamp: Icinga2Time{time.UnixMicro(1697191807910093)}, Comment: Comment{ Host: "dummy-912", Author: "icingaadmin", @@ -142,7 +144,7 @@ func 
TestApiResponseUnmarshal(t *testing.T) { name: "commentremoved-service", jsonData: `{"comment":{"__name":"dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","author":"icingaadmin","entry_time":1697197990.035889,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":8,"name":"8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","package":"_api","persistent":false,"service_name":"ping4","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0.conf"},"sticky":false,"templates":["8c00fb6a-5948-4249-a9d5-d1b6eb8945d0"],"text":"if in doubt, check ticket #23","type":"Comment","version":1697197990.035905,"zone":"master"},"timestamp":1697197996.584392,"type":"CommentRemoved"}`, expected: CommentRemoved{ - Timestamp: 1697197996.584392, + Timestamp: Icinga2Time{time.UnixMicro(1697197996584392)}, Comment: Comment{ Host: "dummy-912", Service: "ping4", @@ -155,7 +157,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimeadded-host", jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511293,"type":"DowntimeAdded"}`, expected: DowntimeAdded{ - Timestamp: 1697207050.511293, + Timestamp: Icinga2Time{time.UnixMicro(1697207050511293)}, Downtime: Downtime{ Host: "dummy-157", Author: "icingaadmin", @@ -167,7 +169,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimeadded-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217425,"type":"DowntimeAdded"}`, expected: DowntimeAdded{ - Timestamp: 1697207141.217425, + Timestamp: Icinga2Time{time.UnixMicro(1697207141217425)}, Downtime: Downtime{ Host: "docker-master", Service: "http", @@ -180,7 +182,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimestarted-host", jsonData: `{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511378,"type":"DowntimeStarted"}`, expected: DowntimeStarted{ - 
Timestamp: 1697207050.511378, + Timestamp: Icinga2Time{time.UnixMicro(1697207050511378)}, Downtime: Downtime{ Host: "dummy-157", Author: "icingaadmin", @@ -192,7 +194,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimestarted-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217507,"type":"DowntimeStarted"}`, expected: DowntimeStarted{ - Timestamp: 1697207141.217507, + Timestamp: Icinga2Time{time.UnixMicro(1697207141217507)}, Downtime: Downtime{ Host: "docker-master", Service: "http", @@ -205,7 +207,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimetriggered-host", jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":1697207050.509957,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511608,"type":"DowntimeTriggered"}`, expected: DowntimeTriggered{ - Timestamp: 1697207050.511608, + Timestamp: Icinga2Time{time.UnixMicro(1697207050511608)}, Downtime: Downtime{ Host: "dummy-157", Author: "icingaadmin", @@ -217,7 +219,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimetriggered-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":1697207141.216009,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217726,"type":"DowntimeTriggered"}`, expected: DowntimeTriggered{ - Timestamp: 1697207141.217726, + Timestamp: Icinga2Time{time.UnixMicro(1697207141217726)}, Downtime: Downtime{ Host: "docker-master", Service: "http", @@ -230,7 +232,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimeremoved-host", jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":1697207096.187718,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":1697207050.509957,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207096.187866,"type":"DowntimeRemoved"}`, expected: DowntimeRemoved{ - Timestamp: 1697207096.187866, + Timestamp: Icinga2Time{time.UnixMicro(1697207096187866)}, Downtime: Downtime{ Host: "dummy-157", Author: "icingaadmin", @@ -242,7 +244,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimeremoved-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":1697207144.746117,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":1697207141.216009,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207144.746333,"type":"DowntimeRemoved"}`, expected: DowntimeRemoved{ - Timestamp: 1697207144.746333, + Timestamp: Icinga2Time{time.UnixMicro(1697207144746333)}, Downtime: Downtime{ Host: "docker-master", Service: "http", @@ -275,3 +277,66 @@ func TestApiResponseUnmarshal(t *testing.T) { }) } } + +func TestIcinga2Time(t *testing.T) { + tests := []struct { + name string + jsonData string + isError bool + expected Icinga2Time + }{ + { + name: "json-empty", + jsonData: "", + isError: true, + }, + { + name: "json-invalid", + jsonData: "{", + isError: true, + }, + { + name: "json-wrong-type", + jsonData: `"AAA"`, + isError: true, + }, + { + name: "epoch-time", + jsonData: "0.0", + expected: Icinga2Time{time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + name: "example-time", + jsonData: "1697207144.746333", + expected: Icinga2Time{time.Date(2023, time.October, 13, 14, 25, 44, 746333000, time.UTC)}, + }, + { + name: "example-time-location", + jsonData: "1697207144.746333", + expected: Icinga2Time{time.Date(2023, time.October, 13, 16, 25, 44, 746333000, + time.FixedZone("Europe/Berlin summer", 2*60*60))}, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var ici2time 
Icinga2Time + err := json.Unmarshal([]byte(test.jsonData), &ici2time) + if (err != nil) != test.isError { + t.Errorf("unexpected error state; got error: %t, expected: %t", err != nil, test.isError) + return + } + if err != nil { + if !test.isError { + t.Error(err) + } + return + } + + if ici2time.Compare(test.expected.Time) != 0 { + t.Logf("got: %#v", ici2time) + t.Logf("expected: %#v", test.expected) + t.Error("unexpected response") + } + }) + } +} From 83933cef077e13992e2d71f93bcb4d31f9fe8741 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 16 Oct 2023 15:06:02 +0200 Subject: [PATCH 04/65] eventstream: API response types for /v1/objects In preparation for fetching potentially lost objects due to reconnects through Icinga 2's /v1/objects API, the response types and their wrappers were generated. Additionally, some other small refactorings were performed. --- internal/eventstream/api_responses.go | 163 ++++++---- internal/eventstream/api_responses_test.go | 340 +++++++++++++++------ 2 files changed, 346 insertions(+), 157 deletions(-) diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go index bd33c564..e7891fd1 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/eventstream/api_responses.go @@ -25,14 +25,17 @@ func (iciTime *Icinga2Time) UnmarshalJSON(data []byte) error { // Comment represents the Icinga 2 API Comment object. // -// NOTE: An empty Service field indicates a host comment. +// NOTE: +// - An empty Service field indicates a host comment. +// - The optional EntryType should be User = 1, Downtime = 2, Flapping = 3, Acknowledgement = 4. 
// // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-comment type Comment struct { - Host string `json:"host_name"` - Service string `json:"service_name,omitempty"` - Author string `json:"author"` - Text string `json:"text"` + Host string `json:"host_name"` + Service string `json:"service_name"` + Author string `json:"author"` + Text string `json:"text"` + EntryType int `json:"entry_type"` } // CheckResult represents the Icinga 2 API CheckResult object. @@ -45,25 +48,88 @@ type CheckResult struct { // Downtime represents the Icinga 2 API Downtime object. // +// NOTE: +// - An empty Service field indicates a host downtime. +// // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-downtime type Downtime struct { Host string `json:"host_name"` - Service string `json:"service_name,omitempty"` + Service string `json:"service_name"` Author string `json:"author"` Comment string `json:"comment"` } +// HostServiceRuntimeAttributes are common attributes of both Host and Service objects. +// +// When catching up potentially missed changes, the following fields are holding relevant changes which, fortunately, +// are identical for Icinga 2 Host and Service objects. +// +// According to the documentation, neither the Host nor the Service name is part of the attributes. However, next to +// being part of the wrapping API response, see ObjectQueriesResult, it is also available in the "__name" attribute, +// reflected in the Name field. For Service objects, it is "${host}!${service}". +// +// NOTE: +// - State might be 0 = UP, 1 = DOWN for hosts and 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN for services. +// - Acknowledgement type is 0 = NONE, 1 = NORMAL, 2 = STICKY. 
+// +// https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#host +// https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#service +type HostServiceRuntimeAttributes struct { + Name string `json:"__name"` + State int `json:"state"` + LastCheckResult CheckResult `json:"last_check_result"` + DowntimeDepth int `json:"downtime_depth"` + Acknowledgement int `json:"acknowledgement"` +} + +// ObjectQueriesResult represents the Icinga 2 API Object Queries Result wrapper object. +// +// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#object-queries-result +type ObjectQueriesResult struct { + Name string `json:"name"` + Type string `json:"type"` + Attrs any `json:"attrs"` +} + +func (objQueriesRes *ObjectQueriesResult) UnmarshalJSON(bytes []byte) error { + var responseAttrs json.RawMessage + err := json.Unmarshal(bytes, &struct { + Name *string `json:"name"` + Type *string `json:"type"` + Attrs *json.RawMessage `json:"attrs"` + }{&objQueriesRes.Name, &objQueriesRes.Type, &responseAttrs}) + if err != nil { + return err + } + + switch objQueriesRes.Type { + case "Comment": + objQueriesRes.Attrs = new(Comment) + case "Downtime": + objQueriesRes.Attrs = new(Downtime) + case "Host", "Service": + objQueriesRes.Attrs = new(HostServiceRuntimeAttributes) + default: + return fmt.Errorf("unsupported type %q", objQueriesRes.Type) + } + + return json.Unmarshal(responseAttrs, objQueriesRes.Attrs) +} + // StateChange represents the Icinga 2 API Event Stream StateChange response for host/service state changes. // -// NOTE: An empty Service field indicates a host service. +// NOTE: +// - An empty Service field indicates a host state change. +// - State might be 0 = UP, 1 = DOWN for hosts and 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN for services. +// - StateType might be 0 = SOFT, 1 = HARD. 
// // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-statechange type StateChange struct { Timestamp Icinga2Time `json:"timestamp"` Host string `json:"host"` - Service string `json:"service,omitempty"` - State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) - StateType int `json:"state_type"` // TODO: own type for state types (0 = SOFT, 1 = HARD) + Service string `json:"service"` + State int `json:"state"` + StateType int `json:"state_type"` CheckResult CheckResult `json:"check_result"` DowntimeDepth int `json:"downtime_depth"` Acknowledgement bool `json:"acknowledgement"` @@ -71,30 +137,36 @@ type StateChange struct { // AcknowledgementSet represents the Icinga 2 API Event Stream AcknowledgementSet response for acknowledgements set on hosts/services. // -// NOTE: An empty Service field indicates a host acknowledgement. +// NOTE: +// - An empty Service field indicates a host acknowledgement. +// - State might be 0 = UP, 1 = DOWN for hosts and 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN for services. +// - StateType might be 0 = SOFT, 1 = HARD. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementset type AcknowledgementSet struct { Timestamp Icinga2Time `json:"timestamp"` Host string `json:"host"` - Service string `json:"service,omitempty"` - State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) - StateType int `json:"state_type"` // TODO: own type for state types (0 = SOFT, 1 = HARD) + Service string `json:"service"` + State int `json:"state"` + StateType int `json:"state_type"` Author string `json:"author"` Comment string `json:"comment"` } // AcknowledgementCleared represents the Icinga 2 API Event Stream AcknowledgementCleared response for acknowledgements cleared on hosts/services. // -// NOTE: An empty Service field indicates a host acknowledgement. 
+// NOTE: +// - An empty Service field indicates a host acknowledgement. +// - State might be 0 = UP, 1 = DOWN for hosts and 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN for services. +// - StateType might be 0 = SOFT, 1 = HARD. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementcleared type AcknowledgementCleared struct { Timestamp Icinga2Time `json:"timestamp"` Host string `json:"host"` - Service string `json:"service,omitempty"` - State int `json:"state"` // TODO: own type for states (OK Warning Critical Unknown Up Down) - StateType int `json:"state_type"` // TODO: own type for state types (0 = SOFT, 1 = HARD) + Service string `json:"service"` + State int `json:"state"` + StateType int `json:"state_type"` } // CommentAdded represents the Icinga 2 API Event Stream CommentAdded response for added host/service comments. @@ -146,66 +218,45 @@ type DowntimeTriggered struct { } // UnmarshalEventStreamResponse unmarshals a JSON response line from the Icinga 2 API Event Stream. -func UnmarshalEventStreamResponse(data []byte) (any, error) { +// +// The function expects an Icinga 2 API Event Stream Response in its JSON form and tries to unmarshal it into one of the +// implemented types based on its type field. Thus, the returned any value will be a pointer to such a struct type. +func UnmarshalEventStreamResponse(bytes []byte) (any, error) { // Due to the overlapping fields of the different Event Stream response objects, a struct composition with // decompositions in different variables will result in multiple manual fixes. Thus, a two-way deserialization // was chosen which selects the target type based on the first parsed type field. 
var responseType string - err := json.Unmarshal(data, &struct { + err := json.Unmarshal(bytes, &struct { Type *string `json:"type"` }{&responseType}) if err != nil { return nil, err } + var resp any switch responseType { case "StateChange": - resp := StateChange{} - err = json.Unmarshal(data, &resp) - return resp, err - + resp = new(StateChange) case "AcknowledgementSet": - resp := AcknowledgementSet{} - err = json.Unmarshal(data, &resp) - return resp, err - + resp = new(AcknowledgementSet) case "AcknowledgementCleared": - resp := AcknowledgementCleared{} - err = json.Unmarshal(data, &resp) - return resp, err - + resp = new(AcknowledgementCleared) case "CommentAdded": - resp := CommentAdded{} - err = json.Unmarshal(data, &resp) - return resp, err - + resp = new(CommentAdded) case "CommentRemoved": - resp := CommentRemoved{} - err = json.Unmarshal(data, &resp) - return resp, err - + resp = new(CommentRemoved) case "DowntimeAdded": - resp := DowntimeAdded{} - err = json.Unmarshal(data, &resp) - return resp, err - + resp = new(DowntimeAdded) case "DowntimeRemoved": - resp := DowntimeRemoved{} - err = json.Unmarshal(data, &resp) - return resp, err - + resp = new(DowntimeRemoved) case "DowntimeStarted": - resp := DowntimeStarted{} - err = json.Unmarshal(data, &resp) - return resp, err - + resp = new(DowntimeStarted) case "DowntimeTriggered": - resp := DowntimeTriggered{} - err = json.Unmarshal(data, &resp) - return resp, err - + resp = new(DowntimeTriggered) default: return nil, fmt.Errorf("unsupported type %q", responseType) } + err = json.Unmarshal(bytes, resp) + return resp, err } diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go index 1974cffe..322cbbc4 100644 --- a/internal/eventstream/api_responses_test.go +++ b/internal/eventstream/api_responses_test.go @@ -7,6 +7,207 @@ import ( "time" ) +func TestIcinga2Time_UnmarshalJSON(t *testing.T) { + tests := []struct { + name string + jsonData string + isError bool + 
expected Icinga2Time + }{ + { + name: "json-empty", + jsonData: "", + isError: true, + }, + { + name: "json-invalid", + jsonData: "{", + isError: true, + }, + { + name: "json-wrong-type", + jsonData: `"AAA"`, + isError: true, + }, + { + name: "epoch-time", + jsonData: "0.0", + expected: Icinga2Time{time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + name: "example-time", + jsonData: "1697207144.746333", + expected: Icinga2Time{time.Date(2023, time.October, 13, 14, 25, 44, 746333000, time.UTC)}, + }, + { + name: "example-time-location", + jsonData: "1697207144.746333", + expected: Icinga2Time{time.Date(2023, time.October, 13, 16, 25, 44, 746333000, + time.FixedZone("Europe/Berlin summer", 2*60*60))}, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var ici2time Icinga2Time + err := json.Unmarshal([]byte(test.jsonData), &ici2time) + if (err != nil) != test.isError { + t.Errorf("unexpected error state; got error: %t, expected: %t; %v", err != nil, test.isError, err) + return + } else if err != nil { + return + } + + if ici2time.Compare(test.expected.Time) != 0 { + t.Logf("got: %#v", ici2time) + t.Logf("expected: %#v", test.expected) + t.Error("unexpected response") + } + }) + } +} + +func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { + tests := []struct { + name string + jsonData string + isError bool + expected any + }{ + { + name: "invalid-json", + jsonData: `{":}"`, + isError: true, + }, + { + name: "invalid-typed-json", + jsonData: `{"name": 23, "type": [], "attrs": null}`, + isError: true, + }, + { + name: "unknown-type", + jsonData: `{"type": "ihopethisstringwillneverappearinicinga2asavalidtype"}`, + isError: true, + }, + { + // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/comments' | jq -c '[.results[] | select(.attrs.service_name == "")][0]' + name: "comment-host", + jsonData: 
`{"attrs":{"__name":"dummy-0!f1239b7d-6e13-4031-b7dd-4055fdd2cd80","active":true,"author":"icingaadmin","entry_time":1697454753.536457,"entry_type":1,"expire_time":0,"ha_mode":0,"host_name":"dummy-0","legacy_id":3,"name":"f1239b7d-6e13-4031-b7dd-4055fdd2cd80","original_attributes":null,"package":"_api","paused":false,"persistent":false,"service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-0!f1239b7d-6e13-4031-b7dd-4055fdd2cd80.conf"},"templates":["f1239b7d-6e13-4031-b7dd-4055fdd2cd80"],"text":"foo bar","type":"Comment","version":1697454753.53647,"zone":"master"},"joins":{},"meta":{},"name":"dummy-0!f1239b7d-6e13-4031-b7dd-4055fdd2cd80","type":"Comment"}`, + expected: ObjectQueriesResult{ + Name: "dummy-0!f1239b7d-6e13-4031-b7dd-4055fdd2cd80", + Type: "Comment", + Attrs: &Comment{ + Host: "dummy-0", + Author: "icingaadmin", + Text: "foo bar", + EntryType: 1, + }, + }, + }, + { + // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/comments' | jq -c '[.results[] | select(.attrs.service_name != "")][0]' + name: "comment-service", + jsonData: 
`{"attrs":{"__name":"dummy-912!ping6!1b29580d-0a09-4265-ad1f-5e16f462443d","active":true,"author":"icingaadmin","entry_time":1697197701.307516,"entry_type":1,"expire_time":0,"ha_mode":0,"host_name":"dummy-912","legacy_id":1,"name":"1b29580d-0a09-4265-ad1f-5e16f462443d","original_attributes":null,"package":"_api","paused":false,"persistent":false,"service_name":"ping6","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!ping6!1b29580d-0a09-4265-ad1f-5e16f462443d.conf"},"templates":["1b29580d-0a09-4265-ad1f-5e16f462443d"],"text":"adfadsfasdfasdf","type":"Comment","version":1697197701.307536,"zone":"master"},"joins":{},"meta":{},"name":"dummy-912!ping6!1b29580d-0a09-4265-ad1f-5e16f462443d","type":"Comment"}`, + expected: ObjectQueriesResult{ + Name: "dummy-912!ping6!1b29580d-0a09-4265-ad1f-5e16f462443d", + Type: "Comment", + Attrs: &Comment{ + Host: "dummy-912", + Service: "ping6", + Author: "icingaadmin", + Text: "adfadsfasdfasdf", + EntryType: 1, + }, + }, + }, + { + // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/downtimes' | jq -c '[.results[] | select(.attrs.service_name == "")][0]' + name: "downtime-host", + jsonData: `{"attrs":{"__name":"dummy-11!af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c","active":true,"author":"icingaadmin","authoritative_zone":"","comment":"turn down for 
what","config_owner":"","config_owner_hash":"","duration":0,"end_time":1698096240,"entry_time":1697456415.667442,"fixed":true,"ha_mode":0,"host_name":"dummy-11","legacy_id":2,"name":"af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c","original_attributes":null,"package":"_api","parent":"","paused":false,"remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-11!af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c.conf"},"start_time":1697456292,"templates":["af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c"],"trigger_time":1697456415.667442,"triggered_by":"","triggers":[],"type":"Downtime","version":1697456415.667458,"was_cancelled":false,"zone":"master"},"joins":{},"meta":{},"name":"dummy-11!af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c","type":"Downtime"}`, + expected: ObjectQueriesResult{ + Name: "dummy-11!af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c", + Type: "Downtime", + Attrs: &Downtime{ + Host: "dummy-11", + Author: "icingaadmin", + Comment: "turn down for what", + }, + }, + }, + { + // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/downtimes' | jq -c '[.results[] | select(.attrs.service_name != "")][0]' + name: "downtime-service", + jsonData: `{"attrs":{"__name":"docker-master!load!c27b27c2-e0ab-45ff-8b9b-e95f29851eb0","active":true,"author":"icingaadmin","authoritative_zone":"master","comment":"Scheduled downtime for 
backup","config_owner":"docker-master!load!backup-downtime","config_owner_hash":"ca9502dc8fa5d29c1cb2686808b5d2ccf3ea4a9c6dc3f3c09bfc54614c03c765","duration":0,"end_time":1697511600,"entry_time":1697439555.095232,"fixed":true,"ha_mode":0,"host_name":"docker-master","legacy_id":1,"name":"c27b27c2-e0ab-45ff-8b9b-e95f29851eb0","original_attributes":null,"package":"_api","parent":"","paused":false,"remove_time":0,"scheduled_by":"docker-master!load!backup-downtime","service_name":"load","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!load!c27b27c2-e0ab-45ff-8b9b-e95f29851eb0.conf"},"start_time":1697508000,"templates":["c27b27c2-e0ab-45ff-8b9b-e95f29851eb0"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697439555.095272,"was_cancelled":false,"zone":""},"joins":{},"meta":{},"name":"docker-master!load!c27b27c2-e0ab-45ff-8b9b-e95f29851eb0","type":"Downtime"}`, + expected: ObjectQueriesResult{ + Name: "docker-master!load!c27b27c2-e0ab-45ff-8b9b-e95f29851eb0", + Type: "Downtime", + Attrs: &Downtime{ + Host: "docker-master", + Service: "load", + Author: "icingaadmin", + Comment: "Scheduled downtime for backup", + }, + }, + }, + { + // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/hosts' | jq -c '.results[0]' + name: "host", + jsonData: `{"attrs":{"__name":"dummy-244","acknowledgement":0,"acknowledgement_expiry":0,"acknowledgement_last_change":0,"action_url":"","active":true,"address":"127.0.0.1","address6":"::1","check_attempt":1,"check_command":"random 
fortune","check_interval":300,"check_period":"","check_timeout":null,"command_endpoint":"","display_name":"dummy-244","downtime_depth":0,"enable_active_checks":true,"enable_event_handler":true,"enable_flapping":false,"enable_notifications":true,"enable_passive_checks":true,"enable_perfdata":true,"event_command":"icinga-notifications-host-events","executions":null,"flapping":false,"flapping_current":0,"flapping_ignore_states":null,"flapping_last_change":0,"flapping_threshold":0,"flapping_threshold_high":30,"flapping_threshold_low":25,"force_next_check":false,"force_next_notification":false,"groups":["app-network","department-dev","env-qa","location-rome"],"ha_mode":0,"handled":false,"icon_image":"","icon_image_alt":"","last_check":1697459643.869006,"last_check_result":{"active":true,"check_source":"docker-master","command":["/bin/bash","-c","/usr/games/fortune; exit $0","0"],"execution_end":1697459643.868893,"execution_start":1697459643.863147,"exit_status":0,"output":"If you think last Tuesday was a drag, wait till you see what happens 
tomorrow!","performance_data":[],"previous_hard_state":99,"schedule_end":1697459643.869006,"schedule_start":1697459643.86287,"scheduling_source":"docker-master","state":0,"ttl":0,"type":"CheckResult","vars_after":{"attempt":1,"reachable":true,"state":0,"state_type":1},"vars_before":{"attempt":1,"reachable":true,"state":0,"state_type":1}},"last_hard_state":0,"last_hard_state_change":1697099900.637215,"last_reachable":true,"last_state":0,"last_state_change":1697099900.637215,"last_state_down":0,"last_state_type":1,"last_state_unreachable":0,"last_state_up":1697459643.868893,"max_check_attempts":3,"name":"dummy-244","next_check":1697459943.019035,"next_update":1697460243.031081,"notes":"","notes_url":"","original_attributes":null,"package":"_etc","paused":false,"previous_state_change":1697099900.637215,"problem":false,"retry_interval":60,"severity":0,"source_location":{"first_column":5,"first_line":2,"last_column":38,"last_line":2,"path":"/etc/icinga2/zones.d/master/03-dummys-hosts.conf"},"state":0,"state_type":1,"templates":["dummy-244","generic-icinga-notifications-host"],"type":"Host","vars":{"app":"network","department":"dev","env":"qa","is_dummy":true,"location":"rome"},"version":0,"volatile":false,"zone":"master"},"joins":{},"meta":{},"name":"dummy-244","type":"Host"}`, + expected: ObjectQueriesResult{ + Name: "dummy-244", + Type: "Host", + Attrs: &HostServiceRuntimeAttributes{ + Name: "dummy-244", + State: 0, + LastCheckResult: CheckResult{ + ExitStatus: 0, + Output: "If you think last Tuesday was a drag, wait till you see what happens tomorrow!", + }, + DowntimeDepth: 0, + Acknowledgement: 0, + }, + }, + }, + { + // $ curl -k -s -u root:icinga -d '{"filter": "service.acknowledgement != 0"}' -H 'Accept: application/json' -H 'X-HTTP-Method-Override: GET' 'https://localhost:5665/v1/objects/services' | jq -c '.results[0]' + name: "service", + jsonData: 
`{"attrs":{"__name":"docker-master!ssh","acknowledgement":1,"acknowledgement_expiry":0,"acknowledgement_last_change":1697460655.878141,"action_url":"","active":true,"check_attempt":1,"check_command":"ssh","check_interval":60,"check_period":"","check_timeout":null,"command_endpoint":"","display_name":"ssh","downtime_depth":0,"enable_active_checks":true,"enable_event_handler":true,"enable_flapping":false,"enable_notifications":true,"enable_passive_checks":true,"enable_perfdata":true,"event_command":"icinga-notifications-service-events","executions":null,"flapping":false,"flapping_current":0,"flapping_ignore_states":null,"flapping_last_change":0,"flapping_threshold":0,"flapping_threshold_high":30,"flapping_threshold_low":25,"force_next_check":false,"force_next_notification":false,"groups":[],"ha_mode":0,"handled":true,"host_name":"docker-master","icon_image":"","icon_image_alt":"","last_check":1697460711.134904,"last_check_result":{"active":true,"check_source":"docker-master","command":["/usr/lib/nagios/plugins/check_ssh","127.0.0.1"],"execution_end":1697460711.134875,"execution_start":1697460711.130247,"exit_status":2,"output":"connect to address 127.0.0.1 and port 22: Connection 
refused","performance_data":[],"previous_hard_state":99,"schedule_end":1697460711.134904,"schedule_start":1697460711.13,"scheduling_source":"docker-master","state":2,"ttl":0,"type":"CheckResult","vars_after":{"attempt":1,"reachable":true,"state":2,"state_type":1},"vars_before":{"attempt":1,"reachable":true,"state":2,"state_type":1}},"last_hard_state":2,"last_hard_state_change":1697099980.820806,"last_reachable":true,"last_state":2,"last_state_change":1697099896.120829,"last_state_critical":1697460711.134875,"last_state_ok":0,"last_state_type":1,"last_state_unknown":0,"last_state_unreachable":0,"last_state_warning":0,"max_check_attempts":5,"name":"ssh","next_check":1697460771.1299999,"next_update":1697460831.1397498,"notes":"","notes_url":"","original_attributes":null,"package":"_etc","paused":false,"previous_state_change":1697099896.120829,"problem":true,"retry_interval":30,"severity":640,"source_location":{"first_column":1,"first_line":47,"last_column":19,"last_line":47,"path":"/etc/icinga2/conf.d/services.conf"},"state":2,"state_type":1,"templates":["ssh","generic-icinga-notifications-service","generic-service"],"type":"Service","vars":null,"version":0,"volatile":false,"zone":""},"joins":{},"meta":{},"name":"docker-master!ssh","type":"Service"}`, + expected: ObjectQueriesResult{ + Name: "docker-master!ssh", + Type: "Service", + Attrs: &HostServiceRuntimeAttributes{ + Name: "docker-master!ssh", + State: 2, + LastCheckResult: CheckResult{ + ExitStatus: 2, + Output: "connect to address 127.0.0.1 and port 22: Connection refused", + }, + DowntimeDepth: 0, + Acknowledgement: 1, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var resp ObjectQueriesResult + err := json.Unmarshal([]byte(test.jsonData), &resp) + if (err != nil) != test.isError { + t.Errorf("unexpected error state; got error: %t, expected: %t; %v", err != nil, test.isError, err) + return + } else if err != nil { + return + } + + if !reflect.DeepEqual(resp, 
test.expected) { + t.Logf("got: %#v", resp) + t.Logf("expected: %#v", test.expected) + t.Error("unexpected response") + } + }) + } +} + func TestApiResponseUnmarshal(t *testing.T) { tests := []struct { name string @@ -27,7 +228,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "statechange-host-valid", jsonData: `{"acknowledgement":false,"check_result":{"active":true,"check_source":"docker-master","command":["/bin/bash","-c","/usr/games/fortune; exit $0","2"],"execution_end":1697188278.202986,"execution_start":1697188278.194409,"exit_status":2,"output":"If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway","performance_data":[],"previous_hard_state":99,"schedule_end":1697188278.203036,"schedule_start":1697188278.1938322,"scheduling_source":"docker-master","state":2,"ttl":0,"type":"CheckResult","vars_after":{"attempt":2,"reachable":true,"state":2,"state_type":0},"vars_before":{"attempt":1,"reachable":true,"state":2,"state_type":0}},"downtime_depth":0,"host":"dummy-158","state":1,"state_type":0,"timestamp":1697188278.203504,"type":"StateChange"}`, - expected: StateChange{ + expected: &StateChange{ Timestamp: Icinga2Time{time.UnixMicro(1697188278203504)}, Host: "dummy-158", State: 1, @@ -43,7 +244,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "statechange-service-valid", jsonData: `{"acknowledgement":false,"check_result":{"active":true,"check_source":"docker-master","command":["/bin/bash","-c","/usr/games/fortune; exit $0","2"],"execution_end":1697184778.611465,"execution_start":1697184778.600973,"exit_status":2,"output":"You're growing out of some of your problems, but there are others that\nyou're growing 
into.","performance_data":[],"previous_hard_state":0,"schedule_end":1697184778.611557,"schedule_start":1697184778.6,"scheduling_source":"docker-master","state":2,"ttl":0,"type":"CheckResult","vars_after":{"attempt":2,"reachable":false,"state":2,"state_type":0},"vars_before":{"attempt":1,"reachable":false,"state":2,"state_type":0}},"downtime_depth":0,"host":"dummy-280","service":"random fortune","state":2,"state_type":0,"timestamp":1697184778.612108,"type":"StateChange"}`, - expected: StateChange{ + expected: &StateChange{ Timestamp: Icinga2Time{time.UnixMicro(1697184778612108)}, Host: "dummy-280", Service: "random fortune", @@ -60,7 +261,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "acknowledgementset-host", jsonData: `{"acknowledgement_type":1,"author":"icingaadmin","comment":"working on it","expiry":0,"host":"dummy-805","notify":true,"persistent":false,"state":1,"state_type":1,"timestamp":1697201074.579106,"type":"AcknowledgementSet"}`, - expected: AcknowledgementSet{ + expected: &AcknowledgementSet{ Timestamp: Icinga2Time{time.UnixMicro(1697201074579106)}, Host: "dummy-805", State: 1, @@ -72,7 +273,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "acknowledgementset-service", jsonData: `{"acknowledgement_type":1,"author":"icingaadmin","comment":"will be fixed soon","expiry":0,"host":"docker-master","notify":true,"persistent":false,"service":"ssh","state":2,"state_type":1,"timestamp":1697201107.64792,"type":"AcknowledgementSet"}`, - expected: AcknowledgementSet{ + expected: &AcknowledgementSet{ Timestamp: Icinga2Time{time.UnixMicro(1697201107647920)}, Host: "docker-master", Service: "ssh", @@ -85,7 +286,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "acknowledgementcleared-host", jsonData: `{"acknowledgement_type":0,"host":"dummy-805","state":1,"state_type":1,"timestamp":1697201082.440148,"type":"AcknowledgementCleared"}`, - expected: AcknowledgementCleared{ + expected: &AcknowledgementCleared{ Timestamp: 
Icinga2Time{time.UnixMicro(1697201082440148)}, Host: "dummy-805", State: 1, @@ -95,7 +296,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "acknowledgementcleared-service", jsonData: `{"acknowledgement_type":0,"host":"docker-master","service":"ssh","state":2,"state_type":1,"timestamp":1697201110.220349,"type":"AcknowledgementCleared"}`, - expected: AcknowledgementCleared{ + expected: &AcknowledgementCleared{ Timestamp: Icinga2Time{time.UnixMicro(1697201110220349)}, Host: "docker-master", Service: "ssh", @@ -106,57 +307,61 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "commentadded-host", jsonData: `{"comment":{"__name":"dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3","author":"icingaadmin","entry_time":1697191791.097852,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":1,"name":"f653e951-2210-432d-bca6-e3719ea74ca3","package":"_api","persistent":false,"service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3.conf"},"sticky":false,"templates":["f653e951-2210-432d-bca6-e3719ea74ca3"],"text":"oh noes","type":"Comment","version":1697191791.097867,"zone":"master"},"timestamp":1697191791.099201,"type":"CommentAdded"}`, - expected: CommentAdded{ + expected: &CommentAdded{ Timestamp: Icinga2Time{time.UnixMicro(1697191791099201)}, Comment: Comment{ - Host: "dummy-912", - Author: "icingaadmin", - Text: "oh noes", + Host: "dummy-912", + Author: "icingaadmin", + Text: "oh noes", + EntryType: 1, }, }, }, { name: "commentadded-service", jsonData: 
`{"comment":{"__name":"dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","author":"icingaadmin","entry_time":1697197990.035889,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":8,"name":"8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","package":"_api","persistent":false,"service_name":"ping4","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0.conf"},"sticky":false,"templates":["8c00fb6a-5948-4249-a9d5-d1b6eb8945d0"],"text":"if in doubt, check ticket #23","type":"Comment","version":1697197990.035905,"zone":"master"},"timestamp":1697197990.037244,"type":"CommentAdded"}`, - expected: CommentAdded{ + expected: &CommentAdded{ Timestamp: Icinga2Time{time.UnixMicro(1697197990037244)}, Comment: Comment{ - Host: "dummy-912", - Service: "ping4", - Author: "icingaadmin", - Text: "if in doubt, check ticket #23", + Host: "dummy-912", + Service: "ping4", + Author: "icingaadmin", + Text: "if in doubt, check ticket #23", + EntryType: 1, }, }, }, { name: "commentremoved-host", jsonData: `{"comment":{"__name":"dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3","author":"icingaadmin","entry_time":1697191791.097852,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":1,"name":"f653e951-2210-432d-bca6-e3719ea74ca3","package":"_api","persistent":false,"service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3.conf"},"sticky":false,"templates":["f653e951-2210-432d-bca6-e3719ea74ca3"],"text":"oh noes","type":"Comment","version":1697191791.097867,"zone":"master"},"timestamp":1697191807.910093,"type":"CommentRemoved"}`, - expected: CommentRemoved{ + expected: &CommentRemoved{ Timestamp: 
Icinga2Time{time.UnixMicro(1697191807910093)}, Comment: Comment{ - Host: "dummy-912", - Author: "icingaadmin", - Text: "oh noes", + Host: "dummy-912", + Author: "icingaadmin", + Text: "oh noes", + EntryType: 1, }, }, }, { name: "commentremoved-service", jsonData: `{"comment":{"__name":"dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","author":"icingaadmin","entry_time":1697197990.035889,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":8,"name":"8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","package":"_api","persistent":false,"service_name":"ping4","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0.conf"},"sticky":false,"templates":["8c00fb6a-5948-4249-a9d5-d1b6eb8945d0"],"text":"if in doubt, check ticket #23","type":"Comment","version":1697197990.035905,"zone":"master"},"timestamp":1697197996.584392,"type":"CommentRemoved"}`, - expected: CommentRemoved{ + expected: &CommentRemoved{ Timestamp: Icinga2Time{time.UnixMicro(1697197996584392)}, Comment: Comment{ - Host: "dummy-912", - Service: "ping4", - Author: "icingaadmin", - Text: "if in doubt, check ticket #23", + Host: "dummy-912", + Service: "ping4", + Author: "icingaadmin", + Text: "if in doubt, check ticket #23", + EntryType: 1, }, }, }, { name: "downtimeadded-host", jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511293,"type":"DowntimeAdded"}`, - expected: DowntimeAdded{ + expected: &DowntimeAdded{ Timestamp: Icinga2Time{time.UnixMicro(1697207050511293)}, Downtime: Downtime{ Host: "dummy-157", @@ -168,7 +373,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "downtimeadded-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217425,"type":"DowntimeAdded"}`, - expected: DowntimeAdded{ + expected: &DowntimeAdded{ Timestamp: Icinga2Time{time.UnixMicro(1697207141217425)}, Downtime: Downtime{ Host: "docker-master", @@ -181,7 +386,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "downtimestarted-host", jsonData: `{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511378,"type":"DowntimeStarted"}`, - expected: DowntimeStarted{ + expected: 
&DowntimeStarted{ Timestamp: Icinga2Time{time.UnixMicro(1697207050511378)}, Downtime: Downtime{ Host: "dummy-157", @@ -193,7 +398,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "downtimestarted-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217507,"type":"DowntimeStarted"}`, - expected: DowntimeStarted{ + expected: &DowntimeStarted{ Timestamp: Icinga2Time{time.UnixMicro(1697207141217507)}, Downtime: Downtime{ Host: "docker-master", @@ -206,7 +411,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "downtimetriggered-host", jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":1697207050.509957,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511608,"type":"DowntimeTriggered"}`, - expected: DowntimeTriggered{ + expected: &DowntimeTriggered{ Timestamp: Icinga2Time{time.UnixMicro(1697207050511608)}, Downtime: Downtime{ Host: "dummy-157", @@ -218,7 +423,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "downtimetriggered-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":1697207141.216009,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217726,"type":"DowntimeTriggered"}`, - expected: DowntimeTriggered{ + expected: &DowntimeTriggered{ Timestamp: Icinga2Time{time.UnixMicro(1697207141217726)}, Downtime: Downtime{ Host: "docker-master", @@ -231,7 +436,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "downtimeremoved-host", jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":1697207096.187718,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":1697207050.509957,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207096.187866,"type":"DowntimeRemoved"}`, - expected: DowntimeRemoved{ + expected: &DowntimeRemoved{ Timestamp: Icinga2Time{time.UnixMicro(1697207096187866)}, Downtime: Downtime{ Host: "dummy-157", @@ -243,7 +448,7 @@ func TestApiResponseUnmarshal(t *testing.T) { { name: "downtimeremoved-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":1697207144.746117,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":1697207141.216009,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207144.746333,"type":"DowntimeRemoved"}`, - expected: DowntimeRemoved{ + expected: &DowntimeRemoved{ Timestamp: Icinga2Time{time.UnixMicro(1697207144746333)}, Downtime: Downtime{ Host: "docker-master", @@ -259,13 +464,9 @@ func TestApiResponseUnmarshal(t *testing.T) { t.Run(test.name, func(t *testing.T) { resp, err := UnmarshalEventStreamResponse([]byte(test.jsonData)) if (err != nil) != test.isError { - t.Errorf("unexpected error state; got error: %t, expected: %t", err != nil, test.isError) + t.Errorf("unexpected error state; got error: %t, expected: %t; %v", err != nil, test.isError, err) return - } - if err != nil { - if !test.isError { - t.Error(err) - } + } else if err != nil { return } @@ -277,66 +478,3 @@ func TestApiResponseUnmarshal(t *testing.T) { }) } } - -func TestIcinga2Time(t *testing.T) { - tests := []struct { - name string - jsonData string - isError bool - expected Icinga2Time - }{ - { - name: "json-empty", - jsonData: "", - isError: true, - }, - { - name: "json-invalid", - jsonData: "{", - isError: true, - }, - { - name: "json-wrong-type", - jsonData: `"AAA"`, - isError: true, - }, - { - name: "epoch-time", - jsonData: "0.0", - expected: Icinga2Time{time.Date(1970, time.January, 1, 0, 0, 
0, 0, time.UTC)}, - }, - { - name: "example-time", - jsonData: "1697207144.746333", - expected: Icinga2Time{time.Date(2023, time.October, 13, 14, 25, 44, 746333000, time.UTC)}, - }, - { - name: "example-time-location", - jsonData: "1697207144.746333", - expected: Icinga2Time{time.Date(2023, time.October, 13, 16, 25, 44, 746333000, - time.FixedZone("Europe/Berlin summer", 2*60*60))}, - }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - var ici2time Icinga2Time - err := json.Unmarshal([]byte(test.jsonData), &ici2time) - if (err != nil) != test.isError { - t.Errorf("unexpected error state; got error: %t, expected: %t", err != nil, test.isError) - return - } - if err != nil { - if !test.isError { - t.Error(err) - } - return - } - - if ici2time.Compare(test.expected.Time) != 0 { - t.Logf("got: %#v", ici2time) - t.Logf("expected: %#v", test.expected) - t.Error("unexpected response") - } - }) - } -} From 9eefb193670ad9ffcd1ea57c6e58465d98acc3be Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 16 Oct 2023 17:16:54 +0200 Subject: [PATCH 05/65] eventstream: Initial Client So far, the Client has not much more logic than the initial testing main method. However, now it uses the previously defined types and is the initial building block for some looping type with an Event callback. 
--- cmd/icinga2-notification-source/main.go | 68 ++-------- internal/eventstream/client.go | 168 ++++++++++++++++++++++++ 2 files changed, 180 insertions(+), 56 deletions(-) create mode 100644 internal/eventstream/client.go diff --git a/cmd/icinga2-notification-source/main.go b/cmd/icinga2-notification-source/main.go index 7f9a5932..b0c665f9 100644 --- a/cmd/icinga2-notification-source/main.go +++ b/cmd/icinga2-notification-source/main.go @@ -1,66 +1,22 @@ package main import ( + "context" "crypto/tls" - "encoding/json" - "io" - "log" + "fmt" + "github.com/icinga/icinga-notifications/internal/event" + "github.com/icinga/icinga-notifications/internal/eventstream" "net/http" - "os" - "strings" ) func main() { - req, err := http.NewRequest(http.MethodPost, "https://localhost:5665/v1/events", strings.NewReader(`{"queue":"icinga-notifications","types":["StateChange","AcknowledgementSet","AcknowledgementCleared"]}`)) - if err != nil { - panic(err) + client := eventstream.Client{ + ApiHost: "https://localhost:5665", + ApiBasicAuthUser: "root", + ApiBasicAuthPass: "icinga", + ApiHttpTransport: http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}, + Ctx: context.Background(), + CallbackFn: func(event event.Event) { fmt.Println(event.FullString()) }, } - - req.SetBasicAuth("root", "icinga") - req.Header.Set("Content-Type", "application/json") - req.Header.Set("Accept", "application/json") - - client := &http.Client{ - Transport: &http.Transport{ - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, - }, - }, - } - - res, err := client.Do(req) - if err != nil { - panic(err) - } - - jsonR, jsonW := io.Pipe() - go func() { - _, err = io.Copy(io.MultiWriter(os.Stdout, jsonW), res.Body) - if err != nil { - panic(err) - } - }() - - dec := json.NewDecoder(jsonR) - for { - var event Icinga2Event - err := dec.Decode(&event) - if err != nil { - panic(err) - } - log.Printf("%#v", &event) - } -} - -type Icinga2Event struct { - Acknowledgement bool 
`json:"acknowledgement"` - CheckResult struct { - Output string `json:"output"` - } `json:"check_result"` - Host string `json:"host"` - Service string `json:"service"` - State int `json:"state"` - StateType int `json:"state_type"` - Timestamp float64 `json:"timestamp"` - Type string `json:"type"` + panic(client.ListenEventStream()) } diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go new file mode 100644 index 00000000..d9da31bc --- /dev/null +++ b/internal/eventstream/client.go @@ -0,0 +1,168 @@ +package eventstream + +import ( + "bufio" + "bytes" + "context" + "crypto/rand" + "encoding/json" + "fmt" + "github.com/icinga/icinga-notifications/internal/event" + "net/http" + "net/url" + "time" +) + +const IcingaNotificationsEventSourceId = 1 // TODO + +type Client struct { + ApiHost string + ApiBasicAuthUser string + ApiBasicAuthPass string + ApiHttpTransport http.Transport + + Ctx context.Context + + CallbackFn func(event event.Event) + + LastTimestamp time.Time +} + +func (client *Client) uniqueQueueName() string { + buff := make([]byte, 16) + _, err := rand.Read(buff) + if err != nil { + // This error SHOULD NOT happen. Otherwise, it might be wise to crash. + panic(err) + } + return fmt.Sprintf("icinga-notifications-%x", buff) +} + +func (client *Client) handleStateChange(stateChange *StateChange) error { + client.LastTimestamp = stateChange.Timestamp.Time + + var ( + eventName string + eventUrlSuffix string + eventTags map[string]string + eventSeverity event.Severity + ) + + if stateChange.Service != "" { + eventName = stateChange.Host + "!" 
+ stateChange.Service + eventUrlSuffix = "/icingadb/service?q=" + url.QueryEscape(stateChange.Service) + "&host.name=" + url.QueryEscape(stateChange.Host) + eventTags = map[string]string{ + "host": stateChange.Host, + "service": stateChange.Service, + } + switch stateChange.State { + case 0: + eventSeverity = event.SeverityOK + case 1: + eventSeverity = event.SeverityWarning + case 2: + eventSeverity = event.SeverityCrit + default: + eventSeverity = event.SeverityErr + } + } else { + eventName = stateChange.Host + eventUrlSuffix = "/icingadb/host?name=" + url.QueryEscape(stateChange.Host) + eventTags = map[string]string{ + "host": stateChange.Host, + } + switch stateChange.State { + case 0: + eventSeverity = event.SeverityOK + case 1: + eventSeverity = event.SeverityCrit + default: + eventSeverity = event.SeverityErr + } + } + + ev := event.Event{ + Time: stateChange.Timestamp.Time, + SourceId: IcingaNotificationsEventSourceId, + Name: eventName, + URL: client.ApiHost + eventUrlSuffix, + Tags: eventTags, + ExtraTags: nil, // TODO + Type: event.TypeState, + Severity: eventSeverity, + Username: "", // TODO: a StateChange has no user per se + Message: stateChange.CheckResult.Output, + } + client.CallbackFn(ev) + + return nil +} + +func (client *Client) ListenEventStream() error { + reqBody, err := json.Marshal(map[string]any{ + "queue": client.uniqueQueueName(), + "types": []string{ + "StateChange", + // "AcknowledgementSet", + // "AcknowledgementCleared", + // "CommentAdded", + // "CommentRemoved", + // "DowntimeAdded", + // "DowntimeRemoved", + // "DowntimeStarted", + // "DowntimeTriggered", + }, + }) + if err != nil { + return err + } + + req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, client.ApiHost+"/v1/events", bytes.NewReader(reqBody)) + if err != nil { + return err + } + + req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + + 
httpClient := &http.Client{Transport: &client.ApiHttpTransport} + res, err := httpClient.Do(req) + if err != nil { + return err + } + + lineScanner := bufio.NewScanner(res.Body) + for lineScanner.Scan() { + rawJson := lineScanner.Bytes() + + resp, err := UnmarshalEventStreamResponse(rawJson) + if err != nil { + return err + } + + switch resp.(type) { + case *StateChange: + err = client.handleStateChange(resp.(*StateChange)) + // case *AcknowledgementSet: + // case *AcknowledgementCleared: + // case *CommentAdded: + // case *CommentRemoved: + // case *DowntimeAdded: + // case *DowntimeRemoved: + // case *DowntimeStarted: + // case *DowntimeTriggered: + default: + err = fmt.Errorf("unsupported type %T", resp) + } + if err != nil { + return err + } + } + err = lineScanner.Err() + if err != nil { + return err + } + + return nil +} From 5c63cadf526c6279abd80e4d5ed2febac094ed93 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Tue, 17 Oct 2023 10:54:52 +0200 Subject: [PATCH 06/65] eventstream: handle AcknowledgementSet Additionally, the known Event Stream Types were extracted into consts against typos and the generated URLs to Icinga Web were fixed. 
--- cmd/icinga2-notification-source/main.go | 1 + internal/eventstream/api_responses.go | 32 +++++--- internal/eventstream/client.go | 97 +++++++++++++++++-------- 3 files changed, 90 insertions(+), 40 deletions(-) diff --git a/cmd/icinga2-notification-source/main.go b/cmd/icinga2-notification-source/main.go index b0c665f9..f3451e8b 100644 --- a/cmd/icinga2-notification-source/main.go +++ b/cmd/icinga2-notification-source/main.go @@ -15,6 +15,7 @@ func main() { ApiBasicAuthUser: "root", ApiBasicAuthPass: "icinga", ApiHttpTransport: http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}, + IcingaWebRoot: "http://localhost/icingaweb2", Ctx: context.Background(), CallbackFn: func(event event.Event) { fmt.Println(event.FullString()) }, } diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go index e7891fd1..002272b8 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/eventstream/api_responses.go @@ -116,6 +116,20 @@ func (objQueriesRes *ObjectQueriesResult) UnmarshalJSON(bytes []byte) error { return json.Unmarshal(responseAttrs, objQueriesRes.Attrs) } +// The following constants list all implemented Icinga 2 API Event Stream Types to be used as a const instead of +// (mis)typing the name at multiple places. +const ( + typeStateChange = "StateChange" + typeAcknowledgementSet = "AcknowledgementSet" + typeAcknowledgementCleared = "AcknowledgementCleared" + typeCommentAdded = "CommentAdded" + typeCommentRemoved = "CommentRemoved" + typeDowntimeAdded = "DowntimeAdded" + typeDowntimeRemoved = "DowntimeRemoved" + typeDowntimeStarted = "DowntimeStarted" + typeDowntimeTriggered = "DowntimeTriggered" +) + // StateChange represents the Icinga 2 API Event Stream StateChange response for host/service state changes. 
// // NOTE: @@ -236,23 +250,23 @@ func UnmarshalEventStreamResponse(bytes []byte) (any, error) { var resp any switch responseType { - case "StateChange": + case typeStateChange: resp = new(StateChange) - case "AcknowledgementSet": + case typeAcknowledgementSet: resp = new(AcknowledgementSet) - case "AcknowledgementCleared": + case typeAcknowledgementCleared: resp = new(AcknowledgementCleared) - case "CommentAdded": + case typeCommentAdded: resp = new(CommentAdded) - case "CommentRemoved": + case typeCommentRemoved: resp = new(CommentRemoved) - case "DowntimeAdded": + case typeDowntimeAdded: resp = new(DowntimeAdded) - case "DowntimeRemoved": + case typeDowntimeRemoved: resp = new(DowntimeRemoved) - case "DowntimeStarted": + case typeDowntimeStarted: resp = new(DowntimeStarted) - case "DowntimeTriggered": + case typeDowntimeTriggered: resp = new(DowntimeTriggered) default: return nil, fmt.Errorf("unsupported type %q", responseType) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index d9da31bc..9b4b031a 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -21,6 +21,8 @@ type Client struct { ApiBasicAuthPass string ApiHttpTransport http.Transport + IcingaWebRoot string + Ctx context.Context CallbackFn func(event event.Event) @@ -28,19 +30,8 @@ type Client struct { LastTimestamp time.Time } -func (client *Client) uniqueQueueName() string { - buff := make([]byte, 16) - _, err := rand.Read(buff) - if err != nil { - // This error SHOULD NOT happen. Otherwise, it might be wise to crash. - panic(err) - } - return fmt.Sprintf("icinga-notifications-%x", buff) -} - -func (client *Client) handleStateChange(stateChange *StateChange) error { - client.LastTimestamp = stateChange.Timestamp.Time - +// eventStreamHandleStateChange acts on a received Event Stream StateChange object. 
+func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (event.Event, error) { var ( eventName string eventUrlSuffix string @@ -50,7 +41,7 @@ func (client *Client) handleStateChange(stateChange *StateChange) error { if stateChange.Service != "" { eventName = stateChange.Host + "!" + stateChange.Service - eventUrlSuffix = "/icingadb/service?q=" + url.QueryEscape(stateChange.Service) + "&host.name=" + url.QueryEscape(stateChange.Host) + eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(stateChange.Service) + "&host.name=" + url.PathEscape(stateChange.Host) eventTags = map[string]string{ "host": stateChange.Host, "service": stateChange.Service, @@ -67,7 +58,7 @@ func (client *Client) handleStateChange(stateChange *StateChange) error { } } else { eventName = stateChange.Host - eventUrlSuffix = "/icingadb/host?name=" + url.QueryEscape(stateChange.Host) + eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(stateChange.Host) eventTags = map[string]string{ "host": stateChange.Host, } @@ -85,32 +76,71 @@ func (client *Client) handleStateChange(stateChange *StateChange) error { Time: stateChange.Timestamp.Time, SourceId: IcingaNotificationsEventSourceId, Name: eventName, - URL: client.ApiHost + eventUrlSuffix, + URL: client.IcingaWebRoot + eventUrlSuffix, Tags: eventTags, ExtraTags: nil, // TODO Type: event.TypeState, Severity: eventSeverity, - Username: "", // TODO: a StateChange has no user per se + Username: "", // NOTE: a StateChange has no user per se Message: stateChange.CheckResult.Output, } - client.CallbackFn(ev) + return ev, nil +} - return nil +// eventStreamHandleAcknowledgementSet acts on a received Event Stream AcknowledgementSet object. +func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *AcknowledgementSet) (event.Event, error) { + var ( + eventName string + eventUrlSuffix string + eventTags map[string]string + ) + + if ackSet.Service != "" { + eventName = ackSet.Host + "!" 
+ ackSet.Service + eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(ackSet.Service) + "&host.name=" + url.PathEscape(ackSet.Host) + eventTags = map[string]string{ + "host": ackSet.Host, + "service": ackSet.Service, + } + } else { + eventName = ackSet.Host + eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(ackSet.Host) + eventTags = map[string]string{ + "host": ackSet.Host, + } + } + + ev := event.Event{ + Time: ackSet.Timestamp.Time, + SourceId: IcingaNotificationsEventSourceId, + Name: eventName, + URL: client.IcingaWebRoot + eventUrlSuffix, + Tags: eventTags, + ExtraTags: nil, // TODO + Type: event.TypeAcknowledgement, + Username: ackSet.Author, + Message: ackSet.Comment, + } + return ev, nil } +// ListenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. func (client *Client) ListenEventStream() error { + queueNameRndBuff := make([]byte, 16) + _, _ = rand.Read(queueNameRndBuff) + reqBody, err := json.Marshal(map[string]any{ - "queue": client.uniqueQueueName(), + "queue": fmt.Sprintf("icinga-notifications-%x", queueNameRndBuff), "types": []string{ - "StateChange", - // "AcknowledgementSet", - // "AcknowledgementCleared", - // "CommentAdded", - // "CommentRemoved", - // "DowntimeAdded", - // "DowntimeRemoved", - // "DowntimeStarted", - // "DowntimeTriggered", + typeStateChange, + typeAcknowledgementSet, + // typeAcknowledgementCleared, + // typeCommentAdded, + // typeCommentRemoved, + // typeDowntimeAdded, + // typeDowntimeRemoved, + // typeDowntimeStarted, + // typeDowntimeTriggered, }, }) if err != nil { @@ -141,10 +171,12 @@ func (client *Client) ListenEventStream() error { return err } + var ev event.Event switch resp.(type) { case *StateChange: - err = client.handleStateChange(resp.(*StateChange)) - // case *AcknowledgementSet: + ev, err = client.eventStreamHandleStateChange(resp.(*StateChange)) + case *AcknowledgementSet: + ev, err = client.eventStreamHandleAcknowledgementSet(resp.(*AcknowledgementSet)) // 
case *AcknowledgementCleared: // case *CommentAdded: // case *CommentRemoved: @@ -158,6 +190,9 @@ func (client *Client) ListenEventStream() error { if err != nil { return err } + + client.LastTimestamp = ev.Time + client.CallbackFn(ev) } err = lineScanner.Err() if err != nil { From 814cc21b4b6d1d352e2834a41600ed7c0d599053 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Tue, 17 Oct 2023 11:53:44 +0200 Subject: [PATCH 07/65] eventstream: Icinga Objects API support to Client The specific Client.QueryObjectApiSince method will later be used to catch up missed objects in case of a state loss. --- cmd/icinga2-notification-source/main.go | 5 +++ internal/eventstream/client.go | 55 +++++++++++++++++++++++-- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/cmd/icinga2-notification-source/main.go b/cmd/icinga2-notification-source/main.go index f3451e8b..113955cf 100644 --- a/cmd/icinga2-notification-source/main.go +++ b/cmd/icinga2-notification-source/main.go @@ -7,6 +7,7 @@ import ( "github.com/icinga/icinga-notifications/internal/event" "github.com/icinga/icinga-notifications/internal/eventstream" "net/http" + "time" ) func main() { @@ -19,5 +20,9 @@ func main() { Ctx: context.Background(), CallbackFn: func(event event.Event) { fmt.Println(event.FullString()) }, } + + fmt.Println(client.QueryObjectApiSince("host", time.Now().Add(-time.Minute))) + fmt.Println(client.QueryObjectApiSince("service", time.Now().Add(-time.Minute))) + panic(client.ListenEventStream()) } diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 9b4b031a..db4d814e 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -125,6 +125,9 @@ func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *Acknowledgemen } // ListenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. +// +// In case of a parsing or handling error, this error will be returned. 
If the server closes the connection, nil will +// be returned. func (client *Client) ListenEventStream() error { queueNameRndBuff := make([]byte, 16) _, _ = rand.Read(queueNameRndBuff) @@ -161,6 +164,7 @@ func (client *Client) ListenEventStream() error { if err != nil { return err } + defer res.Body.Close() lineScanner := bufio.NewScanner(res.Body) for lineScanner.Scan() { @@ -172,11 +176,11 @@ func (client *Client) ListenEventStream() error { } var ev event.Event - switch resp.(type) { + switch respT := resp.(type) { case *StateChange: - ev, err = client.eventStreamHandleStateChange(resp.(*StateChange)) + ev, err = client.eventStreamHandleStateChange(respT) case *AcknowledgementSet: - ev, err = client.eventStreamHandleAcknowledgementSet(resp.(*AcknowledgementSet)) + ev, err = client.eventStreamHandleAcknowledgementSet(respT) // case *AcknowledgementCleared: // case *CommentAdded: // case *CommentRemoved: @@ -201,3 +205,48 @@ func (client *Client) ListenEventStream() error { return nil } + +// queryObjectsApi sends a query to the Icinga 2 API /v1/objects to receive data of the given objType. 
+func (client *Client) queryObjectsApi(objType string, payload map[string]any) ([]ObjectQueriesResult, error) { + reqBody, err := json.Marshal(payload) + if err != nil { + return nil, err + } + + req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, client.ApiHost+"/v1/objects/"+objType, bytes.NewReader(reqBody)) + if err != nil { + return nil, err + } + + req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Http-Method-Override", "GET") + + httpClient := &http.Client{Transport: &client.ApiHttpTransport} + res, err := httpClient.Do(req) + if err != nil { + return nil, err + } + defer res.Body.Close() + + var objQueriesResults []ObjectQueriesResult + err = json.NewDecoder(res.Body).Decode(&struct { + Results *[]ObjectQueriesResult `json:"results"` + }{&objQueriesResults}) + if err != nil { + return nil, err + } + + return objQueriesResults, nil +} + +// QueryObjectApiSince retrieves all objects of the given type, e.g., "host" or "service", with a state change after the +// passed time. +func (client *Client) QueryObjectApiSince(objType string, since time.Time) ([]ObjectQueriesResult, error) { + return client.queryObjectsApi( + objType+"s", + map[string]any{ + "filter": fmt.Sprintf("%s.last_state_change>%f", objType, float64(since.UnixMicro())/1_000_000.0), + }) +} From a85db4f04cd54290aa5aec1b20057b51b750b7db Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Tue, 17 Oct 2023 17:14:00 +0200 Subject: [PATCH 08/65] eventstream: reconnection and event replay logic The new Client's main loop tries to fetch potentially lost events after a connection abort. Those events will be compared by a hashed representation to ensure no duplicates will be dispatched. 
--- cmd/icinga2-notification-source/main.go | 22 ++- internal/eventstream/api_responses.go | 10 +- internal/eventstream/api_responses_test.go | 3 + internal/eventstream/client.go | 219 +++++++++++++++++++-- 4 files changed, 229 insertions(+), 25 deletions(-) diff --git a/cmd/icinga2-notification-source/main.go b/cmd/icinga2-notification-source/main.go index 113955cf..b850e583 100644 --- a/cmd/icinga2-notification-source/main.go +++ b/cmd/icinga2-notification-source/main.go @@ -3,26 +3,32 @@ package main import ( "context" "crypto/tls" - "fmt" "github.com/icinga/icinga-notifications/internal/event" "github.com/icinga/icinga-notifications/internal/eventstream" + "github.com/icinga/icingadb/pkg/logging" + "go.uber.org/zap" "net/http" "time" ) func main() { + logs, err := logging.NewLogging("ici2-noma", zap.DebugLevel, logging.CONSOLE, nil, time.Second) + if err != nil { + panic(err) + } + client := eventstream.Client{ ApiHost: "https://localhost:5665", ApiBasicAuthUser: "root", ApiBasicAuthPass: "icinga", ApiHttpTransport: http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}, - IcingaWebRoot: "http://localhost/icingaweb2", - Ctx: context.Background(), - CallbackFn: func(event event.Event) { fmt.Println(event.FullString()) }, - } - fmt.Println(client.QueryObjectApiSince("host", time.Now().Add(-time.Minute))) - fmt.Println(client.QueryObjectApiSince("service", time.Now().Add(-time.Minute))) + IcingaWebRoot: "http://localhost/icingaweb2", + IcingaNotificationsEventSourceId: 1, - panic(client.ListenEventStream()) + CallbackFn: func(event.Event) { /* nop */ }, + Ctx: context.Background(), + Logger: logs.GetLogger(), + } + client.Process() } diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go index 002272b8..375a50f1 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/eventstream/api_responses.go @@ -64,11 +64,13 @@ type Downtime struct { // When catching up potentially missed changes, the following 
fields are holding relevant changes which, fortunately, // are identical for Icinga 2 Host and Service objects. // -// According to the documentation, neither the Host nor the Service name is part of the attributes. However, next to -// being part of the wrapping API response, see ObjectQueriesResult, it is also available in the "__name" attribute, -// reflected in the Name field. For Service objects, it is "${host}!${service}". +// According to the documentation, neither the Host nor the Service name is part of the attributes for Host resp. +// Service objects. However, next to being part of the wrapping API response, see ObjectQueriesResult, it is also +// available in the "__name" attribute, reflected in the Name field. For Service objects, it is "${host}!${service}". +// Furthermore, Service objects have a required non-empty reference to their Host. // // NOTE: +// - Host is empty for Host objects; Host contains the Service's Host object name for Services. // - State might be 0 = UP, 1 = DOWN for hosts and 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN for services. // - Acknowledgement type is 0 = NONE, 1 = NORMAL, 2 = STICKY. 
// @@ -76,8 +78,10 @@ type Downtime struct { // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#service type HostServiceRuntimeAttributes struct { Name string `json:"__name"` + Host string `json:"host_name,omitempty"` State int `json:"state"` LastCheckResult CheckResult `json:"last_check_result"` + LastStateChange Icinga2Time `json:"last_state_change"` DowntimeDepth int `json:"downtime_depth"` Acknowledgement int `json:"acknowledgement"` } diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go index 322cbbc4..716e115c 100644 --- a/internal/eventstream/api_responses_test.go +++ b/internal/eventstream/api_responses_test.go @@ -162,6 +162,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { ExitStatus: 0, Output: "If you think last Tuesday was a drag, wait till you see what happens tomorrow!", }, + LastStateChange: Icinga2Time{time.UnixMicro(1697099900637215)}, DowntimeDepth: 0, Acknowledgement: 0, }, @@ -176,11 +177,13 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Type: "Service", Attrs: &HostServiceRuntimeAttributes{ Name: "docker-master!ssh", + Host: "docker-master", State: 2, LastCheckResult: CheckResult{ ExitStatus: 2, Output: "connect to address 127.0.0.1 and port 22: Connection refused", }, + LastStateChange: Icinga2Time{time.UnixMicro(1697099896120829)}, DowntimeDepth: 0, Acknowledgement: 1, }, diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index db4d814e..0bd20a4f 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -8,26 +8,66 @@ import ( "encoding/json" "fmt" "github.com/icinga/icinga-notifications/internal/event" + "github.com/icinga/icingadb/pkg/logging" + "hash/fnv" + "math" "net/http" "net/url" + "slices" + "strings" + "sync" "time" ) -const IcingaNotificationsEventSourceId = 1 // TODO - type Client struct { ApiHost string ApiBasicAuthUser string ApiBasicAuthPass string ApiHttpTransport http.Transport - 
IcingaWebRoot string - - Ctx context.Context + IcingaNotificationsEventSourceId int64 + IcingaWebRoot string CallbackFn func(event event.Event) + Ctx context.Context + Logger *logging.Logger + + eventsHandlerMutex sync.RWMutex + eventsRingBuffer []uint64 + eventsRingBufferPos int + eventsLastTs time.Time +} + +// handleEvent checks and dispatches generated Events. +func (client *Client) handleEvent(ev event.Event, source string) { + h := fnv.New64a() + _ = json.NewEncoder(h).Encode(ev) + evHash := h.Sum64() + + client.Logger.Debugf("Start handling event %s as %x received from %s", ev.String(), evHash, source) + client.Logger.Debugf("%#v", ev) + + client.eventsHandlerMutex.RLock() + inCache := slices.Contains(client.eventsRingBuffer, evHash) + client.eventsHandlerMutex.RUnlock() + if inCache { + client.Logger.Warnf("Event %s is already in cache and will not be processed", ev.String()) + return + } + + client.eventsHandlerMutex.Lock() + client.eventsRingBuffer[client.eventsRingBufferPos] = evHash + client.eventsRingBufferPos = (client.eventsRingBufferPos + 1) % len(client.eventsRingBuffer) + + if ev.Time.Before(client.eventsLastTs) { + client.Logger.Warnf("Received Event %s generated before last known timestamp %v; turn back the clock", + ev.String(), client.eventsLastTs) + } + client.eventsLastTs = ev.Time + client.eventsHandlerMutex.Unlock() - LastTimestamp time.Time + client.Logger.Debugf("Forward event %s to callback function", ev.String()) + client.CallbackFn(ev) } // eventStreamHandleStateChange acts on a received Event Stream StateChange object. 
@@ -74,7 +114,7 @@ func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (ev ev := event.Event{ Time: stateChange.Timestamp.Time, - SourceId: IcingaNotificationsEventSourceId, + SourceId: client.IcingaNotificationsEventSourceId, Name: eventName, URL: client.IcingaWebRoot + eventUrlSuffix, Tags: eventTags, @@ -112,7 +152,7 @@ func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *Acknowledgemen ev := event.Event{ Time: ackSet.Timestamp.Time, - SourceId: IcingaNotificationsEventSourceId, + SourceId: client.IcingaNotificationsEventSourceId, Name: eventName, URL: client.IcingaWebRoot + eventUrlSuffix, Tags: eventTags, @@ -124,11 +164,11 @@ func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *Acknowledgemen return ev, nil } -// ListenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. +// listenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. // // In case of a parsing or handling error, this error will be returned. If the server closes the connection, nil will // be returned. -func (client *Client) ListenEventStream() error { +func (client *Client) listenEventStream() error { queueNameRndBuff := make([]byte, 16) _, _ = rand.Read(queueNameRndBuff) @@ -195,8 +235,7 @@ func (client *Client) ListenEventStream() error { return err } - client.LastTimestamp = ev.Time - client.CallbackFn(ev) + client.handleEvent(ev, "Event Stream") } err = lineScanner.Err() if err != nil { @@ -241,12 +280,164 @@ func (client *Client) queryObjectsApi(objType string, payload map[string]any) ([ return objQueriesResults, nil } -// QueryObjectApiSince retrieves all objects of the given type, e.g., "host" or "service", with a state change after the +// queryObjectApiSince retrieves all objects of the given type, e.g., "host" or "service", with a state change after the // passed time. 
-func (client *Client) QueryObjectApiSince(objType string, since time.Time) ([]ObjectQueriesResult, error) { +func (client *Client) queryObjectApiSince(objType string, since time.Time) ([]ObjectQueriesResult, error) { return client.queryObjectsApi( objType+"s", map[string]any{ "filter": fmt.Sprintf("%s.last_state_change>%f", objType, float64(since.UnixMicro())/1_000_000.0), }) } + +func (client *Client) checkMissedObjects(objType string) { + client.eventsHandlerMutex.RLock() + objQueriesResults, err := client.queryObjectApiSince(objType, client.eventsLastTs.Add(-time.Minute)) + client.eventsHandlerMutex.RUnlock() + + if err != nil { + client.Logger.Errorf("Quering %ss from API failed, %v", objType, err) + return + } + + client.Logger.Infof("Querying %ss from API resulted in %d objects", objType, len(objQueriesResults)) + + for _, objQueriesResult := range objQueriesResults { + if client.Ctx.Err() != nil { + client.Logger.Info("Stopping %s API response processing as context is finished", objType) + return + } + + attrs := objQueriesResult.Attrs.(*HostServiceRuntimeAttributes) + + var ( + eventUrlSuffix string + eventTags map[string]string + eventSeverity event.Severity + ) + + switch objQueriesResult.Type { + case "Host": + eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(attrs.Name) + eventTags = map[string]string{ + "host": attrs.Name, + } + switch attrs.State { + case 0: + eventSeverity = event.SeverityOK + case 1: + eventSeverity = event.SeverityCrit + default: + eventSeverity = event.SeverityErr + } + + case "Service": + if !strings.HasPrefix(attrs.Name, attrs.Host+"!") { + client.Logger.Errorf("Queried API Service object's name mismatches, %q is no prefix of %q", attrs.Host, attrs.Name) + continue + } + serviceName := attrs.Name[len(attrs.Host)+1:] + eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(serviceName) + "&host.name=" + url.PathEscape(attrs.Host) + eventTags = map[string]string{ + "host": attrs.Host, + "service": serviceName, + } + 
switch attrs.State { + case 0: + eventSeverity = event.SeverityOK + case 1: + eventSeverity = event.SeverityWarning + case 2: + eventSeverity = event.SeverityCrit + default: + eventSeverity = event.SeverityErr + } + + default: + client.Logger.Errorf("Querying API delivered a %q object when expecting %s", objQueriesResult.Type, objType) + continue + } + + ev := event.Event{ + Time: attrs.LastStateChange.Time, + SourceId: client.IcingaNotificationsEventSourceId, + Name: attrs.Name, + URL: client.IcingaWebRoot + eventUrlSuffix, + Tags: eventTags, + ExtraTags: nil, // TODO, same as Event Stream insertion + Type: event.TypeState, + Severity: eventSeverity, + Username: "", // NOTE: a StateChange has no user per se + Message: attrs.LastCheckResult.Output, + } + client.handleEvent(ev, "API "+objType) + } +} + +// reestablishApiConnection tries to access the Icinga 2 API with an exponential backoff. +// +// With 10 retries, it might block up to (2^10 - 1) * 10 / 1_000 = 10.23 seconds. +func (client *Client) reestablishApiConnection() error { + const maxRetries = 10 + + req, err := http.NewRequestWithContext(client.Ctx, http.MethodGet, client.ApiHost+"/v1/", nil) + if err != nil { + return err + } + req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) + + var lastErr error + for i := 0; i < maxRetries; i++ { + time.Sleep((time.Duration)(math.Exp2(float64(i))) * 10 * time.Millisecond) + + client.Logger.Debugf("Try to reestablish an API connection, %d/%d tries..", i+1, maxRetries) + + httpClient := &http.Client{Transport: &client.ApiHttpTransport} + res, err := httpClient.Do(req) + if err != nil { + lastErr = err + client.Logger.Debugf("API probing failed: %v", lastErr) + continue + } + _ = res.Body.Close() + + if res.StatusCode != http.StatusOK { + lastErr = fmt.Errorf("expected HTTP status %d, got %d", http.StatusOK, res.StatusCode) + client.Logger.Debugf("API probing failed: %v", lastErr) + continue + } + return nil + } + return fmt.Errorf("cannot query API 
backend in %d tries, %w", maxRetries, lastErr) +} + +func (client *Client) Process() { + client.eventsRingBuffer = make([]uint64, 1024) + client.eventsRingBufferPos = 0 + + for { + client.Logger.Info("Start listening on Icinga 2 Event Stream..") + err := client.listenEventStream() + if err != nil { + client.Logger.Errorf("Event Stream processing failed: %v", err) + } else { + client.Logger.Warn("Event Stream closed stream; maybe Icinga 2 is reloading") + } + + for { + if client.Ctx.Err() != nil { + client.Logger.Info("Abort Icinga 2 API reconnections as context is finished") + return + } + + err := client.reestablishApiConnection() + if err == nil { + break + } + client.Logger.Errorf("Cannot reestablish an API connection: %v", err) + } + + go client.checkMissedObjects("host") + go client.checkMissedObjects("service") + } +} From a21f35fa4dc1bf66f67a26519a63e3cd3f25b25f Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 18 Oct 2023 11:10:37 +0200 Subject: [PATCH 09/65] eventstream: Host/Service Events from CheckResult Turns out, the timestamp from Event Stream StateChange objects is reflected neither in the CheckResult nor in a Host or Service's last_state_change field. I'm not quite sure where a CheckResult's timestamp originates, but I have now switched to relying on the CheckResult, which is identical in both the Event Stream and in Hosts and Services. While doing so, the duplicated Event generation logic was unified and refactored into its own method.
--- internal/eventstream/api_responses.go | 8 +- internal/eventstream/api_responses_test.go | 34 +++++ internal/eventstream/client.go | 154 ++++++++------------- 3 files changed, 98 insertions(+), 98 deletions(-) diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go index 375a50f1..3677b6ea 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/eventstream/api_responses.go @@ -42,8 +42,12 @@ type Comment struct { // // https://icinga.com/docs/icinga-2/latest/doc/08-advanced-topics/#advanced-value-types-checkresult type CheckResult struct { - ExitStatus int `json:"exit_status"` - Output string `json:"output"` + ExitStatus int `json:"exit_status"` + Output string `json:"output"` + State int `json:"state"` + Command []string `json:"command"` + ExecutionStart Icinga2Time `json:"execution_start"` + ExecutionEnd Icinga2Time `json:"execution_end"` } // Downtime represents the Icinga 2 API Downtime object. diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go index 716e115c..6a88276e 100644 --- a/internal/eventstream/api_responses_test.go +++ b/internal/eventstream/api_responses_test.go @@ -161,6 +161,15 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { LastCheckResult: CheckResult{ ExitStatus: 0, Output: "If you think last Tuesday was a drag, wait till you see what happens tomorrow!", + State: 0, + Command: []string{ + "/bin/bash", + "-c", + "/usr/games/fortune; exit $0", + "0", + }, + ExecutionStart: Icinga2Time{time.UnixMicro(1697459643863147)}, + ExecutionEnd: Icinga2Time{time.UnixMicro(1697459643868893)}, }, LastStateChange: Icinga2Time{time.UnixMicro(1697099900637215)}, DowntimeDepth: 0, @@ -182,6 +191,13 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { LastCheckResult: CheckResult{ ExitStatus: 2, Output: "connect to address 127.0.0.1 and port 22: Connection refused", + State: 2, + Command: []string{ + "/usr/lib/nagios/plugins/check_ssh", + 
"127.0.0.1", + }, + ExecutionStart: Icinga2Time{time.UnixMicro(1697460711130247)}, + ExecutionEnd: Icinga2Time{time.UnixMicro(1697460711134875)}, }, LastStateChange: Icinga2Time{time.UnixMicro(1697099896120829)}, DowntimeDepth: 0, @@ -239,6 +255,15 @@ func TestApiResponseUnmarshal(t *testing.T) { CheckResult: CheckResult{ ExitStatus: 2, Output: "If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway", + State: 2, + Command: []string{ + "/bin/bash", + "-c", + "/usr/games/fortune; exit $0", + "2", + }, + ExecutionStart: Icinga2Time{time.UnixMicro(1697188278194409)}, + ExecutionEnd: Icinga2Time{time.UnixMicro(1697188278202986)}, }, DowntimeDepth: 0, Acknowledgement: false, @@ -256,6 +281,15 @@ func TestApiResponseUnmarshal(t *testing.T) { CheckResult: CheckResult{ ExitStatus: 2, Output: "You're growing out of some of your problems, but there are others that\nyou're growing into.", + State: 2, + Command: []string{ + "/bin/bash", + "-c", + "/usr/games/fortune; exit $0", + "2", + }, + ExecutionStart: Icinga2Time{time.UnixMicro(1697184778600973)}, + ExecutionEnd: Icinga2Time{time.UnixMicro(1697184778611465)}, }, DowntimeDepth: 0, Acknowledgement: false, diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 0bd20a4f..4681dbd4 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -38,40 +38,8 @@ type Client struct { eventsLastTs time.Time } -// handleEvent checks and dispatches generated Events. 
-func (client *Client) handleEvent(ev event.Event, source string) { - h := fnv.New64a() - _ = json.NewEncoder(h).Encode(ev) - evHash := h.Sum64() - - client.Logger.Debugf("Start handling event %s as %x received from %s", ev.String(), evHash, source) - client.Logger.Debugf("%#v", ev) - - client.eventsHandlerMutex.RLock() - inCache := slices.Contains(client.eventsRingBuffer, evHash) - client.eventsHandlerMutex.RUnlock() - if inCache { - client.Logger.Warnf("Event %s is already in cache and will not be processed", ev.String()) - return - } - - client.eventsHandlerMutex.Lock() - client.eventsRingBuffer[client.eventsRingBufferPos] = evHash - client.eventsRingBufferPos = (client.eventsRingBufferPos + 1) % len(client.eventsRingBuffer) - - if ev.Time.Before(client.eventsLastTs) { - client.Logger.Warnf("Received Event %s generated before last known timestamp %v; turn back the clock", - ev.String(), client.eventsLastTs) - } - client.eventsLastTs = ev.Time - client.eventsHandlerMutex.Unlock() - - client.Logger.Debugf("Forward event %s to callback function", ev.String()) - client.CallbackFn(ev) -} - -// eventStreamHandleStateChange acts on a received Event Stream StateChange object. -func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (event.Event, error) { +// buildHostServiceEvent constructs an event.Event based on a CheckResult, a host name and an optional service name. +func (client *Client) buildHostServiceEvent(result CheckResult, hostName, serviceName string) event.Event { var ( eventName string eventUrlSuffix string @@ -79,14 +47,14 @@ func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (ev eventSeverity event.Severity ) - if stateChange.Service != "" { - eventName = stateChange.Host + "!" + stateChange.Service - eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(stateChange.Service) + "&host.name=" + url.PathEscape(stateChange.Host) + if serviceName != "" { + eventName = hostName + "!" 
+ serviceName + eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(serviceName) + "&host.name=" + url.PathEscape(hostName) eventTags = map[string]string{ - "host": stateChange.Host, - "service": stateChange.Service, + "host": hostName, + "service": serviceName, } - switch stateChange.State { + switch result.State { case 0: eventSeverity = event.SeverityOK case 1: @@ -97,12 +65,12 @@ func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (ev eventSeverity = event.SeverityErr } } else { - eventName = stateChange.Host - eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(stateChange.Host) + eventName = hostName + eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(hostName) eventTags = map[string]string{ - "host": stateChange.Host, + "host": hostName, } - switch stateChange.State { + switch result.State { case 0: eventSeverity = event.SeverityOK case 1: @@ -112,8 +80,8 @@ func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (ev } } - ev := event.Event{ - Time: stateChange.Timestamp.Time, + return event.Event{ + Time: result.ExecutionEnd.Time, SourceId: client.IcingaNotificationsEventSourceId, Name: eventName, URL: client.IcingaWebRoot + eventUrlSuffix, @@ -122,9 +90,44 @@ func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (ev Type: event.TypeState, Severity: eventSeverity, Username: "", // NOTE: a StateChange has no user per se - Message: stateChange.CheckResult.Output, + Message: result.Output, } - return ev, nil +} + +// handleEvent checks and dispatches generated Events. 
+func (client *Client) handleEvent(ev event.Event, source string) { + h := fnv.New64a() + _ = json.NewEncoder(h).Encode(ev) + evHash := h.Sum64() + + client.Logger.Debugf("Start handling event %s as %x received from %s", ev.String(), evHash, source) + + client.eventsHandlerMutex.RLock() + inCache := slices.Contains(client.eventsRingBuffer, evHash) + client.eventsHandlerMutex.RUnlock() + if inCache { + client.Logger.Warnf("Event %s is already in cache and will not be processed", ev.String()) + return + } + + client.eventsHandlerMutex.Lock() + client.eventsRingBuffer[client.eventsRingBufferPos] = evHash + client.eventsRingBufferPos = (client.eventsRingBufferPos + 1) % len(client.eventsRingBuffer) + + if ev.Time.Before(client.eventsLastTs) { + client.Logger.Warnf("Received Event %s generated before last known timestamp %v; turn back the clock", + ev.String(), client.eventsLastTs) + } + client.eventsLastTs = ev.Time + client.eventsHandlerMutex.Unlock() + + client.Logger.Debugf("Forward event %s to callback function", ev.String()) + client.CallbackFn(ev) +} + +// eventStreamHandleStateChange acts on a received Event Stream StateChange object. +func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (event.Event, error) { + return client.buildHostServiceEvent(stateChange.CheckResult, stateChange.Host, stateChange.Service), nil } // eventStreamHandleAcknowledgementSet acts on a received Event Stream AcknowledgementSet object. 
@@ -204,7 +207,7 @@ func (client *Client) listenEventStream() error { if err != nil { return err } - defer res.Body.Close() + defer func() { _ = res.Body.Close() }() lineScanner := bufio.NewScanner(res.Body) for lineScanner.Scan() { @@ -267,7 +270,7 @@ func (client *Client) queryObjectsApi(objType string, payload map[string]any) ([ if err != nil { return nil, err } - defer res.Body.Close() + defer func() { _ = res.Body.Close() }() var objQueriesResults []ObjectQueriesResult err = json.NewDecoder(res.Body).Decode(&struct { @@ -310,66 +313,25 @@ func (client *Client) checkMissedObjects(objType string) { attrs := objQueriesResult.Attrs.(*HostServiceRuntimeAttributes) - var ( - eventUrlSuffix string - eventTags map[string]string - eventSeverity event.Severity - ) - + var hostName, serviceName string switch objQueriesResult.Type { case "Host": - eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(attrs.Name) - eventTags = map[string]string{ - "host": attrs.Name, - } - switch attrs.State { - case 0: - eventSeverity = event.SeverityOK - case 1: - eventSeverity = event.SeverityCrit - default: - eventSeverity = event.SeverityErr - } + hostName = attrs.Name case "Service": if !strings.HasPrefix(attrs.Name, attrs.Host+"!") { client.Logger.Errorf("Queried API Service object's name mismatches, %q is no prefix of %q", attrs.Host, attrs.Name) continue } - serviceName := attrs.Name[len(attrs.Host)+1:] - eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(serviceName) + "&host.name=" + url.PathEscape(attrs.Host) - eventTags = map[string]string{ - "host": attrs.Host, - "service": serviceName, - } - switch attrs.State { - case 0: - eventSeverity = event.SeverityOK - case 1: - eventSeverity = event.SeverityWarning - case 2: - eventSeverity = event.SeverityCrit - default: - eventSeverity = event.SeverityErr - } + hostName = attrs.Host + serviceName = attrs.Name[len(attrs.Host)+1:] default: client.Logger.Errorf("Querying API delivered a %q object when expecting %s", 
objQueriesResult.Type, objType) continue } - ev := event.Event{ - Time: attrs.LastStateChange.Time, - SourceId: client.IcingaNotificationsEventSourceId, - Name: attrs.Name, - URL: client.IcingaWebRoot + eventUrlSuffix, - Tags: eventTags, - ExtraTags: nil, // TODO, same as Event Stream insertion - Type: event.TypeState, - Severity: eventSeverity, - Username: "", // NOTE: a StateChange has no user per se - Message: attrs.LastCheckResult.Output, - } + ev := client.buildHostServiceEvent(attrs.LastCheckResult, hostName, serviceName) client.handleEvent(ev, "API "+objType) } } From aae7f875cb000dfdef941a9761a41e1ce9f3ef8e Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 18 Oct 2023 11:59:30 +0200 Subject: [PATCH 10/65] eventstream: fix Severity, improve logging and doc --- cmd/icinga2-notification-source/main.go | 9 +++- internal/eventstream/client.go | 58 +++++++++++++++++-------- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/cmd/icinga2-notification-source/main.go b/cmd/icinga2-notification-source/main.go index b850e583..78b43c12 100644 --- a/cmd/icinga2-notification-source/main.go +++ b/cmd/icinga2-notification-source/main.go @@ -8,11 +8,16 @@ import ( "github.com/icinga/icingadb/pkg/logging" "go.uber.org/zap" "net/http" + "os" + "os/signal" "time" ) func main() { - logs, err := logging.NewLogging("ici2-noma", zap.DebugLevel, logging.CONSOLE, nil, time.Second) + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) + defer cancel() + + logs, err := logging.NewLogging("ici2-noma", zap.InfoLevel, logging.CONSOLE, nil, time.Second) if err != nil { panic(err) } @@ -27,7 +32,7 @@ func main() { IcingaNotificationsEventSourceId: 1, CallbackFn: func(event.Event) { /* nop */ }, - Ctx: context.Background(), + Ctx: ctx, Logger: logs.GetLogger(), } client.Process() diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 4681dbd4..9adb7f03 100644 --- a/internal/eventstream/client.go +++ 
b/internal/eventstream/client.go @@ -19,27 +19,37 @@ import ( "time" ) +// Client for the Icinga 2 Event Stream API with extended support for other Icinga 2 APIs to gather additional +// information and allow a replay in case of a connection loss. type Client struct { + // ApiHost et al. configure where and how the Icinga 2 API can be reached. ApiHost string ApiBasicAuthUser string ApiBasicAuthPass string ApiHttpTransport http.Transport + // IcingaNotificationsEventSourceId to be reflected in generated event.Events. IcingaNotificationsEventSourceId int64 - IcingaWebRoot string + // IcingaWebRoot points to the Icinga Web 2 endpoint for generated URLs. + IcingaWebRoot string + // CallbackFn receives generated event.Events. CallbackFn func(event event.Event) - Ctx context.Context - Logger *logging.Logger + // Ctx for all web requests as well as internal wait loops. + Ctx context.Context + // Logger to log to. + Logger *logging.Logger + // All those variables are used internally to keep at least some state. eventsHandlerMutex sync.RWMutex eventsRingBuffer []uint64 eventsRingBufferPos int eventsLastTs time.Time } -// buildHostServiceEvent constructs an event.Event based on a CheckResult, a host name and an optional service name. -func (client *Client) buildHostServiceEvent(result CheckResult, hostName, serviceName string) event.Event { +// buildHostServiceEvent constructs an event.Event based on a CheckResult, a Host or Service state, a Host name and an +// optional Service name if the Event should represent a Service object. 
+func (client *Client) buildHostServiceEvent(result CheckResult, state int, hostName, serviceName string) event.Event { var ( eventName string eventUrlSuffix string @@ -54,7 +64,7 @@ func (client *Client) buildHostServiceEvent(result CheckResult, hostName, servic "host": hostName, "service": serviceName, } - switch result.State { + switch state { case 0: eventSeverity = event.SeverityOK case 1: @@ -70,7 +80,7 @@ func (client *Client) buildHostServiceEvent(result CheckResult, hostName, servic eventTags = map[string]string{ "host": hostName, } - switch result.State { + switch state { case 0: eventSeverity = event.SeverityOK case 1: @@ -100,13 +110,13 @@ func (client *Client) handleEvent(ev event.Event, source string) { _ = json.NewEncoder(h).Encode(ev) evHash := h.Sum64() - client.Logger.Debugf("Start handling event %s as %x received from %s", ev.String(), evHash, source) + client.Logger.Debugf("Start handling event %s received from %s", ev.String(), source) client.eventsHandlerMutex.RLock() inCache := slices.Contains(client.eventsRingBuffer, evHash) client.eventsHandlerMutex.RUnlock() if inCache { - client.Logger.Warnf("Event %s is already in cache and will not be processed", ev.String()) + client.Logger.Warnf("Event %s received from %s is already in cache and will not be processed", ev.String(), source) return } @@ -115,19 +125,18 @@ func (client *Client) handleEvent(ev event.Event, source string) { client.eventsRingBufferPos = (client.eventsRingBufferPos + 1) % len(client.eventsRingBuffer) if ev.Time.Before(client.eventsLastTs) { - client.Logger.Warnf("Received Event %s generated before last known timestamp %v; turn back the clock", - ev.String(), client.eventsLastTs) + client.Logger.Infof("Event %s received from %s generated at %v before last known timestamp %v; might be a replay", + ev.String(), source, ev.Time, client.eventsLastTs) } client.eventsLastTs = ev.Time client.eventsHandlerMutex.Unlock() - client.Logger.Debugf("Forward event %s to callback function", 
ev.String()) client.CallbackFn(ev) } // eventStreamHandleStateChange acts on a received Event Stream StateChange object. func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (event.Event, error) { - return client.buildHostServiceEvent(stateChange.CheckResult, stateChange.Host, stateChange.Service), nil + return client.buildHostServiceEvent(stateChange.CheckResult, stateChange.State, stateChange.Host, stateChange.Service), nil } // eventStreamHandleAcknowledgementSet acts on a received Event Stream AcknowledgementSet object. @@ -293,17 +302,22 @@ func (client *Client) queryObjectApiSince(objType string, since time.Time) ([]Ob }) } +// checkMissedObjects fetches all objects of the requested objType (host or service) from the API and sends those to the +// handleEvent method to be eventually dispatched to the callback. func (client *Client) checkMissedObjects(objType string) { client.eventsHandlerMutex.RLock() - objQueriesResults, err := client.queryObjectApiSince(objType, client.eventsLastTs.Add(-time.Minute)) + objQueriesResults, err := client.queryObjectApiSince(objType, client.eventsLastTs) client.eventsHandlerMutex.RUnlock() if err != nil { client.Logger.Errorf("Quering %ss from API failed, %v", objType, err) return } + if len(objQueriesResults) == 0 { + return + } - client.Logger.Infof("Querying %ss from API resulted in %d objects", objType, len(objQueriesResults)) + client.Logger.Infof("Querying %ss from API resulted in %d objects to replay", objType, len(objQueriesResults)) for _, objQueriesResult := range objQueriesResults { if client.Ctx.Err() != nil { @@ -324,14 +338,14 @@ func (client *Client) checkMissedObjects(objType string) { continue } hostName = attrs.Host - serviceName = attrs.Name[len(attrs.Host)+1:] + serviceName = attrs.Name[len(attrs.Host+"!"):] default: client.Logger.Errorf("Querying API delivered a %q object when expecting %s", objQueriesResult.Type, objType) continue } - ev := 
client.buildHostServiceEvent(attrs.LastCheckResult, hostName, serviceName) + ev := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, hostName, serviceName) client.handleEvent(ev, "API "+objType) } } @@ -350,6 +364,9 @@ func (client *Client) reestablishApiConnection() error { var lastErr error for i := 0; i < maxRetries; i++ { + if client.Ctx.Err() != nil { + return client.Ctx.Err() + } time.Sleep((time.Duration)(math.Exp2(float64(i))) * 10 * time.Millisecond) client.Logger.Debugf("Try to reestablish an API connection, %d/%d tries..", i+1, maxRetries) @@ -373,10 +390,17 @@ func (client *Client) reestablishApiConnection() error { return fmt.Errorf("cannot query API backend in %d tries, %w", maxRetries, lastErr) } +// Process incoming objects and reconnect to the Event Stream with replaying objects if necessary. +// +// This method blocks as long as the Client runs, which, unless its context is cancelled, is forever. While its internal +// loop takes care of reconnections, all those events will be logged while generated Events will be dispatched to the +// callback function. func (client *Client) Process() { client.eventsRingBuffer = make([]uint64, 1024) client.eventsRingBufferPos = 0 + defer client.Logger.Info("Event Stream Client has stopped") + for { client.Logger.Info("Start listening on Icinga 2 Event Stream..") err := client.listenEventStream() From e9d19ca3c15035c31bc1ca1cc08f841a9f44f82b Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 18 Oct 2023 13:53:58 +0200 Subject: [PATCH 11/65] eventstream: split client.go, refactor queryObjectsApi* As the eventstream.Client code grew, the client.go file became too long for my liking. Thus, I split it into main/common, Event Stream, and other Icinga 2 API parts. Furthermore, preparing future changes, I refactored the queryObjectsApi* functions to allow direct object access without a "slow" filter. 
--- internal/eventstream/client.go | 264 +---------------------------- internal/eventstream/client_api.go | 164 ++++++++++++++++++ internal/eventstream/client_es.go | 132 +++++++++++++++ 3 files changed, 298 insertions(+), 262 deletions(-) create mode 100644 internal/eventstream/client_api.go create mode 100644 internal/eventstream/client_es.go diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 9adb7f03..1e7e2a06 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -1,24 +1,20 @@ package eventstream import ( - "bufio" - "bytes" "context" - "crypto/rand" "encoding/json" - "fmt" "github.com/icinga/icinga-notifications/internal/event" "github.com/icinga/icingadb/pkg/logging" "hash/fnv" - "math" "net/http" "net/url" "slices" - "strings" "sync" "time" ) +// This file contains the main resp. common methods for the Client. + // Client for the Icinga 2 Event Stream API with extended support for other Icinga 2 APIs to gather additional // information and allow a replay in case of a connection loss. type Client struct { @@ -134,262 +130,6 @@ func (client *Client) handleEvent(ev event.Event, source string) { client.CallbackFn(ev) } -// eventStreamHandleStateChange acts on a received Event Stream StateChange object. -func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (event.Event, error) { - return client.buildHostServiceEvent(stateChange.CheckResult, stateChange.State, stateChange.Host, stateChange.Service), nil -} - -// eventStreamHandleAcknowledgementSet acts on a received Event Stream AcknowledgementSet object. -func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *AcknowledgementSet) (event.Event, error) { - var ( - eventName string - eventUrlSuffix string - eventTags map[string]string - ) - - if ackSet.Service != "" { - eventName = ackSet.Host + "!" 
+ ackSet.Service - eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(ackSet.Service) + "&host.name=" + url.PathEscape(ackSet.Host) - eventTags = map[string]string{ - "host": ackSet.Host, - "service": ackSet.Service, - } - } else { - eventName = ackSet.Host - eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(ackSet.Host) - eventTags = map[string]string{ - "host": ackSet.Host, - } - } - - ev := event.Event{ - Time: ackSet.Timestamp.Time, - SourceId: client.IcingaNotificationsEventSourceId, - Name: eventName, - URL: client.IcingaWebRoot + eventUrlSuffix, - Tags: eventTags, - ExtraTags: nil, // TODO - Type: event.TypeAcknowledgement, - Username: ackSet.Author, - Message: ackSet.Comment, - } - return ev, nil -} - -// listenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. -// -// In case of a parsing or handling error, this error will be returned. If the server closes the connection, nil will -// be returned. -func (client *Client) listenEventStream() error { - queueNameRndBuff := make([]byte, 16) - _, _ = rand.Read(queueNameRndBuff) - - reqBody, err := json.Marshal(map[string]any{ - "queue": fmt.Sprintf("icinga-notifications-%x", queueNameRndBuff), - "types": []string{ - typeStateChange, - typeAcknowledgementSet, - // typeAcknowledgementCleared, - // typeCommentAdded, - // typeCommentRemoved, - // typeDowntimeAdded, - // typeDowntimeRemoved, - // typeDowntimeStarted, - // typeDowntimeTriggered, - }, - }) - if err != nil { - return err - } - - req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, client.ApiHost+"/v1/events", bytes.NewReader(reqBody)) - if err != nil { - return err - } - - req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) - req.Header.Set("Accept", "application/json") - req.Header.Set("Content-Type", "application/json") - - httpClient := &http.Client{Transport: &client.ApiHttpTransport} - res, err := httpClient.Do(req) - if err != nil { - return err - } - defer func() { _ 
= res.Body.Close() }() - - lineScanner := bufio.NewScanner(res.Body) - for lineScanner.Scan() { - rawJson := lineScanner.Bytes() - - resp, err := UnmarshalEventStreamResponse(rawJson) - if err != nil { - return err - } - - var ev event.Event - switch respT := resp.(type) { - case *StateChange: - ev, err = client.eventStreamHandleStateChange(respT) - case *AcknowledgementSet: - ev, err = client.eventStreamHandleAcknowledgementSet(respT) - // case *AcknowledgementCleared: - // case *CommentAdded: - // case *CommentRemoved: - // case *DowntimeAdded: - // case *DowntimeRemoved: - // case *DowntimeStarted: - // case *DowntimeTriggered: - default: - err = fmt.Errorf("unsupported type %T", resp) - } - if err != nil { - return err - } - - client.handleEvent(ev, "Event Stream") - } - err = lineScanner.Err() - if err != nil { - return err - } - - return nil -} - -// queryObjectsApi sends a query to the Icinga 2 API /v1/objects to receive data of the given objType. -func (client *Client) queryObjectsApi(objType string, payload map[string]any) ([]ObjectQueriesResult, error) { - reqBody, err := json.Marshal(payload) - if err != nil { - return nil, err - } - - req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, client.ApiHost+"/v1/objects/"+objType, bytes.NewReader(reqBody)) - if err != nil { - return nil, err - } - - req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) - req.Header.Set("Accept", "application/json") - req.Header.Set("Content-Type", "application/json") - req.Header.Set("X-Http-Method-Override", "GET") - - httpClient := &http.Client{Transport: &client.ApiHttpTransport} - res, err := httpClient.Do(req) - if err != nil { - return nil, err - } - defer func() { _ = res.Body.Close() }() - - var objQueriesResults []ObjectQueriesResult - err = json.NewDecoder(res.Body).Decode(&struct { - Results *[]ObjectQueriesResult `json:"results"` - }{&objQueriesResults}) - if err != nil { - return nil, err - } - - return objQueriesResults, nil -} - -// 
queryObjectApiSince retrieves all objects of the given type, e.g., "host" or "service", with a state change after the -// passed time. -func (client *Client) queryObjectApiSince(objType string, since time.Time) ([]ObjectQueriesResult, error) { - return client.queryObjectsApi( - objType+"s", - map[string]any{ - "filter": fmt.Sprintf("%s.last_state_change>%f", objType, float64(since.UnixMicro())/1_000_000.0), - }) -} - -// checkMissedObjects fetches all objects of the requested objType (host or service) from the API and sends those to the -// handleEvent method to be eventually dispatched to the callback. -func (client *Client) checkMissedObjects(objType string) { - client.eventsHandlerMutex.RLock() - objQueriesResults, err := client.queryObjectApiSince(objType, client.eventsLastTs) - client.eventsHandlerMutex.RUnlock() - - if err != nil { - client.Logger.Errorf("Quering %ss from API failed, %v", objType, err) - return - } - if len(objQueriesResults) == 0 { - return - } - - client.Logger.Infof("Querying %ss from API resulted in %d objects to replay", objType, len(objQueriesResults)) - - for _, objQueriesResult := range objQueriesResults { - if client.Ctx.Err() != nil { - client.Logger.Info("Stopping %s API response processing as context is finished", objType) - return - } - - attrs := objQueriesResult.Attrs.(*HostServiceRuntimeAttributes) - - var hostName, serviceName string - switch objQueriesResult.Type { - case "Host": - hostName = attrs.Name - - case "Service": - if !strings.HasPrefix(attrs.Name, attrs.Host+"!") { - client.Logger.Errorf("Queried API Service object's name mismatches, %q is no prefix of %q", attrs.Host, attrs.Name) - continue - } - hostName = attrs.Host - serviceName = attrs.Name[len(attrs.Host+"!"):] - - default: - client.Logger.Errorf("Querying API delivered a %q object when expecting %s", objQueriesResult.Type, objType) - continue - } - - ev := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, hostName, serviceName) - 
client.handleEvent(ev, "API "+objType) - } -} - -// reestablishApiConnection tries to access the Icinga 2 API with an exponential backoff. -// -// With 10 retries, it might block up to (2^10 - 1) * 10 / 1_000 = 10.23 seconds. -func (client *Client) reestablishApiConnection() error { - const maxRetries = 10 - - req, err := http.NewRequestWithContext(client.Ctx, http.MethodGet, client.ApiHost+"/v1/", nil) - if err != nil { - return err - } - req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) - - var lastErr error - for i := 0; i < maxRetries; i++ { - if client.Ctx.Err() != nil { - return client.Ctx.Err() - } - time.Sleep((time.Duration)(math.Exp2(float64(i))) * 10 * time.Millisecond) - - client.Logger.Debugf("Try to reestablish an API connection, %d/%d tries..", i+1, maxRetries) - - httpClient := &http.Client{Transport: &client.ApiHttpTransport} - res, err := httpClient.Do(req) - if err != nil { - lastErr = err - client.Logger.Debugf("API probing failed: %v", lastErr) - continue - } - _ = res.Body.Close() - - if res.StatusCode != http.StatusOK { - lastErr = fmt.Errorf("expected HTTP status %d, got %d", http.StatusOK, res.StatusCode) - client.Logger.Debugf("API probing failed: %v", lastErr) - continue - } - return nil - } - return fmt.Errorf("cannot query API backend in %d tries, %w", maxRetries, lastErr) -} - // Process incoming objects and reconnect to the Event Stream with replaying objects if necessary. // // This method blocks as long as the Client runs, which, unless its context is cancelled, is forever. While its internal diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go new file mode 100644 index 00000000..61ff0cd4 --- /dev/null +++ b/internal/eventstream/client_api.go @@ -0,0 +1,164 @@ +package eventstream + +import ( + "bytes" + "encoding/json" + "fmt" + "math" + "net/http" + "strings" + "time" +) + +// This method contains Icinga 2 API related methods which are not directly related to the Event Stream. 
+ +// queryObjectsApi takes a Request, executes it and hopefully returns an array of . +func (client *Client) queryObjectsApi(req *http.Request) ([]ObjectQueriesResult, error) { + httpClient := &http.Client{Transport: &client.ApiHttpTransport} + res, err := httpClient.Do(req) + if err != nil { + return nil, err + } + defer func() { _ = res.Body.Close() }() + + var objQueriesResults []ObjectQueriesResult + err = json.NewDecoder(res.Body).Decode(&struct { + Results *[]ObjectQueriesResult `json:"results"` + }{&objQueriesResults}) + if err != nil { + return nil, err + } + + return objQueriesResults, nil +} + +// queryObjectsApiDirect performs a direct resp. "fast" API query against a specific object identified by its name. +func (client *Client) queryObjectsApiDirect(objType, objName string) ([]ObjectQueriesResult, error) { + req, err := http.NewRequestWithContext(client.Ctx, http.MethodGet, client.ApiHost+"/v1/objects/"+objType+"s/"+objName, nil) + if err != nil { + return nil, err + } + + req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) + req.Header.Set("Accept", "application/json") + + return client.queryObjectsApi(req) +} + +// queryObjectsApiQuery sends a query to the Icinga 2 API /v1/objects to receive data of the given objType. 
+func (client *Client) queryObjectsApiQuery(objType string, payload map[string]any) ([]ObjectQueriesResult, error) { + reqBody, err := json.Marshal(payload) + if err != nil { + return nil, err + } + + req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, client.ApiHost+"/v1/objects/"+objType+"s", bytes.NewReader(reqBody)) + if err != nil { + return nil, err + } + + req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Http-Method-Override", "GET") + + return client.queryObjectsApi(req) +} + +// queryObjectApiSince retrieves all objects of the given type, e.g., "host" or "service", with a state change after the +// passed time. +func (client *Client) queryObjectApiSince(objType string, since time.Time) ([]ObjectQueriesResult, error) { + return client.queryObjectsApiQuery( + objType, + map[string]any{ + "filter": fmt.Sprintf("%s.last_state_change>%f", objType, float64(since.UnixMicro())/1_000_000.0), + }) +} + +// checkMissedObjects fetches all objects of the requested objType (host or service) from the API and sends those to the +// handleEvent method to be eventually dispatched to the callback. 
+func (client *Client) checkMissedObjects(objType string) { + client.eventsHandlerMutex.RLock() + objQueriesResults, err := client.queryObjectApiSince(objType, client.eventsLastTs) + client.eventsHandlerMutex.RUnlock() + + if err != nil { + client.Logger.Errorf("Quering %ss from API failed, %v", objType, err) + return + } + if len(objQueriesResults) == 0 { + return + } + + client.Logger.Infof("Querying %ss from API resulted in %d objects to replay", objType, len(objQueriesResults)) + + for _, objQueriesResult := range objQueriesResults { + if client.Ctx.Err() != nil { + client.Logger.Info("Stopping %s API response processing as context is finished", objType) + return + } + + attrs := objQueriesResult.Attrs.(*HostServiceRuntimeAttributes) + + var hostName, serviceName string + switch objQueriesResult.Type { + case "Host": + hostName = attrs.Name + + case "Service": + if !strings.HasPrefix(attrs.Name, attrs.Host+"!") { + client.Logger.Errorf("Queried API Service object's name mismatches, %q is no prefix of %q", attrs.Host, attrs.Name) + continue + } + hostName = attrs.Host + serviceName = attrs.Name[len(attrs.Host+"!"):] + + default: + client.Logger.Errorf("Querying API delivered a %q object when expecting %s", objQueriesResult.Type, objType) + continue + } + + ev := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, hostName, serviceName) + client.handleEvent(ev, "API "+objType) + } +} + +// reestablishApiConnection tries to access the Icinga 2 API with an exponential backoff. +// +// With 10 retries, it might block up to (2^10 - 1) * 10 / 1_000 = 10.23 seconds. 
+func (client *Client) reestablishApiConnection() error { + const maxRetries = 10 + + req, err := http.NewRequestWithContext(client.Ctx, http.MethodGet, client.ApiHost+"/v1/", nil) + if err != nil { + return err + } + req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) + + var lastErr error + for i := 0; i < maxRetries; i++ { + if client.Ctx.Err() != nil { + return client.Ctx.Err() + } + time.Sleep((time.Duration)(math.Exp2(float64(i))) * 10 * time.Millisecond) + + client.Logger.Debugf("Try to reestablish an API connection, %d/%d tries..", i+1, maxRetries) + + httpClient := &http.Client{Transport: &client.ApiHttpTransport} + res, err := httpClient.Do(req) + if err != nil { + lastErr = err + client.Logger.Debugf("API probing failed: %v", lastErr) + continue + } + _ = res.Body.Close() + + if res.StatusCode != http.StatusOK { + lastErr = fmt.Errorf("expected HTTP status %d, got %d", http.StatusOK, res.StatusCode) + client.Logger.Debugf("API probing failed: %v", lastErr) + continue + } + return nil + } + return fmt.Errorf("cannot query API backend in %d tries, %w", maxRetries, lastErr) +} diff --git a/internal/eventstream/client_es.go b/internal/eventstream/client_es.go new file mode 100644 index 00000000..7e1fdea1 --- /dev/null +++ b/internal/eventstream/client_es.go @@ -0,0 +1,132 @@ +package eventstream + +import ( + "bufio" + "bytes" + "crypto/rand" + "encoding/json" + "fmt" + "github.com/icinga/icinga-notifications/internal/event" + "net/http" + "net/url" +) + +// This file contains Event Stream related methods of the Client. + +// eventStreamHandleStateChange acts on a received Event Stream StateChange object. +func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (event.Event, error) { + return client.buildHostServiceEvent(stateChange.CheckResult, stateChange.State, stateChange.Host, stateChange.Service), nil +} + +// eventStreamHandleAcknowledgementSet acts on a received Event Stream AcknowledgementSet object. 
+func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *AcknowledgementSet) (event.Event, error) { + var ( + eventName string + eventUrlSuffix string + eventTags map[string]string + ) + + if ackSet.Service != "" { + eventName = ackSet.Host + "!" + ackSet.Service + eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(ackSet.Service) + "&host.name=" + url.PathEscape(ackSet.Host) + eventTags = map[string]string{ + "host": ackSet.Host, + "service": ackSet.Service, + } + } else { + eventName = ackSet.Host + eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(ackSet.Host) + eventTags = map[string]string{ + "host": ackSet.Host, + } + } + + ev := event.Event{ + Time: ackSet.Timestamp.Time, + SourceId: client.IcingaNotificationsEventSourceId, + Name: eventName, + URL: client.IcingaWebRoot + eventUrlSuffix, + Tags: eventTags, + ExtraTags: nil, // TODO + Type: event.TypeAcknowledgement, + Username: ackSet.Author, + Message: ackSet.Comment, + } + return ev, nil +} + +// listenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. +// +// In case of a parsing or handling error, this error will be returned. If the server closes the connection, nil will +// be returned. 
+func (client *Client) listenEventStream() error { + queueNameRndBuff := make([]byte, 16) + _, _ = rand.Read(queueNameRndBuff) + + reqBody, err := json.Marshal(map[string]any{ + "queue": fmt.Sprintf("icinga-notifications-%x", queueNameRndBuff), + "types": []string{ + typeStateChange, + typeAcknowledgementSet, + // typeAcknowledgementCleared, + // typeCommentAdded, + // typeCommentRemoved, + // typeDowntimeAdded, + // typeDowntimeRemoved, + // typeDowntimeStarted, + // typeDowntimeTriggered, + }, + }) + if err != nil { + return err + } + + req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, client.ApiHost+"/v1/events", bytes.NewReader(reqBody)) + if err != nil { + return err + } + + req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + + httpClient := &http.Client{Transport: &client.ApiHttpTransport} + res, err := httpClient.Do(req) + if err != nil { + return err + } + defer func() { _ = res.Body.Close() }() + + lineScanner := bufio.NewScanner(res.Body) + for lineScanner.Scan() { + rawJson := lineScanner.Bytes() + + resp, err := UnmarshalEventStreamResponse(rawJson) + if err != nil { + return err + } + + var ev event.Event + switch respT := resp.(type) { + case *StateChange: + ev, err = client.eventStreamHandleStateChange(respT) + case *AcknowledgementSet: + ev, err = client.eventStreamHandleAcknowledgementSet(respT) + // case *AcknowledgementCleared: + // case *CommentAdded: + // case *CommentRemoved: + // case *DowntimeAdded: + // case *DowntimeRemoved: + // case *DowntimeStarted: + // case *DowntimeTriggered: + default: + err = fmt.Errorf("unsupported type %T", resp) + } + if err != nil { + return err + } + + client.handleEvent(ev, "Event Stream") + } + return lineScanner.Err() +} From fca785c6bd3a01ece70b497bfcfc98e8ed27f9f9 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 18 Oct 2023 14:34:03 +0200 Subject: [PATCH 12/65] 
eventstream: fetch Host/Service Groups from API As neither the Host nor the Service Group is part of the Event Stream return object, those needed to be queried separately. This first version fetches them directly through the "fast" API. For future versions, however, some kind of caching should be considered. --- cmd/icinga2-notification-source/main.go | 4 +- internal/eventstream/api_responses.go | 1 + internal/eventstream/api_responses_test.go | 12 +++--- internal/eventstream/client.go | 41 ++++++++++++++---- internal/eventstream/client_api.go | 50 ++++++++++++++++------ internal/eventstream/client_es.go | 13 +++--- 6 files changed, 86 insertions(+), 35 deletions(-) diff --git a/cmd/icinga2-notification-source/main.go b/cmd/icinga2-notification-source/main.go index 78b43c12..fe0d6a68 100644 --- a/cmd/icinga2-notification-source/main.go +++ b/cmd/icinga2-notification-source/main.go @@ -31,9 +31,9 @@ func main() { IcingaWebRoot: "http://localhost/icingaweb2", IcingaNotificationsEventSourceId: 1, - CallbackFn: func(event.Event) { /* nop */ }, + CallbackFn: func(ev *event.Event) { logs.GetLogger().Debugf("%#v", ev) }, Ctx: ctx, - Logger: logs.GetLogger(), + Logger: logs.GetChildLogger("ESClient"), } client.Process() } diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go index 3677b6ea..79013b09 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/eventstream/api_responses.go @@ -83,6 +83,7 @@ type Downtime struct { type HostServiceRuntimeAttributes struct { Name string `json:"__name"` Host string `json:"host_name,omitempty"` + Groups []string `json:"groups"` State int `json:"state"` LastCheckResult CheckResult `json:"last_check_result"` LastStateChange Icinga2Time `json:"last_state_change"` diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go index 6a88276e..6d2a08cd 100644 --- a/internal/eventstream/api_responses_test.go +++ b/internal/eventstream/api_responses_test.go 
@@ -156,8 +156,9 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Name: "dummy-244", Type: "Host", Attrs: &HostServiceRuntimeAttributes{ - Name: "dummy-244", - State: 0, + Name: "dummy-244", + Groups: []string{"app-network", "department-dev", "env-qa", "location-rome"}, + State: 0, LastCheckResult: CheckResult{ ExitStatus: 0, Output: "If you think last Tuesday was a drag, wait till you see what happens tomorrow!", @@ -185,9 +186,10 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Name: "docker-master!ssh", Type: "Service", Attrs: &HostServiceRuntimeAttributes{ - Name: "docker-master!ssh", - Host: "docker-master", - State: 2, + Name: "docker-master!ssh", + Host: "docker-master", + Groups: []string{}, + State: 2, LastCheckResult: CheckResult{ ExitStatus: 2, Output: "connect to address 127.0.0.1 and port 22: Connection refused", diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 1e7e2a06..39a0be86 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -30,7 +30,7 @@ type Client struct { IcingaWebRoot string // CallbackFn receives generated event.Events. - CallbackFn func(event event.Event) + CallbackFn func(*event.Event) // Ctx for all web requests as well as internal wait loops. Ctx context.Context // Logger to log to. @@ -45,21 +45,33 @@ type Client struct { // buildHostServiceEvent constructs an event.Event based on a CheckResult, a Host or Service state, a Host name and an // optional Service name if the Event should represent a Service object. 
-func (client *Client) buildHostServiceEvent(result CheckResult, state int, hostName, serviceName string) event.Event { +func (client *Client) buildHostServiceEvent(result CheckResult, state int, hostName, serviceName string) (*event.Event, error) { var ( eventName string eventUrlSuffix string eventTags map[string]string + eventExtraTags = make(map[string]string) eventSeverity event.Severity ) if serviceName != "" { eventName = hostName + "!" + serviceName + eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(serviceName) + "&host.name=" + url.PathEscape(hostName) + eventTags = map[string]string{ "host": hostName, "service": serviceName, } + + serviceGroups, err := client.fetchServiceGroups(hostName, serviceName) + if err != nil { + return nil, err + } + for _, serviceGroup := range serviceGroups { + eventExtraTags["servicegroup/"+serviceGroup] = "" + } + switch state { case 0: eventSeverity = event.SeverityOK @@ -72,10 +84,13 @@ func (client *Client) buildHostServiceEvent(result CheckResult, state int, hostN } } else { eventName = hostName + eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(hostName) + eventTags = map[string]string{ "host": hostName, } + switch state { case 0: eventSeverity = event.SeverityOK @@ -86,33 +101,41 @@ func (client *Client) buildHostServiceEvent(result CheckResult, state int, hostN } } - return event.Event{ + hostGroups, err := client.fetchHostGroups(hostName) + if err != nil { + return nil, err + } + for _, hostGroup := range hostGroups { + eventExtraTags["hostgroup/"+hostGroup] = "" + } + + return &event.Event{ Time: result.ExecutionEnd.Time, SourceId: client.IcingaNotificationsEventSourceId, Name: eventName, URL: client.IcingaWebRoot + eventUrlSuffix, Tags: eventTags, - ExtraTags: nil, // TODO + ExtraTags: eventExtraTags, Type: event.TypeState, Severity: eventSeverity, Username: "", // NOTE: a StateChange has no user per se Message: result.Output, - } + }, nil } // handleEvent checks and dispatches generated Events. 
-func (client *Client) handleEvent(ev event.Event, source string) { +func (client *Client) handleEvent(ev *event.Event, source string) { h := fnv.New64a() _ = json.NewEncoder(h).Encode(ev) evHash := h.Sum64() - client.Logger.Debugf("Start handling event %s received from %s", ev.String(), source) + client.Logger.Debugf("Start handling event %s received from %s", ev, source) client.eventsHandlerMutex.RLock() inCache := slices.Contains(client.eventsRingBuffer, evHash) client.eventsHandlerMutex.RUnlock() if inCache { - client.Logger.Warnf("Event %s received from %s is already in cache and will not be processed", ev.String(), source) + client.Logger.Warnf("Event %s received from %s is already in cache and will not be processed", ev, source) return } @@ -122,7 +145,7 @@ func (client *Client) handleEvent(ev event.Event, source string) { if ev.Time.Before(client.eventsLastTs) { client.Logger.Infof("Event %s received from %s generated at %v before last known timestamp %v; might be a replay", - ev.String(), source, ev.Time, client.eventsLastTs) + ev, source, ev.Time, client.eventsLastTs) } client.eventsLastTs = ev.Time client.eventsHandlerMutex.Unlock() diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 61ff0cd4..4638bd68 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -6,6 +6,7 @@ import ( "fmt" "math" "net/http" + "net/url" "strings" "time" ) @@ -46,8 +47,8 @@ func (client *Client) queryObjectsApiDirect(objType, objName string) ([]ObjectQu } // queryObjectsApiQuery sends a query to the Icinga 2 API /v1/objects to receive data of the given objType. 
-func (client *Client) queryObjectsApiQuery(objType string, payload map[string]any) ([]ObjectQueriesResult, error) { - reqBody, err := json.Marshal(payload) +func (client *Client) queryObjectsApiQuery(objType string, query map[string]any) ([]ObjectQueriesResult, error) { + reqBody, err := json.Marshal(query) if err != nil { return nil, err } @@ -65,23 +66,44 @@ func (client *Client) queryObjectsApiQuery(objType string, payload map[string]an return client.queryObjectsApi(req) } -// queryObjectApiSince retrieves all objects of the given type, e.g., "host" or "service", with a state change after the -// passed time. -func (client *Client) queryObjectApiSince(objType string, since time.Time) ([]ObjectQueriesResult, error) { - return client.queryObjectsApiQuery( - objType, - map[string]any{ - "filter": fmt.Sprintf("%s.last_state_change>%f", objType, float64(since.UnixMicro())/1_000_000.0), - }) +// fetchHostGroup fetches all Host Groups for this host. +func (client *Client) fetchHostGroups(host string) ([]string, error) { + objQueriesResults, err := client.queryObjectsApiDirect("host", url.PathEscape(host)) + if err != nil { + return nil, err + } + if len(objQueriesResults) != 1 { + return nil, fmt.Errorf("expected exactly one result for host %q instead of %d", host, len(objQueriesResults)) + } + + attrs := objQueriesResults[0].Attrs.(*HostServiceRuntimeAttributes) + return attrs.Groups, nil +} + +// fetchServiceGroups fetches all Service Groups for this service on this host. 
+func (client *Client) fetchServiceGroups(host, service string) ([]string, error) { + objQueriesResults, err := client.queryObjectsApiDirect("service", url.PathEscape(host)+"!"+url.PathEscape(service)) + if err != nil { + return nil, err + } + if len(objQueriesResults) != 1 { + return nil, fmt.Errorf("expected exactly one result for service %q instead of %d", host+"!"+service, len(objQueriesResults)) + } + + attrs := objQueriesResults[0].Attrs.(*HostServiceRuntimeAttributes) + return attrs.Groups, nil } // checkMissedObjects fetches all objects of the requested objType (host or service) from the API and sends those to the // handleEvent method to be eventually dispatched to the callback. func (client *Client) checkMissedObjects(objType string) { client.eventsHandlerMutex.RLock() - objQueriesResults, err := client.queryObjectApiSince(objType, client.eventsLastTs) + queryFilter := map[string]any{ + "filter": fmt.Sprintf("%s.last_state_change>%f", objType, float64(client.eventsLastTs.UnixMicro())/1_000_000.0), + } client.eventsHandlerMutex.RUnlock() + objQueriesResults, err := client.queryObjectsApiQuery(objType, queryFilter) if err != nil { client.Logger.Errorf("Quering %ss from API failed, %v", objType, err) return @@ -118,7 +140,11 @@ func (client *Client) checkMissedObjects(objType string) { continue } - ev := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, hostName, serviceName) + ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, hostName, serviceName) + if err != nil { + client.Logger.Error("Failed to construct Event from %s API: %v", objType, err) + continue + } client.handleEvent(ev, "API "+objType) } } diff --git a/internal/eventstream/client_es.go b/internal/eventstream/client_es.go index 7e1fdea1..49feaaa3 100644 --- a/internal/eventstream/client_es.go +++ b/internal/eventstream/client_es.go @@ -14,12 +14,12 @@ import ( // This file contains Event Stream related methods of the Client. 
// eventStreamHandleStateChange acts on a received Event Stream StateChange object. -func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (event.Event, error) { - return client.buildHostServiceEvent(stateChange.CheckResult, stateChange.State, stateChange.Host, stateChange.Service), nil +func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (*event.Event, error) { + return client.buildHostServiceEvent(stateChange.CheckResult, stateChange.State, stateChange.Host, stateChange.Service) } // eventStreamHandleAcknowledgementSet acts on a received Event Stream AcknowledgementSet object. -func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *AcknowledgementSet) (event.Event, error) { +func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *AcknowledgementSet) (*event.Event, error) { var ( eventName string eventUrlSuffix string @@ -41,7 +41,7 @@ func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *Acknowledgemen } } - ev := event.Event{ + return &event.Event{ Time: ackSet.Timestamp.Time, SourceId: client.IcingaNotificationsEventSourceId, Name: eventName, @@ -51,8 +51,7 @@ func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *Acknowledgemen Type: event.TypeAcknowledgement, Username: ackSet.Author, Message: ackSet.Comment, - } - return ev, nil + }, nil } // listenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. 
@@ -106,7 +105,7 @@ func (client *Client) listenEventStream() error { return err } - var ev event.Event + var ev *event.Event switch respT := resp.(type) { case *StateChange: ev, err = client.eventStreamHandleStateChange(respT) From f1ac287260156ed0ff9e858023a568869c22a71d Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 18 Oct 2023 17:40:47 +0200 Subject: [PATCH 13/65] eventstream: replay Acknowledgements, refactoring First, it is now also possible to query for missed Host or Service Acknowledgements and replay those with the linked Comment. To do so, some code was refactored. During testing[0] I realized that the debouncing gets a huge additional HTTP delay, which I have limited. [0] docker exec -it icinga2 kill -SIGHUP 1 && (ip6tables -I INPUT -p tcp --dport 5665 -j DROP; iptables -I INPUT -p tcp --dport 5665 -j DROP) --- cmd/icinga2-notification-source/main.go | 2 +- internal/eventstream/api_responses.go | 28 +++--- internal/eventstream/api_responses_test.go | 20 ++-- internal/eventstream/client.go | 108 ++++++++++++++------- internal/eventstream/client_api.go | 107 ++++++++++++++++---- internal/eventstream/client_es.go | 37 +------ 6 files changed, 196 insertions(+), 106 deletions(-) diff --git a/cmd/icinga2-notification-source/main.go b/cmd/icinga2-notification-source/main.go index fe0d6a68..80ba3529 100644 --- a/cmd/icinga2-notification-source/main.go +++ b/cmd/icinga2-notification-source/main.go @@ -17,7 +17,7 @@ func main() { ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) defer cancel() - logs, err := logging.NewLogging("ici2-noma", zap.InfoLevel, logging.CONSOLE, nil, time.Second) + logs, err := logging.NewLogging("ici2-noma", zap.DebugLevel, logging.CONSOLE, nil, time.Second) if err != nil { panic(err) } diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go index 79013b09..f36c1e15 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/eventstream/api_responses.go @@ -31,11 
+31,12 @@ func (iciTime *Icinga2Time) UnmarshalJSON(data []byte) error { // // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-comment type Comment struct { - Host string `json:"host_name"` - Service string `json:"service_name"` - Author string `json:"author"` - Text string `json:"text"` - EntryType int `json:"entry_type"` + Host string `json:"host_name"` + Service string `json:"service_name"` + Author string `json:"author"` + Text string `json:"text"` + EntryTime Icinga2Time `json:"entry_time"` + EntryType int `json:"entry_type"` } // CheckResult represents the Icinga 2 API CheckResult object. @@ -81,14 +82,15 @@ type Downtime struct { // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#host // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#service type HostServiceRuntimeAttributes struct { - Name string `json:"__name"` - Host string `json:"host_name,omitempty"` - Groups []string `json:"groups"` - State int `json:"state"` - LastCheckResult CheckResult `json:"last_check_result"` - LastStateChange Icinga2Time `json:"last_state_change"` - DowntimeDepth int `json:"downtime_depth"` - Acknowledgement int `json:"acknowledgement"` + Name string `json:"__name"` + Host string `json:"host_name,omitempty"` + Groups []string `json:"groups"` + State int `json:"state"` + LastCheckResult CheckResult `json:"last_check_result"` + LastStateChange Icinga2Time `json:"last_state_change"` + DowntimeDepth int `json:"downtime_depth"` + Acknowledgement int `json:"acknowledgement"` + AcknowledgementLastChange Icinga2Time `json:"acknowledgement_last_change"` } // ObjectQueriesResult represents the Icinga 2 API Object Queries Result wrapper object. 
diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go index 6d2a08cd..78a77875 100644 --- a/internal/eventstream/api_responses_test.go +++ b/internal/eventstream/api_responses_test.go @@ -99,6 +99,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Host: "dummy-0", Author: "icingaadmin", Text: "foo bar", + EntryTime: Icinga2Time{time.UnixMicro(1697454753536457)}, EntryType: 1, }, }, @@ -116,6 +117,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Author: "icingaadmin", Text: "adfadsfasdfasdf", EntryType: 1, + EntryTime: Icinga2Time{time.UnixMicro(1697197701307516)}, }, }, }, @@ -172,9 +174,10 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { ExecutionStart: Icinga2Time{time.UnixMicro(1697459643863147)}, ExecutionEnd: Icinga2Time{time.UnixMicro(1697459643868893)}, }, - LastStateChange: Icinga2Time{time.UnixMicro(1697099900637215)}, - DowntimeDepth: 0, - Acknowledgement: 0, + LastStateChange: Icinga2Time{time.UnixMicro(1697099900637215)}, + DowntimeDepth: 0, + Acknowledgement: 0, + AcknowledgementLastChange: Icinga2Time{time.UnixMicro(0)}, }, }, }, @@ -201,9 +204,10 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { ExecutionStart: Icinga2Time{time.UnixMicro(1697460711130247)}, ExecutionEnd: Icinga2Time{time.UnixMicro(1697460711134875)}, }, - LastStateChange: Icinga2Time{time.UnixMicro(1697099896120829)}, - DowntimeDepth: 0, - Acknowledgement: 1, + LastStateChange: Icinga2Time{time.UnixMicro(1697099896120829)}, + DowntimeDepth: 0, + Acknowledgement: 1, + AcknowledgementLastChange: Icinga2Time{time.UnixMicro(1697460655878141)}, }, }, }, @@ -353,6 +357,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Author: "icingaadmin", Text: "oh noes", EntryType: 1, + EntryTime: Icinga2Time{time.UnixMicro(1697191791097852)}, }, }, }, @@ -367,6 +372,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Author: "icingaadmin", Text: "if in doubt, check ticket #23", EntryType: 1, + EntryTime: 
Icinga2Time{time.UnixMicro(1697197990035889)}, }, }, }, @@ -380,6 +386,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Author: "icingaadmin", Text: "oh noes", EntryType: 1, + EntryTime: Icinga2Time{time.UnixMicro(1697191791097852)}, }, }, }, @@ -394,6 +401,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Author: "icingaadmin", Text: "if in doubt, check ticket #23", EntryType: 1, + EntryTime: Icinga2Time{time.UnixMicro(1697197990035889)}, }, }, }, diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 39a0be86..0d1436c9 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -43,35 +43,71 @@ type Client struct { eventsLastTs time.Time } -// buildHostServiceEvent constructs an event.Event based on a CheckResult, a Host or Service state, a Host name and an -// optional Service name if the Event should represent a Service object. -func (client *Client) buildHostServiceEvent(result CheckResult, state int, hostName, serviceName string) (*event.Event, error) { +// buildCommonEvent creates an event.Event based on Host and (optional) Service attributes to be specified later. +// +// The following fields will NOT be populated and might be altered later: +// - Time +// - Type +// - Severity +// - Username +// - Message +// - ID +func (client *Client) buildCommonEvent(host, service string) (*event.Event, error) { var ( eventName string eventUrlSuffix string eventTags map[string]string eventExtraTags = make(map[string]string) - eventSeverity event.Severity ) - if serviceName != "" { - eventName = hostName + "!" + serviceName - - eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(serviceName) + "&host.name=" + url.PathEscape(hostName) + if service != "" { + eventName = host + "!" 
+ service + eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(service) + "&host.name=" + url.PathEscape(host) eventTags = map[string]string{ - "host": hostName, - "service": serviceName, + "host": host, + "service": service, } - serviceGroups, err := client.fetchServiceGroups(hostName, serviceName) + serviceGroups, err := client.fetchServiceGroups(host, service) if err != nil { return nil, err } for _, serviceGroup := range serviceGroups { eventExtraTags["servicegroup/"+serviceGroup] = "" } + } else { + eventName = host + eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(host) + + eventTags = map[string]string{ + "host": host, + } + } + + hostGroups, err := client.fetchHostGroups(host) + if err != nil { + return nil, err + } + for _, hostGroup := range hostGroups { + eventExtraTags["hostgroup/"+hostGroup] = "" + } + + return &event.Event{ + SourceId: client.IcingaNotificationsEventSourceId, + Name: eventName, + URL: client.IcingaWebRoot + eventUrlSuffix, + Tags: eventTags, + ExtraTags: eventExtraTags, + }, nil +} +// buildHostServiceEvent constructs an event.Event based on a CheckResult, a Host or Service state, a Host name and an +// optional Service name if the Event should represent a Service object. 
+func (client *Client) buildHostServiceEvent(result CheckResult, state int, host, service string) (*event.Event, error) { + var eventSeverity event.Severity + + if service != "" { switch state { case 0: eventSeverity = event.SeverityOK @@ -83,14 +119,6 @@ func (client *Client) buildHostServiceEvent(result CheckResult, state int, hostN eventSeverity = event.SeverityErr } } else { - eventName = hostName - - eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(hostName) - - eventTags = map[string]string{ - "host": hostName, - } - switch state { case 0: eventSeverity = event.SeverityOK @@ -101,26 +129,32 @@ func (client *Client) buildHostServiceEvent(result CheckResult, state int, hostN } } - hostGroups, err := client.fetchHostGroups(hostName) + ev, err := client.buildCommonEvent(host, service) if err != nil { return nil, err } - for _, hostGroup := range hostGroups { - eventExtraTags["hostgroup/"+hostGroup] = "" + + ev.Time = result.ExecutionEnd.Time + ev.Type = event.TypeState + ev.Severity = eventSeverity + ev.Message = result.Output + + return ev, nil +} + +// buildAcknowledgementEvent from the given fields. +func (client *Client) buildAcknowledgementEvent(ts time.Time, host, service, author, comment string) (*event.Event, error) { + ev, err := client.buildCommonEvent(host, service) + if err != nil { + return nil, err } - return &event.Event{ - Time: result.ExecutionEnd.Time, - SourceId: client.IcingaNotificationsEventSourceId, - Name: eventName, - URL: client.IcingaWebRoot + eventUrlSuffix, - Tags: eventTags, - ExtraTags: eventExtraTags, - Type: event.TypeState, - Severity: eventSeverity, - Username: "", // NOTE: a StateChange has no user per se - Message: result.Output, - }, nil + ev.Time = ts + ev.Type = event.TypeAcknowledgement + ev.Username = author + ev.Message = comment + + return ev, nil } // handleEvent checks and dispatches generated Events. 
@@ -186,7 +220,9 @@ func (client *Client) Process() { client.Logger.Errorf("Cannot reestablish an API connection: %v", err) } - go client.checkMissedObjects("host") - go client.checkMissedObjects("service") + go client.checkMissedStateChanges("host") + go client.checkMissedStateChanges("service") + go client.checkMissedAcknowledgements("host") + go client.checkMissedAcknowledgements("service") } } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 4638bd68..50f2c3a0 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -6,7 +6,7 @@ import ( "fmt" "math" "net/http" - "net/url" + "slices" "strings" "time" ) @@ -68,7 +68,7 @@ func (client *Client) queryObjectsApiQuery(objType string, query map[string]any) // fetchHostGroup fetches all Host Groups for this host. func (client *Client) fetchHostGroups(host string) ([]string, error) { - objQueriesResults, err := client.queryObjectsApiDirect("host", url.PathEscape(host)) + objQueriesResults, err := client.queryObjectsApiDirect("host", host) if err != nil { return nil, err } @@ -82,7 +82,7 @@ func (client *Client) fetchHostGroups(host string) ([]string, error) { // fetchServiceGroups fetches all Service Groups for this service on this host. func (client *Client) fetchServiceGroups(host, service string) ([]string, error) { - objQueriesResults, err := client.queryObjectsApiDirect("service", url.PathEscape(host)+"!"+url.PathEscape(service)) + objQueriesResults, err := client.queryObjectsApiDirect("service", host+"!"+service) if err != nil { return nil, err } @@ -94,16 +94,46 @@ func (client *Client) fetchServiceGroups(host, service string) ([]string, error) return attrs.Groups, nil } -// checkMissedObjects fetches all objects of the requested objType (host or service) from the API and sends those to the -// handleEvent method to be eventually dispatched to the callback. 
-func (client *Client) checkMissedObjects(objType string) { - client.eventsHandlerMutex.RLock() - queryFilter := map[string]any{ - "filter": fmt.Sprintf("%s.last_state_change>%f", objType, float64(client.eventsLastTs.UnixMicro())/1_000_000.0), +// fetchAcknowledgementComment fetches an Acknowledgement Comment for a Host (empty service) or for a Service at a Host. +// +// Unfortunately, there is no direct link between ACK'ed Host or Service objects and their acknowledgement Comment. The +// closest we can do, is query for Comments with the Acknowledgement Service Type and the host/service name. In addition, +// the Host's resp. Service's AcknowledgementLastChange field has NOT the same timestamp as the Comment; there is a +// difference of some milliseconds. As there might be even multiple ACK comments, we have to find the closest one. +func (client *Client) fetchAcknowledgementComment(host, service string, ackTime time.Time) (*Comment, error) { + filterExpr := `comment.entry_type == 4 && comment.host_name == "` + host + `"` + if service != "" { + filterExpr += ` && comment.service_name == "` + service + `"` } - client.eventsHandlerMutex.RUnlock() - objQueriesResults, err := client.queryObjectsApiQuery(objType, queryFilter) + objQueriesResults, err := client.queryObjectsApiQuery("comment", map[string]any{"filter": filterExpr}) + if err != nil { + return nil, err + } + if len(objQueriesResults) == 0 { + return nil, fmt.Errorf("found no ACK Comments found for %q", filterExpr) + } + + comments := make([]*Comment, len(objQueriesResults)) + for i, objQueriesResult := range objQueriesResults { + comments[i] = objQueriesResult.Attrs.(*Comment) + } + + slices.SortFunc(comments, func(a, b *Comment) int { + distA := a.EntryTime.Time.Sub(ackTime).Abs() + distB := b.EntryTime.Time.Sub(ackTime).Abs() + return int(distA - distB) + }) + if comments[0].EntryTime.Sub(ackTime).Abs() > time.Second { + return nil, fmt.Errorf("found no ACK Comment for %q close to %v", filterExpr, 
ackTime) + } + + return comments[0], nil +} + +// checkMissedChanges queries for Service or Host objects with a specific filter to handle missed elements. +func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallbackFn func(attrs *HostServiceRuntimeAttributes, host, service string)) { + objQueriesResults, err := client.queryObjectsApiQuery(objType, map[string]any{"filter": filterExpr}) if err != nil { client.Logger.Errorf("Quering %ss from API failed, %v", objType, err) return @@ -112,7 +142,7 @@ func (client *Client) checkMissedObjects(objType string) { return } - client.Logger.Infof("Querying %ss from API resulted in %d objects to replay", objType, len(objQueriesResults)) + client.Logger.Infof("Querying %ss from API resulted in %d state changes to replay", objType, len(objQueriesResults)) for _, objQueriesResult := range objQueriesResults { if client.Ctx.Err() != nil { @@ -140,18 +170,58 @@ func (client *Client) checkMissedObjects(objType string) { continue } - ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, hostName, serviceName) + attrsCallbackFn(attrs, hostName, serviceName) + } +} + +// checkMissedStateChanges fetches missed Host or Service state changes and feeds them into the handler. 
+func (client *Client) checkMissedStateChanges(objType string) { + client.eventsHandlerMutex.RLock() + filterExpr := fmt.Sprintf("%s.last_state_change>%f", + objType, float64(client.eventsLastTs.UnixMicro())/1_000_000.0) + client.eventsHandlerMutex.RUnlock() + + client.checkMissedChanges(objType, filterExpr, func(attrs *HostServiceRuntimeAttributes, host, service string) { + ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) if err != nil { client.Logger.Error("Failed to construct Event from %s API: %v", objType, err) - continue + return } + client.handleEvent(ev, "API "+objType) - } + }) +} + +// checkMissedAcknowledgements fetches missed set Host or Service Acknowledgements and feeds them into the handler. +func (client *Client) checkMissedAcknowledgements(objType string) { + client.eventsHandlerMutex.RLock() + filterExpr := fmt.Sprintf("%s.acknowledgement && %s.acknowledgement_last_change>%f", + objType, objType, float64(client.eventsLastTs.UnixMicro())/1_000_000.0) + client.eventsHandlerMutex.RUnlock() + + client.checkMissedChanges(objType, filterExpr, func(attrs *HostServiceRuntimeAttributes, host, service string) { + ackComment, err := client.fetchAcknowledgementComment(host, service, attrs.AcknowledgementLastChange.Time) + if err != nil { + client.Logger.Errorf("Cannot fetch ACK Comment for Acknowledgement, %v", err) + return + } + + ev, err := client.buildAcknowledgementEvent( + attrs.AcknowledgementLastChange.Time, + host, service, + ackComment.Author, ackComment.Text) + if err != nil { + client.Logger.Error("Failed to construct Event from Acknowledgement %s API: %v", objType, err) + return + } + + client.handleEvent(ev, "ACK API "+objType) + }) } // reestablishApiConnection tries to access the Icinga 2 API with an exponential backoff. // -// With 10 retries, it might block up to (2^10 - 1) * 10 / 1_000 = 10.23 seconds. 
+// With 10 retries, it might block up to (2^10 - 1) * 10 / 1_000 = 10.23 seconds plus additional HTTP delays. func (client *Client) reestablishApiConnection() error { const maxRetries = 10 @@ -170,7 +240,10 @@ func (client *Client) reestablishApiConnection() error { client.Logger.Debugf("Try to reestablish an API connection, %d/%d tries..", i+1, maxRetries) - httpClient := &http.Client{Transport: &client.ApiHttpTransport} + httpClient := &http.Client{ + Transport: &client.ApiHttpTransport, + Timeout: time.Second, + } res, err := httpClient.Do(req) if err != nil { lastErr = err diff --git a/internal/eventstream/client_es.go b/internal/eventstream/client_es.go index 49feaaa3..ce8b772b 100644 --- a/internal/eventstream/client_es.go +++ b/internal/eventstream/client_es.go @@ -8,7 +8,6 @@ import ( "fmt" "github.com/icinga/icinga-notifications/internal/event" "net/http" - "net/url" ) // This file contains Event Stream related methods of the Client. @@ -20,38 +19,10 @@ func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (*e // eventStreamHandleAcknowledgementSet acts on a received Event Stream AcknowledgementSet object. func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *AcknowledgementSet) (*event.Event, error) { - var ( - eventName string - eventUrlSuffix string - eventTags map[string]string - ) - - if ackSet.Service != "" { - eventName = ackSet.Host + "!" 
+ ackSet.Service - eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(ackSet.Service) + "&host.name=" + url.PathEscape(ackSet.Host) - eventTags = map[string]string{ - "host": ackSet.Host, - "service": ackSet.Service, - } - } else { - eventName = ackSet.Host - eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(ackSet.Host) - eventTags = map[string]string{ - "host": ackSet.Host, - } - } - - return &event.Event{ - Time: ackSet.Timestamp.Time, - SourceId: client.IcingaNotificationsEventSourceId, - Name: eventName, - URL: client.IcingaWebRoot + eventUrlSuffix, - Tags: eventTags, - ExtraTags: nil, // TODO - Type: event.TypeAcknowledgement, - Username: ackSet.Author, - Message: ackSet.Comment, - }, nil + return client.buildAcknowledgementEvent( + ackSet.Timestamp.Time, + ackSet.Host, ackSet.Service, + ackSet.Author, ackSet.Comment) } // listenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. From dd142f18022dc4770d9e005f78702bf902f4957c Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 19 Oct 2023 09:39:47 +0200 Subject: [PATCH 14/65] eventstream: test type assertions just to be sure --- internal/eventstream/client_api.go | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 50f2c3a0..36ab50d7 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -76,7 +76,10 @@ func (client *Client) fetchHostGroups(host string) ([]string, error) { return nil, fmt.Errorf("expected exactly one result for host %q instead of %d", host, len(objQueriesResults)) } - attrs := objQueriesResults[0].Attrs.(*HostServiceRuntimeAttributes) + attrs, ok := objQueriesResults[0].Attrs.(*HostServiceRuntimeAttributes) + if !ok { + return nil, fmt.Errorf("queried object's attrs are of wrong type %T", attrs) + } return attrs.Groups, nil } @@ -90,7 +93,10 @@ func (client *Client) fetchServiceGroups(host, 
service string) ([]string, error) return nil, fmt.Errorf("expected exactly one result for service %q instead of %d", host+"!"+service, len(objQueriesResults)) } - attrs := objQueriesResults[0].Attrs.(*HostServiceRuntimeAttributes) + attrs, ok := objQueriesResults[0].Attrs.(*HostServiceRuntimeAttributes) + if !ok { + return nil, fmt.Errorf("queried object's attrs are of wrong type %T", attrs) + } return attrs.Groups, nil } @@ -116,7 +122,11 @@ func (client *Client) fetchAcknowledgementComment(host, service string, ackTime comments := make([]*Comment, len(objQueriesResults)) for i, objQueriesResult := range objQueriesResults { - comments[i] = objQueriesResult.Attrs.(*Comment) + c, ok := objQueriesResult.Attrs.(*Comment) + if !ok { + return nil, fmt.Errorf("queried object's attrs are of wrong type %T", c) + } + comments[i] = c } slices.SortFunc(comments, func(a, b *Comment) int { @@ -150,7 +160,11 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba return } - attrs := objQueriesResult.Attrs.(*HostServiceRuntimeAttributes) + attrs, ok := objQueriesResult.Attrs.(*HostServiceRuntimeAttributes) + if !ok { + client.Logger.Errorf("Queried %s API response object's attrs are of wrong type %T", objType, attrs) + continue + } var hostName, serviceName string switch objQueriesResult.Type { From ab528b683ffd5a4ec7550edf41b727de3a3e76d2 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 19 Oct 2023 13:24:00 +0200 Subject: [PATCH 15/65] Initial Event Stream API integration With this change, multiple Icinga 2 APIs can be configured in the YAML configuration to be used as Event Stream based event providers. To get there, some other potential bugs, e.g., making sure that no duplicate slashes are part of any Icinga 2 API URL, were addressed first. 
--- cmd/icinga-notifications-daemon/main.go | 26 +++++++ cmd/icinga2-notification-source/main.go | 39 ----------- config.example.yml | 7 ++ internal/daemon/config.go | 9 +++ internal/eventstream/client_api.go | 23 ++++-- internal/eventstream/client_es.go | 7 +- internal/eventstream/util.go | 93 +++++++++++++++++++++++++ 7 files changed, 159 insertions(+), 45 deletions(-) delete mode 100644 cmd/icinga2-notification-source/main.go create mode 100644 internal/eventstream/util.go diff --git a/cmd/icinga-notifications-daemon/main.go b/cmd/icinga-notifications-daemon/main.go index b0f8f438..1a599487 100644 --- a/cmd/icinga-notifications-daemon/main.go +++ b/cmd/icinga-notifications-daemon/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "crypto/tls" "flag" "fmt" "github.com/icinga/icinga-notifications/internal" @@ -13,11 +14,14 @@ import ( "github.com/icinga/icingadb/pkg/logging" "github.com/icinga/icingadb/pkg/utils" "go.uber.org/zap" + "net/http" "os" "os/signal" "runtime" "syscall" "time" + + "github.com/icinga/icinga-notifications/internal/eventstream" ) func main() { @@ -97,6 +101,28 @@ func main() { logger.Fatalw("Can't load incidents from database", zap.Error(err)) } + for _, icinga2Api := range conf.Icinga2Apis { + logger := logs.GetChildLogger(fmt.Sprintf("eventstream-%d", icinga2Api.NotificationsEventSourceId)) + + esClient := eventstream.Client{ + ApiHost: icinga2Api.Host, + ApiBasicAuthUser: icinga2Api.AuthUser, + ApiBasicAuthPass: icinga2Api.AuthPass, + + IcingaNotificationsEventSourceId: icinga2Api.NotificationsEventSourceId, + IcingaWebRoot: conf.Icingaweb2URL, + + CallbackFn: eventstream.MakeProcessEvent(db, logger, logs, runtimeConfig), + Ctx: ctx, + Logger: logger, + } + if icinga2Api.InsecureTls { + esClient.ApiHttpTransport = http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}} + } + + go esClient.Process() + } + if err := listener.NewListener(db, runtimeConfig, logs).Run(ctx); err != nil { logger.Errorw("Listener has 
finished with an error", zap.Error(err)) } else { diff --git a/cmd/icinga2-notification-source/main.go b/cmd/icinga2-notification-source/main.go deleted file mode 100644 index 80ba3529..00000000 --- a/cmd/icinga2-notification-source/main.go +++ /dev/null @@ -1,39 +0,0 @@ -package main - -import ( - "context" - "crypto/tls" - "github.com/icinga/icinga-notifications/internal/event" - "github.com/icinga/icinga-notifications/internal/eventstream" - "github.com/icinga/icingadb/pkg/logging" - "go.uber.org/zap" - "net/http" - "os" - "os/signal" - "time" -) - -func main() { - ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) - defer cancel() - - logs, err := logging.NewLogging("ici2-noma", zap.DebugLevel, logging.CONSOLE, nil, time.Second) - if err != nil { - panic(err) - } - - client := eventstream.Client{ - ApiHost: "https://localhost:5665", - ApiBasicAuthUser: "root", - ApiBasicAuthPass: "icinga", - ApiHttpTransport: http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}, - - IcingaWebRoot: "http://localhost/icingaweb2", - IcingaNotificationsEventSourceId: 1, - - CallbackFn: func(ev *event.Event) { logs.GetLogger().Debugf("%#v", ev) }, - Ctx: ctx, - Logger: logs.GetChildLogger("ESClient"), - } - client.Process() -} diff --git a/config.example.yml b/config.example.yml index 4877f16f..5ae05537 100644 --- a/config.example.yml +++ b/config.example.yml @@ -9,6 +9,13 @@ icingaweb2-url: http://localhost/icingaweb2/ channel-plugin-dir: /usr/libexec/icinga-notifications/channel +icinga2-apis: + - notifications-event-source-id: 1 + host: https://localhost:5665 + auth-user: root + auth-pass: icinga + # insecure-tls: true + database: type: pgsql host: /run/postgresql diff --git a/internal/daemon/config.go b/internal/daemon/config.go index bd4c4983..51a32e56 100644 --- a/internal/daemon/config.go +++ b/internal/daemon/config.go @@ -8,11 +8,20 @@ import ( "os" ) +type Icinga2ApiConfig struct { + NotificationsEventSourceId int64 
`yaml:"notifications-event-source-id"` + Host string `yaml:"host"` + AuthUser string `yaml:"auth-user"` + AuthPass string `yaml:"auth-pass"` + InsecureTls bool `yaml:"insecure-tls"` +} + type ConfigFile struct { Listen string `yaml:"listen" default:"localhost:5680"` DebugPassword string `yaml:"debug-password"` ChannelPluginDir string `yaml:"channel-plugin-dir" default:"/usr/libexec/icinga-notifications/channel"` Icingaweb2URL string `yaml:"icingaweb2-url"` + Icinga2Apis []Icinga2ApiConfig `yaml:"icinga2-apis"` Database icingadbConfig.Database `yaml:"database"` Logging icingadbConfig.Logging `yaml:"logging"` } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 36ab50d7..78fdfb9e 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -6,6 +6,7 @@ import ( "fmt" "math" "net/http" + "net/url" "slices" "strings" "time" @@ -35,7 +36,11 @@ func (client *Client) queryObjectsApi(req *http.Request) ([]ObjectQueriesResult, // queryObjectsApiDirect performs a direct resp. "fast" API query against a specific object identified by its name. 
func (client *Client) queryObjectsApiDirect(objType, objName string) ([]ObjectQueriesResult, error) { - req, err := http.NewRequestWithContext(client.Ctx, http.MethodGet, client.ApiHost+"/v1/objects/"+objType+"s/"+objName, nil) + apiUrl, err := url.JoinPath(client.ApiHost, "/v1/objects/", objType+"s/", objName) + if err != nil { + return nil, err + } + req, err := http.NewRequestWithContext(client.Ctx, http.MethodGet, apiUrl, nil) if err != nil { return nil, err } @@ -53,7 +58,11 @@ func (client *Client) queryObjectsApiQuery(objType string, query map[string]any) return nil, err } - req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, client.ApiHost+"/v1/objects/"+objType+"s", bytes.NewReader(reqBody)) + apiUrl, err := url.JoinPath(client.ApiHost, "/v1/objects/", objType+"s") + if err != nil { + return nil, err + } + req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, apiUrl, bytes.NewReader(reqBody)) if err != nil { return nil, err } @@ -198,7 +207,7 @@ func (client *Client) checkMissedStateChanges(objType string) { client.checkMissedChanges(objType, filterExpr, func(attrs *HostServiceRuntimeAttributes, host, service string) { ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) if err != nil { - client.Logger.Error("Failed to construct Event from %s API: %v", objType, err) + client.Logger.Errorf("Failed to construct Event from %s API: %v", objType, err) return } @@ -225,7 +234,7 @@ func (client *Client) checkMissedAcknowledgements(objType string) { host, service, ackComment.Author, ackComment.Text) if err != nil { - client.Logger.Error("Failed to construct Event from Acknowledgement %s API: %v", objType, err) + client.Logger.Errorf("Failed to construct Event from Acknowledgement %s API: %v", objType, err) return } @@ -239,7 +248,11 @@ func (client *Client) checkMissedAcknowledgements(objType string) { func (client *Client) reestablishApiConnection() error { const maxRetries = 10 - req, err := 
http.NewRequestWithContext(client.Ctx, http.MethodGet, client.ApiHost+"/v1/", nil) + apiUrl, err := url.JoinPath(client.ApiHost, "/v1/") + if err != nil { + return err + } + req, err := http.NewRequestWithContext(client.Ctx, http.MethodGet, apiUrl, nil) if err != nil { return err } diff --git a/internal/eventstream/client_es.go b/internal/eventstream/client_es.go index ce8b772b..2cd67ee9 100644 --- a/internal/eventstream/client_es.go +++ b/internal/eventstream/client_es.go @@ -8,6 +8,7 @@ import ( "fmt" "github.com/icinga/icinga-notifications/internal/event" "net/http" + "net/url" ) // This file contains Event Stream related methods of the Client. @@ -51,7 +52,11 @@ func (client *Client) listenEventStream() error { return err } - req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, client.ApiHost+"/v1/events", bytes.NewReader(reqBody)) + apiUrl, err := url.JoinPath(client.ApiHost, "/v1/events") + if err != nil { + return err + } + req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, apiUrl, bytes.NewReader(reqBody)) if err != nil { return err } diff --git a/internal/eventstream/util.go b/internal/eventstream/util.go new file mode 100644 index 00000000..013d8da5 --- /dev/null +++ b/internal/eventstream/util.go @@ -0,0 +1,93 @@ +package eventstream + +import ( + "context" + "github.com/icinga/icinga-notifications/internal/config" + "github.com/icinga/icinga-notifications/internal/event" + "github.com/icinga/icinga-notifications/internal/incident" + "github.com/icinga/icinga-notifications/internal/object" + "github.com/icinga/icingadb/pkg/icingadb" + "github.com/icinga/icingadb/pkg/logging" + "go.uber.org/zap" +) + +// ProcessEvent is a copy pasta version of the second half of Listener.ProcessEvent to be removed after #99 has landed. 
+func ProcessEvent( + ev *event.Event, + db *icingadb.DB, + logger *logging.Logger, + logs *logging.Logging, + runtimeConfig *config.RuntimeConfig, +) { + ctx := context.Background() + obj, err := object.FromEvent(ctx, db, ev) + if err != nil { + logger.Errorw("Can't sync object", zap.Error(err)) + return + } + + tx, err := db.BeginTxx(ctx, nil) + if err != nil { + logger.Errorw("Can't start a db transaction", zap.Error(err)) + return + } + defer func() { _ = tx.Rollback() }() + + if err := ev.Sync(ctx, tx, db, obj.ID); err != nil { + logger.Errorw("Failed to insert event and fetch its ID", zap.String("event", ev.String()), zap.Error(err)) + return + } + + createIncident := ev.Severity != event.SeverityNone && ev.Severity != event.SeverityOK + currentIncident, created, err := incident.GetCurrent( + ctx, + db, + obj, + logs.GetChildLogger("incident"), + runtimeConfig, + createIncident) + if err != nil { + logger.Errorw("Failed to get current incident", zap.Error(err)) + return + } + + if currentIncident == nil { + if ev.Type == event.TypeAcknowledgement { + logger.Warnf("%q doesn't have active incident. 
Ignoring acknowledgement event from source %d", obj.DisplayName(), ev.SourceId) + return + } + + if ev.Severity != event.SeverityOK { + logger.Error("non-OK state but no incident was created") + return + } + + logger.Warnw("Ignoring superfluous OK state event from source %d", zap.Int64("source", ev.SourceId), zap.String("object", obj.DisplayName())) + return + } + + logger.Debugf("Processing event %v", ev) + + if err := currentIncident.ProcessEvent(ctx, ev, created); err != nil { + logger.Errorw("Failed to process current incident", zap.Error(err)) + return + } + + if err = tx.Commit(); err != nil { + logger.Errorw( + "Can't commit db transaction", zap.String("object", obj.DisplayName()), + zap.String("incident", currentIncident.String()), zap.Error(err), + ) + return + } +} + +// MakeProcessEvent creates a closure around ProcessEvent to wrap all arguments except the event.Event. +func MakeProcessEvent( + db *icingadb.DB, + logger *logging.Logger, + logs *logging.Logging, + runtimeConfig *config.RuntimeConfig, +) func(*event.Event) { + return func(ev *event.Event) { ProcessEvent(ev, db, logger, logs, runtimeConfig) } +} From bac4095d8572d5504973738c67a286ee472debb3 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 19 Oct 2023 15:36:42 +0200 Subject: [PATCH 16/65] Verify Icinga 2 API's CA certificate --- cmd/icinga-notifications-daemon/main.go | 22 +++++++++++++++++++++- config.example.yml | 3 +++ internal/daemon/config.go | 1 + 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/cmd/icinga-notifications-daemon/main.go b/cmd/icinga-notifications-daemon/main.go index 1a599487..6dda4a3f 100644 --- a/cmd/icinga-notifications-daemon/main.go +++ b/cmd/icinga-notifications-daemon/main.go @@ -3,6 +3,7 @@ package main import ( "context" "crypto/tls" + "crypto/x509" "flag" "fmt" "github.com/icinga/icinga-notifications/internal" @@ -116,8 +117,27 @@ func main() { Ctx: ctx, Logger: logger, } - if icinga2Api.InsecureTls { + + switch { + case 
icinga2Api.IcingaCaFile != "": + caData, err := os.ReadFile(icinga2Api.IcingaCaFile) + if err != nil { + logger.Errorw("Cannot read CA file", zap.String("file", icinga2Api.IcingaCaFile), zap.Error(err)) + continue + } + + certPool := x509.NewCertPool() + if !certPool.AppendCertsFromPEM(caData) { + logger.Error("Cannot add CA file to cert pool") + continue + } + + esClient.ApiHttpTransport = http.Transport{TLSClientConfig: &tls.Config{RootCAs: certPool}} + logger.Debug("Configured custom CA file for API HTTPS requests") + + case icinga2Api.InsecureTls: esClient.ApiHttpTransport = http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}} + logger.Warn("Skipping TLS verification for API HTTPS requests") } go esClient.Process() diff --git a/config.example.yml b/config.example.yml index 5ae05537..934508da 100644 --- a/config.example.yml +++ b/config.example.yml @@ -14,6 +14,9 @@ icinga2-apis: host: https://localhost:5665 auth-user: root auth-pass: icinga + # The Icinga 2 API CA must either be in the system's CA store, be passed as + # icinga-ca-file or certificate verification can be disabled. + # icinga-ca-file: /path/to/icinga-ca.crt # insecure-tls: true database: diff --git a/internal/daemon/config.go b/internal/daemon/config.go index 51a32e56..9d499f83 100644 --- a/internal/daemon/config.go +++ b/internal/daemon/config.go @@ -13,6 +13,7 @@ type Icinga2ApiConfig struct { Host string `yaml:"host"` AuthUser string `yaml:"auth-user"` AuthPass string `yaml:"auth-pass"` + IcingaCaFile string `yaml:"icinga-ca-file"` InsecureTls bool `yaml:"insecure-tls"` } From f403eab8dcea34d8706b5ccee6506f92bbc69862 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 19 Oct 2023 16:01:07 +0200 Subject: [PATCH 17/65] eventstream: HostServiceRuntimeAttributes real name field Turns out, both Host and Service objects do have a real name field which contains their name. Thus, the hacky __name field can be dropped. 
--- internal/eventstream/api_responses.go | 8 ++------ internal/eventstream/api_responses_test.go | 2 +- internal/eventstream/client_api.go | 9 ++------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go index f36c1e15..c6e209fb 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/eventstream/api_responses.go @@ -69,12 +69,8 @@ type Downtime struct { // When catching up potentially missed changes, the following fields are holding relevant changes which, fortunately, // are identical for Icinga 2 Host and Service objects. // -// According to the documentation, neither the Host nor the Service name is part of the attributes for Host resp. -// Service objects. However, next to being part of the wrapping API response, see ObjectQueriesResult, it is also -// available in the "__name" attribute, reflected in the Name field. For Service objects, it is "${host}!${service}". -// Furthermore, Service objects have a required non-empty reference to their Host. -// // NOTE: +// - Name is either the Host or the Service name. // - Host is empty for Host objects; Host contains the Service's Host object name for Services. // - State might be 0 = UP, 1 = DOWN for hosts and 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN for services. // - Acknowledgement type is 0 = NONE, 1 = NORMAL, 2 = STICKY. 
@@ -82,7 +78,7 @@ type Downtime struct { // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#host // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#service type HostServiceRuntimeAttributes struct { - Name string `json:"__name"` + Name string `json:"name"` Host string `json:"host_name,omitempty"` Groups []string `json:"groups"` State int `json:"state"` diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go index 78a77875..38015f0c 100644 --- a/internal/eventstream/api_responses_test.go +++ b/internal/eventstream/api_responses_test.go @@ -189,7 +189,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Name: "docker-master!ssh", Type: "Service", Attrs: &HostServiceRuntimeAttributes{ - Name: "docker-master!ssh", + Name: "ssh", Host: "docker-master", Groups: []string{}, State: 2, diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 78fdfb9e..8ea806ad 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -8,7 +8,6 @@ import ( "net/http" "net/url" "slices" - "strings" "time" ) @@ -165,7 +164,7 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba for _, objQueriesResult := range objQueriesResults { if client.Ctx.Err() != nil { - client.Logger.Info("Stopping %s API response processing as context is finished", objType) + client.Logger.Infof("Stopping %s API response processing as context is finished", objType) return } @@ -181,12 +180,8 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba hostName = attrs.Name case "Service": - if !strings.HasPrefix(attrs.Name, attrs.Host+"!") { - client.Logger.Errorf("Queried API Service object's name mismatches, %q is no prefix of %q", attrs.Host, attrs.Name) - continue - } hostName = attrs.Host - serviceName = attrs.Name[len(attrs.Host+"!"):] + serviceName = attrs.Name default: client.Logger.Errorf("Querying API 
delivered a %q object when expecting %s", objQueriesResult.Type, objType) From aae0b41d7fbf0a660406d6ae68ea9f1d5e5ac594 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 19 Oct 2023 16:10:32 +0200 Subject: [PATCH 18/65] eventstream: clean up mutex locks First, the last event's timestamp is now fetched once, before the checkMissedStateChanges resp. checkMissedAcknowledgements methods are entered four times in total, which shouldn't be a problem... unless it becomes one. Also, to clearly signal what should be locked, the initial declarations in the Process method are now protected by the mutex, even though this is not strictly necessary. --- internal/eventstream/client.go | 15 +++++++++++---- internal/eventstream/client_api.go | 13 ++++--------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 0d1436c9..d022f4bb 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -193,8 +193,11 @@ func (client *Client) handleEvent(ev *event.Event, source string) { // loop takes care of reconnections, all those events will be logged while generated Events will be dispatched to the // callback function. 
func (client *Client) Process() { + client.eventsHandlerMutex.Lock() client.eventsRingBuffer = make([]uint64, 1024) client.eventsRingBufferPos = 0 + client.eventsLastTs = time.Time{} + client.eventsHandlerMutex.Unlock() defer client.Logger.Info("Event Stream Client has stopped") @@ -220,9 +223,13 @@ func (client *Client) Process() { client.Logger.Errorf("Cannot reestablish an API connection: %v", err) } - go client.checkMissedStateChanges("host") - go client.checkMissedStateChanges("service") - go client.checkMissedAcknowledgements("host") - go client.checkMissedAcknowledgements("service") + client.eventsHandlerMutex.RLock() + lastEventTime := client.eventsLastTs + client.eventsHandlerMutex.RUnlock() + + go client.checkMissedStateChanges("host", lastEventTime) + go client.checkMissedStateChanges("service", lastEventTime) + go client.checkMissedAcknowledgements("host", lastEventTime) + go client.checkMissedAcknowledgements("service", lastEventTime) } } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 8ea806ad..2c71f3e4 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -193,11 +193,8 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba } // checkMissedStateChanges fetches missed Host or Service state changes and feeds them into the handler. 
-func (client *Client) checkMissedStateChanges(objType string) { - client.eventsHandlerMutex.RLock() - filterExpr := fmt.Sprintf("%s.last_state_change>%f", - objType, float64(client.eventsLastTs.UnixMicro())/1_000_000.0) - client.eventsHandlerMutex.RUnlock() +func (client *Client) checkMissedStateChanges(objType string, since time.Time) { + filterExpr := fmt.Sprintf("%s.last_state_change>%f", objType, float64(since.UnixMicro())/1_000_000.0) client.checkMissedChanges(objType, filterExpr, func(attrs *HostServiceRuntimeAttributes, host, service string) { ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) @@ -211,11 +208,9 @@ func (client *Client) checkMissedStateChanges(objType string) { } // checkMissedAcknowledgements fetches missed set Host or Service Acknowledgements and feeds them into the handler. -func (client *Client) checkMissedAcknowledgements(objType string) { - client.eventsHandlerMutex.RLock() +func (client *Client) checkMissedAcknowledgements(objType string, since time.Time) { filterExpr := fmt.Sprintf("%s.acknowledgement && %s.acknowledgement_last_change>%f", - objType, objType, float64(client.eventsLastTs.UnixMicro())/1_000_000.0) - client.eventsHandlerMutex.RUnlock() + objType, objType, float64(since.UnixMicro())/1_000_000.0) client.checkMissedChanges(objType, filterExpr, func(attrs *HostServiceRuntimeAttributes, host, service string) { ackComment, err := client.fetchAcknowledgementComment(host, service, attrs.AcknowledgementLastChange.Time) From e3dc33f09a145525a2b5c299cb1adbfbc8290ec6 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 23 Oct 2023 10:05:14 +0200 Subject: [PATCH 19/65] eventstream: test with testify library --- internal/eventstream/api_responses_test.go | 38 ++++++---------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go index 38015f0c..71a0f997 100644 --- 
a/internal/eventstream/api_responses_test.go +++ b/internal/eventstream/api_responses_test.go @@ -2,7 +2,7 @@ package eventstream import ( "encoding/json" - "reflect" + "github.com/stretchr/testify/assert" "testing" "time" ) @@ -50,18 +50,12 @@ func TestIcinga2Time_UnmarshalJSON(t *testing.T) { t.Run(test.name, func(t *testing.T) { var ici2time Icinga2Time err := json.Unmarshal([]byte(test.jsonData), &ici2time) - if (err != nil) != test.isError { - t.Errorf("unexpected error state; got error: %t, expected: %t; %v", err != nil, test.isError, err) - return - } else if err != nil { + assert.Equal(t, test.isError, err != nil, "unexpected error state; %v", err) + if err != nil { return } - if ici2time.Compare(test.expected.Time) != 0 { - t.Logf("got: %#v", ici2time) - t.Logf("expected: %#v", test.expected) - t.Error("unexpected response") - } + assert.WithinDuration(t, test.expected.Time, ici2time.Time, time.Duration(0)) }) } } @@ -217,18 +211,12 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { t.Run(test.name, func(t *testing.T) { var resp ObjectQueriesResult err := json.Unmarshal([]byte(test.jsonData), &resp) - if (err != nil) != test.isError { - t.Errorf("unexpected error state; got error: %t, expected: %t; %v", err != nil, test.isError, err) - return - } else if err != nil { + assert.Equal(t, test.isError, err != nil, "unexpected error state; %v", err) + if err != nil { return } - if !reflect.DeepEqual(resp, test.expected) { - t.Logf("got: %#v", resp) - t.Logf("expected: %#v", test.expected) - t.Error("unexpected response") - } + assert.EqualValuesf(t, test.expected, resp, "unexpected ObjectQueriesResult") }) } } @@ -510,18 +498,12 @@ func TestApiResponseUnmarshal(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { resp, err := UnmarshalEventStreamResponse([]byte(test.jsonData)) - if (err != nil) != test.isError { - t.Errorf("unexpected error state; got error: %t, expected: %t; %v", err != nil, test.isError, err) - return 
- } else if err != nil { + assert.Equal(t, test.isError, err != nil, "unexpected error state; %v", err) + if err != nil { return } - if !reflect.DeepEqual(resp, test.expected) { - t.Logf("got: %#v", resp) - t.Logf("expected: %#v", test.expected) - t.Error("unexpected response") - } + assert.EqualValuesf(t, test.expected, resp, "unexpected Event Stream response") }) } } From 9f0c828f062e8815e5a12ea347c8943b6217622f Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 23 Oct 2023 10:50:19 +0200 Subject: [PATCH 20/65] eventstream: move config parsing out of main to package Next to moving the configuration parsing code from the main file to the eventstream package, the custom TLS configuration creation was changed to allow multiple (possible future) configuration options. --- cmd/icinga-notifications-daemon/main.go | 49 +++---------------- internal/eventstream/client.go | 65 +++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 43 deletions(-) diff --git a/cmd/icinga-notifications-daemon/main.go b/cmd/icinga-notifications-daemon/main.go index 6dda4a3f..891a279f 100644 --- a/cmd/icinga-notifications-daemon/main.go +++ b/cmd/icinga-notifications-daemon/main.go @@ -2,27 +2,23 @@ package main import ( "context" - "crypto/tls" - "crypto/x509" "flag" "fmt" "github.com/icinga/icinga-notifications/internal" "github.com/icinga/icinga-notifications/internal/channel" "github.com/icinga/icinga-notifications/internal/config" "github.com/icinga/icinga-notifications/internal/daemon" + "github.com/icinga/icinga-notifications/internal/eventstream" "github.com/icinga/icinga-notifications/internal/incident" "github.com/icinga/icinga-notifications/internal/listener" "github.com/icinga/icingadb/pkg/logging" "github.com/icinga/icingadb/pkg/utils" "go.uber.org/zap" - "net/http" "os" "os/signal" "runtime" "syscall" "time" - - "github.com/icinga/icinga-notifications/internal/eventstream" ) func main() { @@ -102,44 +98,11 @@ func main() { logger.Fatalw("Can't load incidents from 
database", zap.Error(err)) } - for _, icinga2Api := range conf.Icinga2Apis { - logger := logs.GetChildLogger(fmt.Sprintf("eventstream-%d", icinga2Api.NotificationsEventSourceId)) - - esClient := eventstream.Client{ - ApiHost: icinga2Api.Host, - ApiBasicAuthUser: icinga2Api.AuthUser, - ApiBasicAuthPass: icinga2Api.AuthPass, - - IcingaNotificationsEventSourceId: icinga2Api.NotificationsEventSourceId, - IcingaWebRoot: conf.Icingaweb2URL, - - CallbackFn: eventstream.MakeProcessEvent(db, logger, logs, runtimeConfig), - Ctx: ctx, - Logger: logger, - } - - switch { - case icinga2Api.IcingaCaFile != "": - caData, err := os.ReadFile(icinga2Api.IcingaCaFile) - if err != nil { - logger.Errorw("Cannot read CA file", zap.String("file", icinga2Api.IcingaCaFile), zap.Error(err)) - continue - } - - certPool := x509.NewCertPool() - if !certPool.AppendCertsFromPEM(caData) { - logger.Error("Cannot add CA file to cert pool") - continue - } - - esClient.ApiHttpTransport = http.Transport{TLSClientConfig: &tls.Config{RootCAs: certPool}} - logger.Debug("Configured custom CA file for API HTTPS requests") - - case icinga2Api.InsecureTls: - esClient.ApiHttpTransport = http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}} - logger.Warn("Skipping TLS verification for API HTTPS requests") - } - + esClients, err := eventstream.NewClientsFromConfig(ctx, logs, db, runtimeConfig, conf) + if err != nil { + logger.Fatalw("cannot prepare Event Stream API Clients form config", zap.Error(err)) + } + for _, esClient := range esClients { go esClient.Process() } diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index d022f4bb..833f4860 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -2,12 +2,19 @@ package eventstream import ( "context" + "crypto/tls" + "crypto/x509" "encoding/json" + "fmt" + "github.com/icinga/icinga-notifications/internal/config" + "github.com/icinga/icinga-notifications/internal/daemon" 
"github.com/icinga/icinga-notifications/internal/event" + "github.com/icinga/icingadb/pkg/icingadb" "github.com/icinga/icingadb/pkg/logging" "hash/fnv" "net/http" "net/url" + "os" "slices" "sync" "time" @@ -43,6 +50,64 @@ type Client struct { eventsLastTs time.Time } +// NewClientsFromConfig returns all Clients defined in the conf.ConfigFile. +// +// Those are prepared and just needed to be started by calling their Process method. +func NewClientsFromConfig( + ctx context.Context, + logs *logging.Logging, + db *icingadb.DB, + runtimeConfig *config.RuntimeConfig, + conf *daemon.ConfigFile, +) ([]*Client, error) { + clients := make([]*Client, 0, len(conf.Icinga2Apis)) + + for _, icinga2Api := range conf.Icinga2Apis { + logger := logs.GetChildLogger(fmt.Sprintf("eventstream-%d", icinga2Api.NotificationsEventSourceId)) + + client := &Client{ + ApiHost: icinga2Api.Host, + ApiBasicAuthUser: icinga2Api.AuthUser, + ApiBasicAuthPass: icinga2Api.AuthPass, + ApiHttpTransport: http.Transport{ + TLSClientConfig: &tls.Config{ + MinVersion: tls.VersionTLS13, + }, + }, + + IcingaNotificationsEventSourceId: icinga2Api.NotificationsEventSourceId, + IcingaWebRoot: conf.Icingaweb2URL, + + CallbackFn: MakeProcessEvent(db, logger, logs, runtimeConfig), + Ctx: ctx, + Logger: logger, + } + + if icinga2Api.IcingaCaFile != "" { + caData, err := os.ReadFile(icinga2Api.IcingaCaFile) + if err != nil { + return nil, fmt.Errorf("cannot read CA file %q for Event Stream ID %d, %w", + icinga2Api.IcingaCaFile, icinga2Api.NotificationsEventSourceId, err) + } + + certPool := x509.NewCertPool() + if !certPool.AppendCertsFromPEM(caData) { + return nil, fmt.Errorf("cannot add custom CA file to CA pool for Event Stream ID %d, %w", + icinga2Api.NotificationsEventSourceId, err) + } + + client.ApiHttpTransport.TLSClientConfig.RootCAs = certPool + } + + if icinga2Api.InsecureTls { + client.ApiHttpTransport.TLSClientConfig.InsecureSkipVerify = true + } + + clients = append(clients, client) + } + return 
clients, nil +} + // buildCommonEvent creates an event.Event based on Host and (optional) Service attributes to be specified later. // // The following fields will NOT be populated and might be altered later: From e17168921ae23025df2425cb8d41cd0e167a7da6 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 23 Oct 2023 11:19:17 +0200 Subject: [PATCH 21/65] eventstream: use filter_vars next to filters --- internal/eventstream/client_api.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 2c71f3e4..5a77190f 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -115,12 +115,15 @@ func (client *Client) fetchServiceGroups(host, service string) ([]string, error) // the Host's resp. Service's AcknowledgementLastChange field has NOT the same timestamp as the Comment; there is a // difference of some milliseconds. As there might be even multiple ACK comments, we have to find the closest one. 
func (client *Client) fetchAcknowledgementComment(host, service string, ackTime time.Time) (*Comment, error) { - filterExpr := `comment.entry_type == 4 && comment.host_name == "` + host + `"` + filterExpr := "comment.entry_type == 4 && comment.host_name == comment_host_name" + filterVars := map[string]string{"comment_host_name": host} if service != "" { - filterExpr += ` && comment.service_name == "` + service + `"` + filterExpr += " && comment.service_name == comment_service_name" + filterVars["comment_service_name"] = service } - objQueriesResults, err := client.queryObjectsApiQuery("comment", map[string]any{"filter": filterExpr}) + objQueriesResults, err := client.queryObjectsApiQuery("comment", + map[string]any{"filter": filterExpr, "filter_vars": filterVars}) if err != nil { return nil, err } @@ -194,7 +197,7 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba // checkMissedStateChanges fetches missed Host or Service state changes and feeds them into the handler. func (client *Client) checkMissedStateChanges(objType string, since time.Time) { - filterExpr := fmt.Sprintf("%s.last_state_change>%f", objType, float64(since.UnixMicro())/1_000_000.0) + filterExpr := fmt.Sprintf("%s.last_state_change > %f", objType, float64(since.UnixMicro())/1_000_000.0) client.checkMissedChanges(objType, filterExpr, func(attrs *HostServiceRuntimeAttributes, host, service string) { ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) @@ -209,7 +212,7 @@ func (client *Client) checkMissedStateChanges(objType string, since time.Time) { // checkMissedAcknowledgements fetches missed set Host or Service Acknowledgements and feeds them into the handler. 
func (client *Client) checkMissedAcknowledgements(objType string, since time.Time) { - filterExpr := fmt.Sprintf("%s.acknowledgement && %s.acknowledgement_last_change>%f", + filterExpr := fmt.Sprintf("%s.acknowledgement && %s.acknowledgement_last_change > %f", objType, objType, float64(since.UnixMicro())/1_000_000.0) client.checkMissedChanges(objType, filterExpr, func(attrs *HostServiceRuntimeAttributes, host, service string) { From ae6ce9c3c03682b6442403cbcd81dfec304d9c35 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 23 Oct 2023 14:09:40 +0200 Subject: [PATCH 22/65] eventstream: linear back off reconnection interval The prior reconnection pattern was similar to that of a sawtooth with a somewhat unnecessary additional loop in the Client.Process method. Now, it blocks until a reconnection succeeds or some unrecoverable error, e.g., a canceled context, occurs. --- internal/eventstream/client.go | 16 ++++--------- internal/eventstream/client_api.go | 38 ++++++++++++++++-------------- 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 833f4860..bc52d92a 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -11,6 +11,7 @@ import ( "github.com/icinga/icinga-notifications/internal/event" "github.com/icinga/icingadb/pkg/icingadb" "github.com/icinga/icingadb/pkg/logging" + "go.uber.org/zap" "hash/fnv" "net/http" "net/url" @@ -275,17 +276,10 @@ func (client *Client) Process() { client.Logger.Warn("Event Stream closed stream; maybe Icinga 2 is reloading") } - for { - if client.Ctx.Err() != nil { - client.Logger.Info("Abort Icinga 2 API reconnections as context is finished") - return - } - - err := client.reestablishApiConnection() - if err == nil { - break - } - client.Logger.Errorf("Cannot reestablish an API connection: %v", err) + err = client.waitForApiAvailability() + if err != nil { + client.Logger.Errorw("Cannot reestablish an API connection", 
zap.Error(err)) + return } client.eventsHandlerMutex.RLock() diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 5a77190f..3b643f65 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/json" "fmt" + "go.uber.org/zap" "math" "net/http" "net/url" @@ -235,12 +236,8 @@ func (client *Client) checkMissedAcknowledgements(objType string, since time.Tim }) } -// reestablishApiConnection tries to access the Icinga 2 API with an exponential backoff. -// -// With 10 retries, it might block up to (2^10 - 1) * 10 / 1_000 = 10.23 seconds plus additional HTTP delays. -func (client *Client) reestablishApiConnection() error { - const maxRetries = 10 - +// waitForApiAvailability reconnects to the Icinga 2 API until it either becomes available or the Client context is done. +func (client *Client) waitForApiAvailability() error { apiUrl, err := url.JoinPath(client.ApiHost, "/v1/") if err != nil { return err @@ -251,33 +248,38 @@ func (client *Client) reestablishApiConnection() error { } req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) - var lastErr error - for i := 0; i < maxRetries; i++ { - if client.Ctx.Err() != nil { - return client.Ctx.Err() + // To neither flood the API nor have to wait unnecessarily long, at first the exponential function for the backoff + // time calculation will be used. When numbers are starting to get big, a logarithm will be used instead. + // 10ms, 27ms, 73ms, 200ms, 545ms, 1.484s, 2.584s, 2.807s, 3s, 3.169s, ... 
+ backoffDelay := func(i int) time.Duration { + if i <= 5 { + return time.Duration(math.Exp(float64(i)) * 10 * float64(time.Millisecond)) } - time.Sleep((time.Duration)(math.Exp2(float64(i))) * 10 * time.Millisecond) + return time.Duration(math.Log2(float64(i)) * float64(time.Second)) + } - client.Logger.Debugf("Try to reestablish an API connection, %d/%d tries..", i+1, maxRetries) + for i := 0; client.Ctx.Err() == nil; i++ { + time.Sleep(backoffDelay(i)) + client.Logger.Debugw("Try to reestablish an API connection", zap.Int("try", i+1)) httpClient := &http.Client{ Transport: &client.ApiHttpTransport, - Timeout: time.Second, + Timeout: 100 * time.Millisecond, } res, err := httpClient.Do(req) if err != nil { - lastErr = err - client.Logger.Debugf("API probing failed: %v", lastErr) + client.Logger.Errorw("Reestablishing an API connection failed", zap.Error(err)) continue } _ = res.Body.Close() if res.StatusCode != http.StatusOK { - lastErr = fmt.Errorf("expected HTTP status %d, got %d", http.StatusOK, res.StatusCode) - client.Logger.Debugf("API probing failed: %v", lastErr) + client.Logger.Errorw("API returns unexpected status code during API reconnection", zap.Int("status", res.StatusCode)) continue } + + client.Logger.Debugw("Successfully reconnected to API", zap.Int("try", i+1)) return nil } - return fmt.Errorf("cannot query API backend in %d tries, %w", maxRetries, lastErr) + return client.Ctx.Err() } From ff6f2671257510082b9daecef330488c5382d7f7 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 23 Oct 2023 16:36:56 +0200 Subject: [PATCH 23/65] eventstream: generic Attrs for ObjectQueriesResult First, this change brings the advantage that the JSON unmarshalling does not require any special code and works directly without double parsing the input. Also, no double type check is needed later in the code, because the type information is already there. However, to make this work, some refactoring was necessary all over the client_api.go place. 
A major obstacle was that methods are not allowed to have type parameters. Thus, some logic needed to be extracted out of the Client type into its own function. This resulted in changes for the HTTP calling methods and a separation of HTTP transmission and parsing the response spreading throughout the library code. --- internal/eventstream/api_responses.go | 29 +--- internal/eventstream/api_responses_test.go | 36 +++-- internal/eventstream/client_api.go | 164 +++++++++++---------- 3 files changed, 112 insertions(+), 117 deletions(-) diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go index c6e209fb..5712254a 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/eventstream/api_responses.go @@ -92,35 +92,10 @@ type HostServiceRuntimeAttributes struct { // ObjectQueriesResult represents the Icinga 2 API Object Queries Result wrapper object. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#object-queries-result -type ObjectQueriesResult struct { +type ObjectQueriesResult[T Comment | Downtime | HostServiceRuntimeAttributes] struct { Name string `json:"name"` Type string `json:"type"` - Attrs any `json:"attrs"` -} - -func (objQueriesRes *ObjectQueriesResult) UnmarshalJSON(bytes []byte) error { - var responseAttrs json.RawMessage - err := json.Unmarshal(bytes, &struct { - Name *string `json:"name"` - Type *string `json:"type"` - Attrs *json.RawMessage `json:"attrs"` - }{&objQueriesRes.Name, &objQueriesRes.Type, &responseAttrs}) - if err != nil { - return err - } - - switch objQueriesRes.Type { - case "Comment": - objQueriesRes.Attrs = new(Comment) - case "Downtime": - objQueriesRes.Attrs = new(Downtime) - case "Host", "Service": - objQueriesRes.Attrs = new(HostServiceRuntimeAttributes) - default: - return fmt.Errorf("unsupported type %q", objQueriesRes.Type) - } - - return json.Unmarshal(responseAttrs, objQueriesRes.Attrs) + Attrs T `json:"attrs"` } // The following constants list all implemented Icinga 2 
API Event Stream Types to be used as a const instead of diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go index 71a0f997..07a12da9 100644 --- a/internal/eventstream/api_responses_test.go +++ b/internal/eventstream/api_responses_test.go @@ -65,6 +65,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { name string jsonData string isError bool + resp any expected any }{ { @@ -86,10 +87,11 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/comments' | jq -c '[.results[] | select(.attrs.service_name == "")][0]' name: "comment-host", jsonData: `{"attrs":{"__name":"dummy-0!f1239b7d-6e13-4031-b7dd-4055fdd2cd80","active":true,"author":"icingaadmin","entry_time":1697454753.536457,"entry_type":1,"expire_time":0,"ha_mode":0,"host_name":"dummy-0","legacy_id":3,"name":"f1239b7d-6e13-4031-b7dd-4055fdd2cd80","original_attributes":null,"package":"_api","paused":false,"persistent":false,"service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-0!f1239b7d-6e13-4031-b7dd-4055fdd2cd80.conf"},"templates":["f1239b7d-6e13-4031-b7dd-4055fdd2cd80"],"text":"foo bar","type":"Comment","version":1697454753.53647,"zone":"master"},"joins":{},"meta":{},"name":"dummy-0!f1239b7d-6e13-4031-b7dd-4055fdd2cd80","type":"Comment"}`, - expected: ObjectQueriesResult{ + resp: &ObjectQueriesResult[Comment]{}, + expected: &ObjectQueriesResult[Comment]{ Name: "dummy-0!f1239b7d-6e13-4031-b7dd-4055fdd2cd80", Type: "Comment", - Attrs: &Comment{ + Attrs: Comment{ Host: "dummy-0", Author: "icingaadmin", Text: "foo bar", @@ -102,10 +104,11 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/comments' | jq -c '[.results[] | select(.attrs.service_name != "")][0]' 
name: "comment-service", jsonData: `{"attrs":{"__name":"dummy-912!ping6!1b29580d-0a09-4265-ad1f-5e16f462443d","active":true,"author":"icingaadmin","entry_time":1697197701.307516,"entry_type":1,"expire_time":0,"ha_mode":0,"host_name":"dummy-912","legacy_id":1,"name":"1b29580d-0a09-4265-ad1f-5e16f462443d","original_attributes":null,"package":"_api","paused":false,"persistent":false,"service_name":"ping6","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!ping6!1b29580d-0a09-4265-ad1f-5e16f462443d.conf"},"templates":["1b29580d-0a09-4265-ad1f-5e16f462443d"],"text":"adfadsfasdfasdf","type":"Comment","version":1697197701.307536,"zone":"master"},"joins":{},"meta":{},"name":"dummy-912!ping6!1b29580d-0a09-4265-ad1f-5e16f462443d","type":"Comment"}`, - expected: ObjectQueriesResult{ + resp: &ObjectQueriesResult[Comment]{}, + expected: &ObjectQueriesResult[Comment]{ Name: "dummy-912!ping6!1b29580d-0a09-4265-ad1f-5e16f462443d", Type: "Comment", - Attrs: &Comment{ + Attrs: Comment{ Host: "dummy-912", Service: "ping6", Author: "icingaadmin", @@ -119,10 +122,11 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/downtimes' | jq -c '[.results[] | select(.attrs.service_name == "")][0]' name: "downtime-host", jsonData: `{"attrs":{"__name":"dummy-11!af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c","active":true,"author":"icingaadmin","authoritative_zone":"","comment":"turn down for 
what","config_owner":"","config_owner_hash":"","duration":0,"end_time":1698096240,"entry_time":1697456415.667442,"fixed":true,"ha_mode":0,"host_name":"dummy-11","legacy_id":2,"name":"af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c","original_attributes":null,"package":"_api","parent":"","paused":false,"remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-11!af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c.conf"},"start_time":1697456292,"templates":["af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c"],"trigger_time":1697456415.667442,"triggered_by":"","triggers":[],"type":"Downtime","version":1697456415.667458,"was_cancelled":false,"zone":"master"},"joins":{},"meta":{},"name":"dummy-11!af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c","type":"Downtime"}`, - expected: ObjectQueriesResult{ + resp: &ObjectQueriesResult[Downtime]{}, + expected: &ObjectQueriesResult[Downtime]{ Name: "dummy-11!af73f9d9-2ed8-45f8-b541-cce3f3fe0f6c", Type: "Downtime", - Attrs: &Downtime{ + Attrs: Downtime{ Host: "dummy-11", Author: "icingaadmin", Comment: "turn down for what", @@ -133,10 +137,11 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/downtimes' | jq -c '[.results[] | select(.attrs.service_name != "")][0]' name: "downtime-service", jsonData: `{"attrs":{"__name":"docker-master!load!c27b27c2-e0ab-45ff-8b9b-e95f29851eb0","active":true,"author":"icingaadmin","authoritative_zone":"master","comment":"Scheduled downtime for 
backup","config_owner":"docker-master!load!backup-downtime","config_owner_hash":"ca9502dc8fa5d29c1cb2686808b5d2ccf3ea4a9c6dc3f3c09bfc54614c03c765","duration":0,"end_time":1697511600,"entry_time":1697439555.095232,"fixed":true,"ha_mode":0,"host_name":"docker-master","legacy_id":1,"name":"c27b27c2-e0ab-45ff-8b9b-e95f29851eb0","original_attributes":null,"package":"_api","parent":"","paused":false,"remove_time":0,"scheduled_by":"docker-master!load!backup-downtime","service_name":"load","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!load!c27b27c2-e0ab-45ff-8b9b-e95f29851eb0.conf"},"start_time":1697508000,"templates":["c27b27c2-e0ab-45ff-8b9b-e95f29851eb0"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697439555.095272,"was_cancelled":false,"zone":""},"joins":{},"meta":{},"name":"docker-master!load!c27b27c2-e0ab-45ff-8b9b-e95f29851eb0","type":"Downtime"}`, - expected: ObjectQueriesResult{ + resp: &ObjectQueriesResult[Downtime]{}, + expected: &ObjectQueriesResult[Downtime]{ Name: "docker-master!load!c27b27c2-e0ab-45ff-8b9b-e95f29851eb0", Type: "Downtime", - Attrs: &Downtime{ + Attrs: Downtime{ Host: "docker-master", Service: "load", Author: "icingaadmin", @@ -148,10 +153,11 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/hosts' | jq -c '.results[0]' name: "host", jsonData: `{"attrs":{"__name":"dummy-244","acknowledgement":0,"acknowledgement_expiry":0,"acknowledgement_last_change":0,"action_url":"","active":true,"address":"127.0.0.1","address6":"::1","check_attempt":1,"check_command":"random 
fortune","check_interval":300,"check_period":"","check_timeout":null,"command_endpoint":"","display_name":"dummy-244","downtime_depth":0,"enable_active_checks":true,"enable_event_handler":true,"enable_flapping":false,"enable_notifications":true,"enable_passive_checks":true,"enable_perfdata":true,"event_command":"icinga-notifications-host-events","executions":null,"flapping":false,"flapping_current":0,"flapping_ignore_states":null,"flapping_last_change":0,"flapping_threshold":0,"flapping_threshold_high":30,"flapping_threshold_low":25,"force_next_check":false,"force_next_notification":false,"groups":["app-network","department-dev","env-qa","location-rome"],"ha_mode":0,"handled":false,"icon_image":"","icon_image_alt":"","last_check":1697459643.869006,"last_check_result":{"active":true,"check_source":"docker-master","command":["/bin/bash","-c","/usr/games/fortune; exit $0","0"],"execution_end":1697459643.868893,"execution_start":1697459643.863147,"exit_status":0,"output":"If you think last Tuesday was a drag, wait till you see what happens 
tomorrow!","performance_data":[],"previous_hard_state":99,"schedule_end":1697459643.869006,"schedule_start":1697459643.86287,"scheduling_source":"docker-master","state":0,"ttl":0,"type":"CheckResult","vars_after":{"attempt":1,"reachable":true,"state":0,"state_type":1},"vars_before":{"attempt":1,"reachable":true,"state":0,"state_type":1}},"last_hard_state":0,"last_hard_state_change":1697099900.637215,"last_reachable":true,"last_state":0,"last_state_change":1697099900.637215,"last_state_down":0,"last_state_type":1,"last_state_unreachable":0,"last_state_up":1697459643.868893,"max_check_attempts":3,"name":"dummy-244","next_check":1697459943.019035,"next_update":1697460243.031081,"notes":"","notes_url":"","original_attributes":null,"package":"_etc","paused":false,"previous_state_change":1697099900.637215,"problem":false,"retry_interval":60,"severity":0,"source_location":{"first_column":5,"first_line":2,"last_column":38,"last_line":2,"path":"/etc/icinga2/zones.d/master/03-dummys-hosts.conf"},"state":0,"state_type":1,"templates":["dummy-244","generic-icinga-notifications-host"],"type":"Host","vars":{"app":"network","department":"dev","env":"qa","is_dummy":true,"location":"rome"},"version":0,"volatile":false,"zone":"master"},"joins":{},"meta":{},"name":"dummy-244","type":"Host"}`, - expected: ObjectQueriesResult{ + resp: &ObjectQueriesResult[HostServiceRuntimeAttributes]{}, + expected: &ObjectQueriesResult[HostServiceRuntimeAttributes]{ Name: "dummy-244", Type: "Host", - Attrs: &HostServiceRuntimeAttributes{ + Attrs: HostServiceRuntimeAttributes{ Name: "dummy-244", Groups: []string{"app-network", "department-dev", "env-qa", "location-rome"}, State: 0, @@ -179,10 +185,11 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { // $ curl -k -s -u root:icinga -d '{"filter": "service.acknowledgement != 0"}' -H 'Accept: application/json' -H 'X-HTTP-Method-Override: GET' 'https://localhost:5665/v1/objects/services' | jq -c '.results[0]' name: "service", jsonData: 
`{"attrs":{"__name":"docker-master!ssh","acknowledgement":1,"acknowledgement_expiry":0,"acknowledgement_last_change":1697460655.878141,"action_url":"","active":true,"check_attempt":1,"check_command":"ssh","check_interval":60,"check_period":"","check_timeout":null,"command_endpoint":"","display_name":"ssh","downtime_depth":0,"enable_active_checks":true,"enable_event_handler":true,"enable_flapping":false,"enable_notifications":true,"enable_passive_checks":true,"enable_perfdata":true,"event_command":"icinga-notifications-service-events","executions":null,"flapping":false,"flapping_current":0,"flapping_ignore_states":null,"flapping_last_change":0,"flapping_threshold":0,"flapping_threshold_high":30,"flapping_threshold_low":25,"force_next_check":false,"force_next_notification":false,"groups":[],"ha_mode":0,"handled":true,"host_name":"docker-master","icon_image":"","icon_image_alt":"","last_check":1697460711.134904,"last_check_result":{"active":true,"check_source":"docker-master","command":["/usr/lib/nagios/plugins/check_ssh","127.0.0.1"],"execution_end":1697460711.134875,"execution_start":1697460711.130247,"exit_status":2,"output":"connect to address 127.0.0.1 and port 22: Connection 
refused","performance_data":[],"previous_hard_state":99,"schedule_end":1697460711.134904,"schedule_start":1697460711.13,"scheduling_source":"docker-master","state":2,"ttl":0,"type":"CheckResult","vars_after":{"attempt":1,"reachable":true,"state":2,"state_type":1},"vars_before":{"attempt":1,"reachable":true,"state":2,"state_type":1}},"last_hard_state":2,"last_hard_state_change":1697099980.820806,"last_reachable":true,"last_state":2,"last_state_change":1697099896.120829,"last_state_critical":1697460711.134875,"last_state_ok":0,"last_state_type":1,"last_state_unknown":0,"last_state_unreachable":0,"last_state_warning":0,"max_check_attempts":5,"name":"ssh","next_check":1697460771.1299999,"next_update":1697460831.1397498,"notes":"","notes_url":"","original_attributes":null,"package":"_etc","paused":false,"previous_state_change":1697099896.120829,"problem":true,"retry_interval":30,"severity":640,"source_location":{"first_column":1,"first_line":47,"last_column":19,"last_line":47,"path":"/etc/icinga2/conf.d/services.conf"},"state":2,"state_type":1,"templates":["ssh","generic-icinga-notifications-service","generic-service"],"type":"Service","vars":null,"version":0,"volatile":false,"zone":""},"joins":{},"meta":{},"name":"docker-master!ssh","type":"Service"}`, - expected: ObjectQueriesResult{ + resp: &ObjectQueriesResult[HostServiceRuntimeAttributes]{}, + expected: &ObjectQueriesResult[HostServiceRuntimeAttributes]{ Name: "docker-master!ssh", Type: "Service", - Attrs: &HostServiceRuntimeAttributes{ + Attrs: HostServiceRuntimeAttributes{ Name: "ssh", Host: "docker-master", Groups: []string{}, @@ -209,14 +216,13 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - var resp ObjectQueriesResult - err := json.Unmarshal([]byte(test.jsonData), &resp) + err := json.Unmarshal([]byte(test.jsonData), test.resp) assert.Equal(t, test.isError, err != nil, "unexpected error state; %v", err) if err != nil { return } 
- assert.EqualValuesf(t, test.expected, resp, "unexpected ObjectQueriesResult") + assert.EqualValuesf(t, test.expected, test.resp, "unexpected ObjectQueriesResult") }) } } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 3b643f65..1d1d97f8 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "go.uber.org/zap" + "io" "math" "net/http" "net/url" @@ -14,99 +15,117 @@ import ( // This method contains Icinga 2 API related methods which are not directly related to the Event Stream. -// queryObjectsApi takes a Request, executes it and hopefully returns an array of . -func (client *Client) queryObjectsApi(req *http.Request) ([]ObjectQueriesResult, error) { - httpClient := &http.Client{Transport: &client.ApiHttpTransport} - res, err := httpClient.Do(req) - if err != nil { - return nil, err - } - defer func() { _ = res.Body.Close() }() +// extractObjectQueriesResult parses a typed ObjectQueriesResult array out of a JSON io.ReaderCloser. +// +// As Go 1.21 does not allow type parameters in methods[0], the logic was extracted into a function transforming the +// JSON response - passed as an io.ReaderCloser which will be closed within this function - into the typed response to +// be used within the methods below. 
+// +// [0] https://github.com/golang/go/issues/49085 +func extractObjectQueriesResult[T Comment | Downtime | HostServiceRuntimeAttributes](jsonResp io.ReadCloser) ([]ObjectQueriesResult[T], error) { + defer func() { _ = jsonResp.Close() }() - var objQueriesResults []ObjectQueriesResult - err = json.NewDecoder(res.Body).Decode(&struct { - Results *[]ObjectQueriesResult `json:"results"` + var objQueriesResults []ObjectQueriesResult[T] + err := json.NewDecoder(jsonResp).Decode(&struct { + Results *[]ObjectQueriesResult[T] `json:"results"` }{&objQueriesResults}) if err != nil { return nil, err } - return objQueriesResults, nil } -// queryObjectsApiDirect performs a direct resp. "fast" API query against a specific object identified by its name. -func (client *Client) queryObjectsApiDirect(objType, objName string) ([]ObjectQueriesResult, error) { - apiUrl, err := url.JoinPath(client.ApiHost, "/v1/objects/", objType+"s/", objName) +// queryObjectsApi performs a configurable HTTP request against the Icinga 2 API and returns its raw response. +func (client *Client) queryObjectsApi(urlPaths []string, method string, body io.Reader, headers map[string]string) (io.ReadCloser, error) { + apiUrl, err := url.JoinPath(client.ApiHost, urlPaths...) if err != nil { return nil, err } - req, err := http.NewRequestWithContext(client.Ctx, http.MethodGet, apiUrl, nil) + req, err := http.NewRequestWithContext(client.Ctx, method, apiUrl, body) if err != nil { return nil, err } req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) - req.Header.Set("Accept", "application/json") - - return client.queryObjectsApi(req) -} + for k, v := range headers { + req.Header.Set(k, v) + } -// queryObjectsApiQuery sends a query to the Icinga 2 API /v1/objects to receive data of the given objType. 
-func (client *Client) queryObjectsApiQuery(objType string, query map[string]any) ([]ObjectQueriesResult, error) { - reqBody, err := json.Marshal(query) + httpClient := &http.Client{Transport: &client.ApiHttpTransport} + res, err := httpClient.Do(req) if err != nil { return nil, err } - apiUrl, err := url.JoinPath(client.ApiHost, "/v1/objects/", objType+"s") - if err != nil { - return nil, err + if res.StatusCode != http.StatusOK { + _ = res.Body.Close() + return nil, fmt.Errorf("unexpected status code %d", res.StatusCode) } - req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, apiUrl, bytes.NewReader(reqBody)) + + return res.Body, nil +} + +// queryObjectsApiDirect performs a direct resp. "fast" API query against a specific object identified by its name. +func (client *Client) queryObjectsApiDirect(objType, objName string) (io.ReadCloser, error) { + return client.queryObjectsApi( + []string{"/v1/objects/", objType + "s/", objName}, + http.MethodGet, + nil, + map[string]string{"Accept": "application/json"}) +} + +// queryObjectsApiQuery sends a query to the Icinga 2 API /v1/objects to receive data of the given objType. +func (client *Client) queryObjectsApiQuery(objType string, query map[string]any) (io.ReadCloser, error) { + reqBody, err := json.Marshal(query) if err != nil { return nil, err } - req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) - req.Header.Set("Accept", "application/json") - req.Header.Set("Content-Type", "application/json") - req.Header.Set("X-Http-Method-Override", "GET") - - return client.queryObjectsApi(req) + return client.queryObjectsApi( + []string{"/v1/objects/", objType + "s"}, + http.MethodPost, + bytes.NewReader(reqBody), + map[string]string{ + "Accept": "application/json", + "Content-Type": "application/json", + "X-Http-Method-Override": "GET", + }) } // fetchHostGroup fetches all Host Groups for this host. 
func (client *Client) fetchHostGroups(host string) ([]string, error) { - objQueriesResults, err := client.queryObjectsApiDirect("host", host) + jsonRaw, err := client.queryObjectsApiDirect("host", host) if err != nil { return nil, err } + objQueriesResults, err := extractObjectQueriesResult[HostServiceRuntimeAttributes](jsonRaw) + if err != nil { + return nil, err + } + if len(objQueriesResults) != 1 { return nil, fmt.Errorf("expected exactly one result for host %q instead of %d", host, len(objQueriesResults)) } - attrs, ok := objQueriesResults[0].Attrs.(*HostServiceRuntimeAttributes) - if !ok { - return nil, fmt.Errorf("queried object's attrs are of wrong type %T", attrs) - } - return attrs.Groups, nil + return objQueriesResults[0].Attrs.Groups, nil } // fetchServiceGroups fetches all Service Groups for this service on this host. func (client *Client) fetchServiceGroups(host, service string) ([]string, error) { - objQueriesResults, err := client.queryObjectsApiDirect("service", host+"!"+service) + jsonRaw, err := client.queryObjectsApiDirect("host", host) + if err != nil { + return nil, err + } + objQueriesResults, err := extractObjectQueriesResult[HostServiceRuntimeAttributes](jsonRaw) if err != nil { return nil, err } + if len(objQueriesResults) != 1 { return nil, fmt.Errorf("expected exactly one result for service %q instead of %d", host+"!"+service, len(objQueriesResults)) } - attrs, ok := objQueriesResults[0].Attrs.(*HostServiceRuntimeAttributes) - if !ok { - return nil, fmt.Errorf("queried object's attrs are of wrong type %T", attrs) - } - return attrs.Groups, nil + return objQueriesResults[0].Attrs.Groups, nil } // fetchAcknowledgementComment fetches an Acknowledgement Comment for a Host (empty service) or for a Service at a Host. 
@@ -123,43 +142,44 @@ func (client *Client) fetchAcknowledgementComment(host, service string, ackTime filterVars["comment_service_name"] = service } - objQueriesResults, err := client.queryObjectsApiQuery("comment", - map[string]any{"filter": filterExpr, "filter_vars": filterVars}) + jsonRaw, err := client.queryObjectsApiQuery("comment", map[string]any{"filter": filterExpr, "filter_vars": filterVars}) if err != nil { return nil, err } - if len(objQueriesResults) == 0 { - return nil, fmt.Errorf("found no ACK Comments found for %q", filterExpr) + objQueriesResults, err := extractObjectQueriesResult[Comment](jsonRaw) + if err != nil { + return nil, err } - comments := make([]*Comment, len(objQueriesResults)) - for i, objQueriesResult := range objQueriesResults { - c, ok := objQueriesResult.Attrs.(*Comment) - if !ok { - return nil, fmt.Errorf("queried object's attrs are of wrong type %T", c) - } - comments[i] = c + if len(objQueriesResults) == 0 { + return nil, fmt.Errorf("found no ACK Comments found for %q", filterExpr) } - slices.SortFunc(comments, func(a, b *Comment) int { - distA := a.EntryTime.Time.Sub(ackTime).Abs() - distB := b.EntryTime.Time.Sub(ackTime).Abs() + slices.SortFunc(objQueriesResults, func(a, b ObjectQueriesResult[Comment]) int { + distA := a.Attrs.EntryTime.Time.Sub(ackTime).Abs() + distB := b.Attrs.EntryTime.Time.Sub(ackTime).Abs() return int(distA - distB) }) - if comments[0].EntryTime.Sub(ackTime).Abs() > time.Second { + if objQueriesResults[0].Attrs.EntryTime.Sub(ackTime).Abs() > time.Second { return nil, fmt.Errorf("found no ACK Comment for %q close to %v", filterExpr, ackTime) } - return comments[0], nil + return &objQueriesResults[0].Attrs, nil } // checkMissedChanges queries for Service or Host objects with a specific filter to handle missed elements. 
-func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallbackFn func(attrs *HostServiceRuntimeAttributes, host, service string)) { - objQueriesResults, err := client.queryObjectsApiQuery(objType, map[string]any{"filter": filterExpr}) +func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallbackFn func(attrs HostServiceRuntimeAttributes, host, service string)) { + jsonRaw, err := client.queryObjectsApiQuery(objType, map[string]any{"filter": filterExpr}) if err != nil { client.Logger.Errorf("Quering %ss from API failed, %v", objType, err) return } + objQueriesResults, err := extractObjectQueriesResult[HostServiceRuntimeAttributes](jsonRaw) + if err != nil { + client.Logger.Errorf("Parsing %ss from API failed, %v", objType, err) + return + } + if len(objQueriesResults) == 0 { return } @@ -172,27 +192,21 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba return } - attrs, ok := objQueriesResult.Attrs.(*HostServiceRuntimeAttributes) - if !ok { - client.Logger.Errorf("Queried %s API response object's attrs are of wrong type %T", objType, attrs) - continue - } - var hostName, serviceName string switch objQueriesResult.Type { case "Host": - hostName = attrs.Name + hostName = objQueriesResult.Attrs.Name case "Service": - hostName = attrs.Host - serviceName = attrs.Name + hostName = objQueriesResult.Attrs.Host + serviceName = objQueriesResult.Attrs.Name default: client.Logger.Errorf("Querying API delivered a %q object when expecting %s", objQueriesResult.Type, objType) continue } - attrsCallbackFn(attrs, hostName, serviceName) + attrsCallbackFn(objQueriesResult.Attrs, hostName, serviceName) } } @@ -200,7 +214,7 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba func (client *Client) checkMissedStateChanges(objType string, since time.Time) { filterExpr := fmt.Sprintf("%s.last_state_change > %f", objType, float64(since.UnixMicro())/1_000_000.0) - 
client.checkMissedChanges(objType, filterExpr, func(attrs *HostServiceRuntimeAttributes, host, service string) { + client.checkMissedChanges(objType, filterExpr, func(attrs HostServiceRuntimeAttributes, host, service string) { ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) if err != nil { client.Logger.Errorf("Failed to construct Event from %s API: %v", objType, err) @@ -216,7 +230,7 @@ func (client *Client) checkMissedAcknowledgements(objType string, since time.Tim filterExpr := fmt.Sprintf("%s.acknowledgement && %s.acknowledgement_last_change > %f", objType, objType, float64(since.UnixMicro())/1_000_000.0) - client.checkMissedChanges(objType, filterExpr, func(attrs *HostServiceRuntimeAttributes, host, service string) { + client.checkMissedChanges(objType, filterExpr, func(attrs HostServiceRuntimeAttributes, host, service string) { ackComment, err := client.fetchAcknowledgementComment(host, service, attrs.AcknowledgementLastChange.Time) if err != nil { client.Logger.Errorf("Cannot fetch ACK Comment for Acknowledgement, %v", err) From 9178cb1daf8a7696669ec091df13a59f83f824f2 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 25 Oct 2023 14:10:21 +0200 Subject: [PATCH 24/65] eventstream: refactor Event processing Reflecting the changes #99 brought and hiding the function from outside the package, as it is no longer referenced from the main function. 
--- internal/eventstream/client.go | 2 +- internal/eventstream/util.go | 55 ++++++++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index bc52d92a..76c24f94 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -79,7 +79,7 @@ func NewClientsFromConfig( IcingaNotificationsEventSourceId: icinga2Api.NotificationsEventSourceId, IcingaWebRoot: conf.Icingaweb2URL, - CallbackFn: MakeProcessEvent(db, logger, logs, runtimeConfig), + CallbackFn: makeProcessEvent(ctx, db, logger, logs, runtimeConfig), Ctx: ctx, Logger: logger, } diff --git a/internal/eventstream/util.go b/internal/eventstream/util.go index 013d8da5..89956d88 100644 --- a/internal/eventstream/util.go +++ b/internal/eventstream/util.go @@ -82,12 +82,61 @@ func ProcessEvent( } } -// MakeProcessEvent creates a closure around ProcessEvent to wrap all arguments except the event.Event. -func MakeProcessEvent( +// makeProcessEvent creates a closure function to process received events. +// +// This function contains glue code similar to those from Listener.ProcessEvent to check for incidents for the Event +// and, if existent, call *Incident.ProcessEvent on this incident. 
+func makeProcessEvent( + ctx context.Context, db *icingadb.DB, logger *logging.Logger, logs *logging.Logging, runtimeConfig *config.RuntimeConfig, ) func(*event.Event) { - return func(ev *event.Event) { ProcessEvent(ev, db, logger, logs, runtimeConfig) } + return func(ev *event.Event) { + obj, err := object.FromEvent(ctx, db, ev) + if err != nil { + logger.Errorw("Cannot sync object", zap.Stringer("event", ev), zap.Error(err)) + return + } + + createIncident := ev.Severity != event.SeverityNone && ev.Severity != event.SeverityOK + currentIncident, created, err := incident.GetCurrent( + ctx, + db, + obj, + logs.GetChildLogger("incident"), + runtimeConfig, + createIncident) + if err != nil { + logger.Errorw("Failed to get current incident", zap.Error(err)) + return + } + + l := logger.With( + zap.String("object", obj.DisplayName()), + zap.Stringer("event", ev), + zap.Stringer("incident", currentIncident), + zap.Bool("created incident", created)) + + if currentIncident == nil { + switch { + case ev.Type == event.TypeAcknowledgement: + l.Warn("Object doesn't have active incident, ignoring acknowledgement event") + case ev.Severity != event.SeverityOK: + l.Error("Cannot process event with a non OK state without a known incident") + default: + l.Warn("Ignoring superfluous OK state event") + } + + return + } + + l.Debugw("Processing incident event") + + if err := currentIncident.ProcessEvent(ctx, ev, created); err != nil { + logger.Errorw("Failed to process current incident", zap.Error(err)) + return + } + } } From 5f5a97442e60453cd7511f19e89c05fdf74a4fd1 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 26 Oct 2023 14:36:26 +0200 Subject: [PATCH 25/65] eventstream: current time for new Events Adapt the Event generation to the Listener's and use time.Now() for the Event.Time. Furthermore, cleanup two Event generating methods for the Event Stream which are only used once and have, after prior refactoring, an empty function body. 
Note: After this change, the caching is de facto broken, but will be addressed in the next commit. --- internal/eventstream/client.go | 8 ++++---- internal/eventstream/client_api.go | 5 +---- internal/eventstream/client_es.go | 17 ++--------------- 3 files changed, 7 insertions(+), 23 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 76c24f94..5dd61bef 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -111,8 +111,9 @@ func NewClientsFromConfig( // buildCommonEvent creates an event.Event based on Host and (optional) Service attributes to be specified later. // +// The new Event's Time will be the current timestamp. +// // The following fields will NOT be populated and might be altered later: -// - Time // - Type // - Severity // - Username @@ -160,6 +161,7 @@ func (client *Client) buildCommonEvent(host, service string) (*event.Event, erro } return &event.Event{ + Time: time.Now(), SourceId: client.IcingaNotificationsEventSourceId, Name: eventName, URL: client.IcingaWebRoot + eventUrlSuffix, @@ -200,7 +202,6 @@ func (client *Client) buildHostServiceEvent(result CheckResult, state int, host, return nil, err } - ev.Time = result.ExecutionEnd.Time ev.Type = event.TypeState ev.Severity = eventSeverity ev.Message = result.Output @@ -209,13 +210,12 @@ func (client *Client) buildHostServiceEvent(result CheckResult, state int, host, } // buildAcknowledgementEvent from the given fields. 
-func (client *Client) buildAcknowledgementEvent(ts time.Time, host, service, author, comment string) (*event.Event, error) { +func (client *Client) buildAcknowledgementEvent(host, service, author, comment string) (*event.Event, error) { ev, err := client.buildCommonEvent(host, service) if err != nil { return nil, err } - ev.Time = ts ev.Type = event.TypeAcknowledgement ev.Username = author ev.Message = comment diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 1d1d97f8..6badffa7 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -237,10 +237,7 @@ func (client *Client) checkMissedAcknowledgements(objType string, since time.Tim return } - ev, err := client.buildAcknowledgementEvent( - attrs.AcknowledgementLastChange.Time, - host, service, - ackComment.Author, ackComment.Text) + ev, err := client.buildAcknowledgementEvent(host, service, ackComment.Author, ackComment.Text) if err != nil { client.Logger.Errorf("Failed to construct Event from Acknowledgement %s API: %v", objType, err) return diff --git a/internal/eventstream/client_es.go b/internal/eventstream/client_es.go index 2cd67ee9..e59c20dd 100644 --- a/internal/eventstream/client_es.go +++ b/internal/eventstream/client_es.go @@ -13,19 +13,6 @@ import ( // This file contains Event Stream related methods of the Client. -// eventStreamHandleStateChange acts on a received Event Stream StateChange object. -func (client *Client) eventStreamHandleStateChange(stateChange *StateChange) (*event.Event, error) { - return client.buildHostServiceEvent(stateChange.CheckResult, stateChange.State, stateChange.Host, stateChange.Service) -} - -// eventStreamHandleAcknowledgementSet acts on a received Event Stream AcknowledgementSet object. 
-func (client *Client) eventStreamHandleAcknowledgementSet(ackSet *AcknowledgementSet) (*event.Event, error) { - return client.buildAcknowledgementEvent( - ackSet.Timestamp.Time, - ackSet.Host, ackSet.Service, - ackSet.Author, ackSet.Comment) -} - // listenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. // // In case of a parsing or handling error, this error will be returned. If the server closes the connection, nil will @@ -84,9 +71,9 @@ func (client *Client) listenEventStream() error { var ev *event.Event switch respT := resp.(type) { case *StateChange: - ev, err = client.eventStreamHandleStateChange(respT) + ev, err = client.buildHostServiceEvent(respT.CheckResult, respT.State, respT.Host, respT.Service) case *AcknowledgementSet: - ev, err = client.eventStreamHandleAcknowledgementSet(respT) + ev, err = client.buildAcknowledgementEvent(respT.Host, respT.Service, respT.Author, respT.Comment) // case *AcknowledgementCleared: // case *CommentAdded: // case *CommentRemoved: From 0ae9fa178b82eb0cfe6e743090b417188de463f0 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 26 Oct 2023 16:27:13 +0200 Subject: [PATCH 26/65] eventstream: buffer Events during reconnection Contrary to the prior implementation, there is no longer a caching to detect (possible) duplicates, but all Events will be regenerated from the Icinga 2 Objects API and buffered. Eventually, this buffer will then be replayed. 
--- internal/eventstream/client.go | 109 +++++++++++++++++------------ internal/eventstream/client_api.go | 46 +++++++----- internal/eventstream/client_es.go | 2 +- 3 files changed, 96 insertions(+), 61 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 5dd61bef..74009fe9 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -4,7 +4,6 @@ import ( "context" "crypto/tls" "crypto/x509" - "encoding/json" "fmt" "github.com/icinga/icinga-notifications/internal/config" "github.com/icinga/icinga-notifications/internal/daemon" @@ -12,12 +11,11 @@ import ( "github.com/icinga/icingadb/pkg/icingadb" "github.com/icinga/icingadb/pkg/logging" "go.uber.org/zap" - "hash/fnv" "net/http" "net/url" "os" - "slices" "sync" + "sync/atomic" "time" ) @@ -44,11 +42,11 @@ type Client struct { // Logger to log to. Logger *logging.Logger - // All those variables are used internally to keep at least some state. - eventsHandlerMutex sync.RWMutex - eventsRingBuffer []uint64 - eventsRingBufferPos int - eventsLastTs time.Time + // replayPhase indicates that Events will be cached as the Event Stream Client is in the reconnection phase. + replayPhase atomic.Bool + // replayBuffer is the cache being populated during the reconnection phase and its mutex. + replayBuffer []*event.Event + replayBufferMutex sync.Mutex } // NewClientsFromConfig returns all Clients defined in the conf.ConfigFile. @@ -224,33 +222,71 @@ func (client *Client) buildAcknowledgementEvent(host, service, author, comment s } // handleEvent checks and dispatches generated Events. 
-func (client *Client) handleEvent(ev *event.Event, source string) { - h := fnv.New64a() - _ = json.NewEncoder(h).Encode(ev) - evHash := h.Sum64() - - client.Logger.Debugf("Start handling event %s received from %s", ev, source) - - client.eventsHandlerMutex.RLock() - inCache := slices.Contains(client.eventsRingBuffer, evHash) - client.eventsHandlerMutex.RUnlock() - if inCache { - client.Logger.Warnf("Event %s received from %s is already in cache and will not be processed", ev, source) +func (client *Client) handleEvent(ev *event.Event) { + if client.replayPhase.Load() { + client.replayBufferMutex.Lock() + client.replayBuffer = append(client.replayBuffer, ev) + client.replayBufferMutex.Unlock() return } - client.eventsHandlerMutex.Lock() - client.eventsRingBuffer[client.eventsRingBufferPos] = evHash - client.eventsRingBufferPos = (client.eventsRingBufferPos + 1) % len(client.eventsRingBuffer) + client.CallbackFn(ev) +} + +func (client *Client) replayBufferedEvents() { + client.replayBufferMutex.Lock() + client.replayBuffer = make([]*event.Event, 0, 1024) + client.replayBufferMutex.Unlock() + client.replayPhase.Store(true) + + queryFns := []func(string){client.checkMissedAcknowledgements, client.checkMissedStateChanges} + objTypes := []string{"host", "service"} + + var replayWg sync.WaitGroup + replayWg.Add(len(queryFns) * len(objTypes)) - if ev.Time.Before(client.eventsLastTs) { - client.Logger.Infof("Event %s received from %s generated at %v before last known timestamp %v; might be a replay", - ev, source, ev.Time, client.eventsLastTs) + for _, fn := range queryFns { + for _, objType := range objTypes { + go func(fn func(string), objType string) { + fn(objType) + replayWg.Done() + }(fn, objType) + } } - client.eventsLastTs = ev.Time - client.eventsHandlerMutex.Unlock() - client.CallbackFn(ev) + // Fork off the synchronization in a background goroutine to wait for all producers to finish. 
As the producers + // check the Client's context, they should finish early and this should not deadlock. + go func() { + replayWg.Wait() + client.Logger.Debug("Querying the Objects API for replaying finished") + + if client.Ctx.Err() != nil { + client.Logger.Warn("Aborting Objects API replaying as the context is done") + return + } + + for { + // Here is a race between filling the buffer from incoming Event Stream events and processing the buffered + // events. Thus, the buffer will be reset to catch up what happened in between, as otherwise Events would be + // processed out of order. Only when the buffer is empty, the replay mode will be reset. + client.replayBufferMutex.Lock() + tmpReplayBuffer := client.replayBuffer + client.replayBuffer = make([]*event.Event, 0, 1024) + client.replayBufferMutex.Unlock() + + if len(tmpReplayBuffer) == 0 { + break + } + + for _, ev := range tmpReplayBuffer { + client.CallbackFn(ev) + } + client.Logger.Debugf("Replayed %d events", len(tmpReplayBuffer)) + } + + client.replayPhase.Store(false) + client.Logger.Debug("Finished replay") + }() } // Process incoming objects and reconnect to the Event Stream with replaying objects if necessary. @@ -259,12 +295,6 @@ func (client *Client) handleEvent(ev *event.Event, source string) { // loop takes care of reconnections, all those events will be logged while generated Events will be dispatched to the // callback function. 
func (client *Client) Process() { - client.eventsHandlerMutex.Lock() - client.eventsRingBuffer = make([]uint64, 1024) - client.eventsRingBufferPos = 0 - client.eventsLastTs = time.Time{} - client.eventsHandlerMutex.Unlock() - defer client.Logger.Info("Event Stream Client has stopped") for { @@ -282,13 +312,6 @@ func (client *Client) Process() { return } - client.eventsHandlerMutex.RLock() - lastEventTime := client.eventsLastTs - client.eventsHandlerMutex.RUnlock() - - go client.checkMissedStateChanges("host", lastEventTime) - go client.checkMissedStateChanges("service", lastEventTime) - go client.checkMissedAcknowledgements("host", lastEventTime) - go client.checkMissedAcknowledgements("service", lastEventTime) + client.replayBufferedEvents() } } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 6badffa7..29de4910 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -65,7 +65,7 @@ func (client *Client) queryObjectsApi(urlPaths []string, method string, body io. return res.Body, nil } -// queryObjectsApiDirect performs a direct resp. "fast" API query against a specific object identified by its name. +// queryObjectsApiDirect performs a direct resp. "fast" API query against an object, optionally identified by its name. func (client *Client) queryObjectsApiDirect(objType, objName string) (io.ReadCloser, error) { return client.queryObjectsApi( []string{"/v1/objects/", objType + "s/", objName}, @@ -167,13 +167,27 @@ func (client *Client) fetchAcknowledgementComment(host, service string, ackTime return &objQueriesResults[0].Attrs, nil } -// checkMissedChanges queries for Service or Host objects with a specific filter to handle missed elements. +// checkMissedChanges queries for Service or Host objects to handle missed elements. +// +// If a filterExpr is given (non-empty string), it will be used for the query. Otherwise, all objects will be requested. 
+// +// The callback function will be called f.e. object of the objType (i.e. "host" or "service") being retrieved from the +// Icinga 2 Objects API. The callback function or a later caller must decide if this object should be replayed. func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallbackFn func(attrs HostServiceRuntimeAttributes, host, service string)) { - jsonRaw, err := client.queryObjectsApiQuery(objType, map[string]any{"filter": filterExpr}) + var ( + jsonRaw io.ReadCloser + err error + ) + if filterExpr == "" { + jsonRaw, err = client.queryObjectsApiDirect(objType, "") + } else { + jsonRaw, err = client.queryObjectsApiQuery(objType, map[string]any{"filter": filterExpr}) + } if err != nil { client.Logger.Errorf("Quering %ss from API failed, %v", objType, err) return } + objQueriesResults, err := extractObjectQueriesResult[HostServiceRuntimeAttributes](jsonRaw) if err != nil { client.Logger.Errorf("Parsing %ss from API failed, %v", objType, err) @@ -184,11 +198,11 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba return } - client.Logger.Infof("Querying %ss from API resulted in %d state changes to replay", objType, len(objQueriesResults)) + client.Logger.Debugf("Querying %ss from API resulted in %d state changes for optional replay", objType, len(objQueriesResults)) for _, objQueriesResult := range objQueriesResults { if client.Ctx.Err() != nil { - client.Logger.Infof("Stopping %s API response processing as context is finished", objType) + client.Logger.Warnf("Stopping %s API response processing as context is finished", objType) return } @@ -210,26 +224,24 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba } } -// checkMissedStateChanges fetches missed Host or Service state changes and feeds them into the handler. 
-func (client *Client) checkMissedStateChanges(objType string, since time.Time) { - filterExpr := fmt.Sprintf("%s.last_state_change > %f", objType, float64(since.UnixMicro())/1_000_000.0) - - client.checkMissedChanges(objType, filterExpr, func(attrs HostServiceRuntimeAttributes, host, service string) { +// checkMissedStateChanges fetches all objects of the requested type and feeds them into the handler. +func (client *Client) checkMissedStateChanges(objType string) { + client.checkMissedChanges(objType, "", func(attrs HostServiceRuntimeAttributes, host, service string) { ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) if err != nil { client.Logger.Errorf("Failed to construct Event from %s API: %v", objType, err) return } - client.handleEvent(ev, "API "+objType) + client.handleEvent(ev) }) } -// checkMissedAcknowledgements fetches missed set Host or Service Acknowledgements and feeds them into the handler. -func (client *Client) checkMissedAcknowledgements(objType string, since time.Time) { - filterExpr := fmt.Sprintf("%s.acknowledgement && %s.acknowledgement_last_change > %f", - objType, objType, float64(since.UnixMicro())/1_000_000.0) - +// checkMissedAcknowledgements fetches all Host or Service Acknowledgements and feeds them into the handler. +// +// Currently only active acknowledgements are being processed. 
+func (client *Client) checkMissedAcknowledgements(objType string) { + filterExpr := fmt.Sprintf("%s.acknowledgement", objType) client.checkMissedChanges(objType, filterExpr, func(attrs HostServiceRuntimeAttributes, host, service string) { ackComment, err := client.fetchAcknowledgementComment(host, service, attrs.AcknowledgementLastChange.Time) if err != nil { @@ -243,7 +255,7 @@ func (client *Client) checkMissedAcknowledgements(objType string, since time.Tim return } - client.handleEvent(ev, "ACK API "+objType) + client.handleEvent(ev) }) } diff --git a/internal/eventstream/client_es.go b/internal/eventstream/client_es.go index e59c20dd..2f58c9ba 100644 --- a/internal/eventstream/client_es.go +++ b/internal/eventstream/client_es.go @@ -88,7 +88,7 @@ func (client *Client) listenEventStream() error { return err } - client.handleEvent(ev, "Event Stream") + client.handleEvent(ev) } return lineScanner.Err() } From 46eb5b86005da6a22ee122ac1c30d9e2c3d0448c Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 26 Oct 2023 17:22:21 +0200 Subject: [PATCH 27/65] eventstream: Event dispatcher The just introduced replay logic felt a bit clumsy. Thus, I introduced a middle layer - the eventDispatcher method within its own goroutine - to receive all Events and decide if buffering should be performed. --- internal/eventstream/client.go | 97 +++++++++++++++--------------- internal/eventstream/client_api.go | 4 +- internal/eventstream/client_es.go | 2 +- 3 files changed, 53 insertions(+), 50 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 74009fe9..7b67150d 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -42,11 +42,12 @@ type Client struct { // Logger to log to. Logger *logging.Logger + // eventDispatch communicates Events to be processed between producer and consumer. + eventDispatch chan *event.Event + // replayTrigger signals the eventDispatcher method that the reconnection phase is finished. 
+ replayTrigger chan struct{} // replayPhase indicates that Events will be cached as the Event Stream Client is in the reconnection phase. replayPhase atomic.Bool - // replayBuffer is the cache being populated during the reconnection phase and its mutex. - replayBuffer []*event.Event - replayBufferMutex sync.Mutex } // NewClientsFromConfig returns all Clients defined in the conf.ConfigFile. @@ -221,22 +222,44 @@ func (client *Client) buildAcknowledgementEvent(host, service, author, comment s return ev, nil } -// handleEvent checks and dispatches generated Events. -func (client *Client) handleEvent(ev *event.Event) { - if client.replayPhase.Load() { - client.replayBufferMutex.Lock() - client.replayBuffer = append(client.replayBuffer, ev) - client.replayBufferMutex.Unlock() - return - } +// eventDispatcher receives generated event.Events to be either buffered or directly delivered to the CallbackFn. +// +// When the Client is in the reconnection phase, indicated in the enterReconnectionPhase method, than all received Events +// from the eventDispatch channel will be buffered until the replayTrigger fires. 
+func (client *Client) eventDispatcher() { + var reconnectionBuffer []*event.Event + + for { + select { + case <-client.Ctx.Done(): + client.Logger.Warnw("Closing event dispatcher as context is done", zap.Error(client.Ctx.Err())) + return - client.CallbackFn(ev) + case <-client.replayTrigger: + for _, ev := range reconnectionBuffer { + client.CallbackFn(ev) + } + client.Logger.Debugf("Replayed %d events during reconnection phase", len(reconnectionBuffer)) + client.replayPhase.Store(false) + reconnectionBuffer = []*event.Event{} + client.Logger.Info("Finished reconnection phase and returning normal operation") + + case ev := <-client.eventDispatch: + if client.replayPhase.Load() { + reconnectionBuffer = append(reconnectionBuffer, ev) + } else { + client.CallbackFn(ev) + } + } + } } -func (client *Client) replayBufferedEvents() { - client.replayBufferMutex.Lock() - client.replayBuffer = make([]*event.Event, 0, 1024) - client.replayBufferMutex.Unlock() +// enterReconnectionPhase enters the reconnection phase. +// +// This method starts multiple goroutines. First, some workers to query the Icinga 2 Objects API will be launched. When +// all of those have finished, the replayTrigger will be used to indicate that the buffered Events should be replayed. +func (client *Client) enterReconnectionPhase() { + client.Logger.Info("Entering reconnection phase to replay events") client.replayPhase.Store(true) queryFns := []func(string){client.checkMissedAcknowledgements, client.checkMissedStateChanges} @@ -254,38 +277,9 @@ func (client *Client) replayBufferedEvents() { } } - // Fork off the synchronization in a background goroutine to wait for all producers to finish. As the producers - // check the Client's context, they should finish early and this should not deadlock. 
go func() { replayWg.Wait() - client.Logger.Debug("Querying the Objects API for replaying finished") - - if client.Ctx.Err() != nil { - client.Logger.Warn("Aborting Objects API replaying as the context is done") - return - } - - for { - // Here is a race between filling the buffer from incoming Event Stream events and processing the buffered - // events. Thus, the buffer will be reset to catch up what happened in between, as otherwise Events would be - // processed out of order. Only when the buffer is empty, the replay mode will be reset. - client.replayBufferMutex.Lock() - tmpReplayBuffer := client.replayBuffer - client.replayBuffer = make([]*event.Event, 0, 1024) - client.replayBufferMutex.Unlock() - - if len(tmpReplayBuffer) == 0 { - break - } - - for _, ev := range tmpReplayBuffer { - client.CallbackFn(ev) - } - client.Logger.Debugf("Replayed %d events", len(tmpReplayBuffer)) - } - - client.replayPhase.Store(false) - client.Logger.Debug("Finished replay") + client.replayTrigger <- struct{}{} }() } @@ -295,8 +289,17 @@ func (client *Client) replayBufferedEvents() { // loop takes care of reconnections, all those events will be logged while generated Events will be dispatched to the // callback function. func (client *Client) Process() { + // These two channels will be used to communicate the Events and are crucial. As there are multiple producers and + // only one consumer, eventDispatcher, there is no ideal closer. However, producers and the consumer will be + // finished by the Client's context. When this happens, the main application should either be stopped or the Client + // is restarted, and we can hope for the GC. To make sure that nothing gets stuck, make the event channel buffered. 
+ client.eventDispatch = make(chan *event.Event, 1024) + client.replayTrigger = make(chan struct{}) + defer client.Logger.Info("Event Stream Client has stopped") + go client.eventDispatcher() + for { client.Logger.Info("Start listening on Icinga 2 Event Stream..") err := client.listenEventStream() @@ -312,6 +315,6 @@ func (client *Client) Process() { return } - client.replayBufferedEvents() + client.enterReconnectionPhase() } } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 29de4910..0cfc5842 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -233,7 +233,7 @@ func (client *Client) checkMissedStateChanges(objType string) { return } - client.handleEvent(ev) + client.eventDispatch <- ev }) } @@ -255,7 +255,7 @@ func (client *Client) checkMissedAcknowledgements(objType string) { return } - client.handleEvent(ev) + client.eventDispatch <- ev }) } diff --git a/internal/eventstream/client_es.go b/internal/eventstream/client_es.go index 2f58c9ba..3aaf31fe 100644 --- a/internal/eventstream/client_es.go +++ b/internal/eventstream/client_es.go @@ -88,7 +88,7 @@ func (client *Client) listenEventStream() error { return err } - client.handleEvent(ev) + client.eventDispatch <- ev } return lineScanner.Err() } From 494d00845074d65416f80b6c78009c4b34b11d55 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 30 Oct 2023 10:27:43 +0100 Subject: [PATCH 28/65] eventstream: reconnection in Event Stream method First, the reconnection phase was renamed to the replay phase as it handles events to be replayed, also during the initial connection. The waitForApiAvailability method, which fired short lived HTTP probes to check the API's availability, was removed. This logic was now integrated into the listenEventStream method, which retries connecting until a connection can be established. However, to not have to wait too long for timeouts, the net.Dialer.Timeout needed to be adjusted. 
Furthermore, as listenEventStream was the only remaining method in the client_es.go file, it was moved to client_api.go, which now contains all API related methods. --- internal/eventstream/client.go | 46 +++++++------ internal/eventstream/client_api.go | 104 ++++++++++++++++++++--------- internal/eventstream/client_es.go | 94 -------------------------- 3 files changed, 98 insertions(+), 146 deletions(-) delete mode 100644 internal/eventstream/client_es.go diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 7b67150d..4034f4a6 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -11,6 +11,7 @@ import ( "github.com/icinga/icingadb/pkg/icingadb" "github.com/icinga/icingadb/pkg/logging" "go.uber.org/zap" + "net" "net/http" "net/url" "os" @@ -44,9 +45,9 @@ type Client struct { // eventDispatch communicates Events to be processed between producer and consumer. eventDispatch chan *event.Event - // replayTrigger signals the eventDispatcher method that the reconnection phase is finished. + // replayTrigger signals the eventDispatcher method that the replay phase is finished. replayTrigger chan struct{} - // replayPhase indicates that Events will be cached as the Event Stream Client is in the reconnection phase. + // replayPhase indicates that Events will be cached as the Event Stream Client is in the replay phase. replayPhase atomic.Bool } @@ -70,6 +71,16 @@ func NewClientsFromConfig( ApiBasicAuthUser: icinga2Api.AuthUser, ApiBasicAuthPass: icinga2Api.AuthPass, ApiHttpTransport: http.Transport{ + // Limit the initial Dial timeout to enable a fast retry when the Icinga 2 API is offline. Due to the + // need for very long-lived connections against the Event Stream API afterward, Client.Timeout would + // limit the whole connection, which would be fatal. 
+ // + // Check the "Client Timeout" section of the following (slightly outdated) blog post: + // https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/ + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { + dialer := &net.Dialer{Timeout: 3 * time.Second} + return dialer.DialContext(ctx, network, addr) + }, TLSClientConfig: &tls.Config{ MinVersion: tls.VersionTLS13, }, @@ -224,10 +235,10 @@ func (client *Client) buildAcknowledgementEvent(host, service, author, comment s // eventDispatcher receives generated event.Events to be either buffered or directly delivered to the CallbackFn. // -// When the Client is in the reconnection phase, indicated in the enterReconnectionPhase method, than all received Events +// When the Client is in the replay phase, indicated in the enterReplayPhase method, than all received Events // from the eventDispatch channel will be buffered until the replayTrigger fires. func (client *Client) eventDispatcher() { - var reconnectionBuffer []*event.Event + var replayBuffer []*event.Event for { select { @@ -236,17 +247,17 @@ func (client *Client) eventDispatcher() { return case <-client.replayTrigger: - for _, ev := range reconnectionBuffer { + for _, ev := range replayBuffer { client.CallbackFn(ev) } - client.Logger.Debugf("Replayed %d events during reconnection phase", len(reconnectionBuffer)) + client.Logger.Debugf("Replayed %d events during replay phase", len(replayBuffer)) client.replayPhase.Store(false) - reconnectionBuffer = []*event.Event{} - client.Logger.Info("Finished reconnection phase and returning normal operation") + replayBuffer = []*event.Event{} + client.Logger.Info("Finished replay phase and returning to normal operation") case ev := <-client.eventDispatch: if client.replayPhase.Load() { - reconnectionBuffer = append(reconnectionBuffer, ev) + replayBuffer = append(replayBuffer, ev) } else { client.CallbackFn(ev) } @@ -254,12 +265,12 @@ func (client *Client) eventDispatcher() { 
} } -// enterReconnectionPhase enters the reconnection phase. +// enterReplayPhase enters the replay phase for the initial sync and after reconnections. // // This method starts multiple goroutines. First, some workers to query the Icinga 2 Objects API will be launched. When // all of those have finished, the replayTrigger will be used to indicate that the buffered Events should be replayed. -func (client *Client) enterReconnectionPhase() { - client.Logger.Info("Entering reconnection phase to replay events") +func (client *Client) enterReplayPhase() { + client.Logger.Info("Entering replay phase to replay events") client.replayPhase.Store(true) queryFns := []func(string){client.checkMissedAcknowledgements, client.checkMissedStateChanges} @@ -301,20 +312,11 @@ func (client *Client) Process() { go client.eventDispatcher() for { - client.Logger.Info("Start listening on Icinga 2 Event Stream..") err := client.listenEventStream() if err != nil { - client.Logger.Errorf("Event Stream processing failed: %v", err) + client.Logger.Errorw("Event Stream processing failed", zap.Error(err)) } else { client.Logger.Warn("Event Stream closed stream; maybe Icinga 2 is reloading") } - - err = client.waitForApiAvailability() - if err != nil { - client.Logger.Errorw("Cannot reestablish an API connection", zap.Error(err)) - return - } - - client.enterReconnectionPhase() } } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 0cfc5842..b962e502 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -1,19 +1,21 @@ package eventstream import ( + "bufio" "bytes" + "crypto/rand" "encoding/json" "fmt" + "github.com/icinga/icinga-notifications/internal/event" "go.uber.org/zap" "io" - "math" "net/http" "net/url" "slices" "time" ) -// This method contains Icinga 2 API related methods which are not directly related to the Event Stream. +// This file contains Icinga 2 API related methods. 
// extractObjectQueriesResult parses a typed ObjectQueriesResult array out of a JSON io.ReaderCloser. // @@ -259,50 +261,92 @@ func (client *Client) checkMissedAcknowledgements(objType string) { }) } -// waitForApiAvailability reconnects to the Icinga 2 API until it either becomes available or the Client context is done. -func (client *Client) waitForApiAvailability() error { - apiUrl, err := url.JoinPath(client.ApiHost, "/v1/") +// listenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. +// +// In case of a parsing or handling error, this error will be returned. If the server closes the connection, nil will +// be returned. +func (client *Client) listenEventStream() error { + queueNameRndBuff := make([]byte, 16) + _, _ = rand.Read(queueNameRndBuff) + + reqBody, err := json.Marshal(map[string]any{ + "queue": fmt.Sprintf("icinga-notifications-%x", queueNameRndBuff), + "types": []string{ + typeStateChange, + typeAcknowledgementSet, + // typeAcknowledgementCleared, + // typeCommentAdded, + // typeCommentRemoved, + // typeDowntimeAdded, + // typeDowntimeRemoved, + // typeDowntimeStarted, + // typeDowntimeTriggered, + }, + }) + if err != nil { + return err + } + + apiUrl, err := url.JoinPath(client.ApiHost, "/v1/events") if err != nil { return err } - req, err := http.NewRequestWithContext(client.Ctx, http.MethodGet, apiUrl, nil) + req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, apiUrl, bytes.NewReader(reqBody)) if err != nil { return err } + req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") - // To neither flood the API nor have to wait unnecessary long, at first the exponential function for the backoff - // time calculation will be used. When numbers are starting to get big, a logarithm will be used instead. - // 10ms, 27ms, 73ms, 200ms, 545ms, 1.484s, 2.584s, 2.807s, 3s, 3.169s, ... 
- backoffDelay := func(i int) time.Duration { - if i <= 5 { - return time.Duration(math.Exp(float64(i)) * 10 * float64(time.Millisecond)) + httpClient := &http.Client{Transport: &client.ApiHttpTransport} + + var res *http.Response + for { + client.Logger.Info("Try to establish an Event Stream API connection") + res, err = httpClient.Do(req) + if err == nil { + break } - return time.Duration(math.Log2(float64(i)) * float64(time.Second)) + client.Logger.Warnw("Establishing an Event Stream API connection failed; will be retried", zap.Error(err)) } + defer func() { _ = res.Body.Close() }() - for i := 0; client.Ctx.Err() == nil; i++ { - time.Sleep(backoffDelay(i)) - client.Logger.Debugw("Try to reestablish an API connection", zap.Int("try", i+1)) + client.enterReplayPhase() - httpClient := &http.Client{ - Transport: &client.ApiHttpTransport, - Timeout: 100 * time.Millisecond, - } - res, err := httpClient.Do(req) + client.Logger.Info("Start listening on Icinga 2 Event Stream..") + + lineScanner := bufio.NewScanner(res.Body) + for lineScanner.Scan() { + rawJson := lineScanner.Bytes() + + resp, err := UnmarshalEventStreamResponse(rawJson) if err != nil { - client.Logger.Errorw("Reestablishing an API connection failed", zap.Error(err)) - continue + return err } - _ = res.Body.Close() - if res.StatusCode != http.StatusOK { - client.Logger.Errorw("API returns unexpected status code during API reconnection", zap.Int("status", res.StatusCode)) - continue + var ev *event.Event + switch respT := resp.(type) { + case *StateChange: + ev, err = client.buildHostServiceEvent(respT.CheckResult, respT.State, respT.Host, respT.Service) + case *AcknowledgementSet: + ev, err = client.buildAcknowledgementEvent(respT.Host, respT.Service, respT.Author, respT.Comment) + // case *AcknowledgementCleared: + // case *CommentAdded: + // case *CommentRemoved: + // case *DowntimeAdded: + // case *DowntimeRemoved: + // case *DowntimeStarted: + // case *DowntimeTriggered: + default: + err = 
fmt.Errorf("unsupported type %T", resp) + } + if err != nil { + return err } - client.Logger.Debugw("Successfully reconnected to API", zap.Int("try", i+1)) - return nil + client.eventDispatch <- ev } - return client.Ctx.Err() + return lineScanner.Err() } diff --git a/internal/eventstream/client_es.go b/internal/eventstream/client_es.go deleted file mode 100644 index 3aaf31fe..00000000 --- a/internal/eventstream/client_es.go +++ /dev/null @@ -1,94 +0,0 @@ -package eventstream - -import ( - "bufio" - "bytes" - "crypto/rand" - "encoding/json" - "fmt" - "github.com/icinga/icinga-notifications/internal/event" - "net/http" - "net/url" -) - -// This file contains Event Stream related methods of the Client. - -// listenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. -// -// In case of a parsing or handling error, this error will be returned. If the server closes the connection, nil will -// be returned. -func (client *Client) listenEventStream() error { - queueNameRndBuff := make([]byte, 16) - _, _ = rand.Read(queueNameRndBuff) - - reqBody, err := json.Marshal(map[string]any{ - "queue": fmt.Sprintf("icinga-notifications-%x", queueNameRndBuff), - "types": []string{ - typeStateChange, - typeAcknowledgementSet, - // typeAcknowledgementCleared, - // typeCommentAdded, - // typeCommentRemoved, - // typeDowntimeAdded, - // typeDowntimeRemoved, - // typeDowntimeStarted, - // typeDowntimeTriggered, - }, - }) - if err != nil { - return err - } - - apiUrl, err := url.JoinPath(client.ApiHost, "/v1/events") - if err != nil { - return err - } - req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, apiUrl, bytes.NewReader(reqBody)) - if err != nil { - return err - } - - req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) - req.Header.Set("Accept", "application/json") - req.Header.Set("Content-Type", "application/json") - - httpClient := &http.Client{Transport: &client.ApiHttpTransport} - res, err := httpClient.Do(req) - 
if err != nil { - return err - } - defer func() { _ = res.Body.Close() }() - - lineScanner := bufio.NewScanner(res.Body) - for lineScanner.Scan() { - rawJson := lineScanner.Bytes() - - resp, err := UnmarshalEventStreamResponse(rawJson) - if err != nil { - return err - } - - var ev *event.Event - switch respT := resp.(type) { - case *StateChange: - ev, err = client.buildHostServiceEvent(respT.CheckResult, respT.State, respT.Host, respT.Service) - case *AcknowledgementSet: - ev, err = client.buildAcknowledgementEvent(respT.Host, respT.Service, respT.Author, respT.Comment) - // case *AcknowledgementCleared: - // case *CommentAdded: - // case *CommentRemoved: - // case *DowntimeAdded: - // case *DowntimeRemoved: - // case *DowntimeStarted: - // case *DowntimeTriggered: - default: - err = fmt.Errorf("unsupported type %T", resp) - } - if err != nil { - return err - } - - client.eventDispatch <- ev - } - return lineScanner.Err() -} From 5c8a4690fee40774c87ce4b6c3d780c0da12fdb1 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 30 Oct 2023 11:10:27 +0100 Subject: [PATCH 29/65] eventstream: only buffer Event Stream APIs Only buffer the live Event Stream API events during the replay phase while letting the replayed events be passed through. --- internal/eventstream/client.go | 20 +++++++++++++------- internal/eventstream/client_api.go | 15 ++++++++++++--- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 4034f4a6..0d9bb0e5 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -22,6 +22,12 @@ import ( // This file contains the main resp. common methods for the Client. +// outgoingEvent is a wrapper around an event.Event and its producer's origin to be sent to the eventDispatcher. 
+type outgoingEvent struct { + event *event.Event + fromEventStream bool +} + // Client for the Icinga 2 Event Stream API with extended support for other Icinga 2 APIs to gather additional // information and allow a replay in case of a connection loss. type Client struct { @@ -44,7 +50,7 @@ type Client struct { Logger *logging.Logger // eventDispatch communicates Events to be processed between producer and consumer. - eventDispatch chan *event.Event + eventDispatch chan *outgoingEvent // replayTrigger signals the eventDispatcher method that the replay phase is finished. replayTrigger chan struct{} // replayPhase indicates that Events will be cached as the Event Stream Client is in the replay phase. @@ -235,8 +241,8 @@ func (client *Client) buildAcknowledgementEvent(host, service, author, comment s // eventDispatcher receives generated event.Events to be either buffered or directly delivered to the CallbackFn. // -// When the Client is in the replay phase, indicated in the enterReplayPhase method, than all received Events -// from the eventDispatch channel will be buffered until the replayTrigger fires. +// When the Client is in the replay phase, events from the Event Stream API will be cached until the replay phase has +// finished, while replayed events will be delivered directly. func (client *Client) eventDispatcher() { var replayBuffer []*event.Event @@ -256,10 +262,10 @@ func (client *Client) eventDispatcher() { client.Logger.Info("Finished replay phase and returning to normal operation") case ev := <-client.eventDispatch: - if client.replayPhase.Load() { - replayBuffer = append(replayBuffer, ev) + if client.replayPhase.Load() && ev.fromEventStream { + replayBuffer = append(replayBuffer, ev.event) } else { - client.CallbackFn(ev) + client.CallbackFn(ev.event) } } } @@ -304,7 +310,7 @@ func (client *Client) Process() { // only one consumer, eventDispatcher, there is no ideal closer. 
However, producers and the consumer will be // finished by the Client's context. When this happens, the main application should either be stopped or the Client // is restarted, and we can hope for the GC. To make sure that nothing gets stuck, make the event channel buffered. - client.eventDispatch = make(chan *event.Event, 1024) + client.eventDispatch = make(chan *outgoingEvent, 1024) client.replayTrigger = make(chan struct{}) defer client.Logger.Info("Event Stream Client has stopped") diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index b962e502..0747dc2d 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -235,7 +235,10 @@ func (client *Client) checkMissedStateChanges(objType string) { return } - client.eventDispatch <- ev + client.eventDispatch <- &outgoingEvent{ + event: ev, + fromEventStream: false, + } }) } @@ -257,7 +260,10 @@ func (client *Client) checkMissedAcknowledgements(objType string) { return } - client.eventDispatch <- ev + client.eventDispatch <- &outgoingEvent{ + event: ev, + fromEventStream: false, + } }) } @@ -346,7 +352,10 @@ func (client *Client) listenEventStream() error { return err } - client.eventDispatch <- ev + client.eventDispatch <- &outgoingEvent{ + event: ev, + fromEventStream: true, + } } return lineScanner.Err() } From a5627b8b70561edf61afbea529fb58b940ad9fd8 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 30 Oct 2023 12:31:01 +0100 Subject: [PATCH 30/65] eventstream: unify errors, zap structured logging --- internal/eventstream/client.go | 5 ++--- internal/eventstream/client_api.go | 26 +++++++++++++++----------- internal/eventstream/util.go | 2 -- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 0d9bb0e5..9e914522 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -256,10 +256,9 @@ func (client *Client) eventDispatcher() { for _, 
ev := range replayBuffer { client.CallbackFn(ev) } - client.Logger.Debugf("Replayed %d events during replay phase", len(replayBuffer)) + client.Logger.Infow("Finished replay phase, returning to normal operation", zap.Int("cached events", len(replayBuffer))) client.replayPhase.Store(false) replayBuffer = []*event.Event{} - client.Logger.Info("Finished replay phase and returning to normal operation") case ev := <-client.eventDispatch: if client.replayPhase.Load() && ev.fromEventStream { @@ -276,7 +275,7 @@ func (client *Client) eventDispatcher() { // This method starts multiple goroutines. First, some workers to query the Icinga 2 Objects API will be launched. When // all of those have finished, the replayTrigger will be used to indicate that the buffered Events should be replayed. func (client *Client) enterReplayPhase() { - client.Logger.Info("Entering replay phase to replay events") + client.Logger.Info("Entering replay phase to replay stored events first") client.replayPhase.Store(true) queryFns := []func(string){client.checkMissedAcknowledgements, client.checkMissedStateChanges} diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 0747dc2d..3b5a54df 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -61,7 +61,7 @@ func (client *Client) queryObjectsApi(urlPaths []string, method string, body io. 
if res.StatusCode != http.StatusOK { _ = res.Body.Close() - return nil, fmt.Errorf("unexpected status code %d", res.StatusCode) + return nil, fmt.Errorf("unexpected HTTP status code %d", res.StatusCode) } return res.Body, nil @@ -154,7 +154,7 @@ func (client *Client) fetchAcknowledgementComment(host, service string, ackTime } if len(objQueriesResults) == 0 { - return nil, fmt.Errorf("found no ACK Comments found for %q", filterExpr) + return nil, fmt.Errorf("found no ACK Comments for %q with %v", filterExpr, filterVars) } slices.SortFunc(objQueriesResults, func(a, b ObjectQueriesResult[Comment]) int { @@ -163,7 +163,7 @@ func (client *Client) fetchAcknowledgementComment(host, service string, ackTime return int(distA - distB) }) if objQueriesResults[0].Attrs.EntryTime.Sub(ackTime).Abs() > time.Second { - return nil, fmt.Errorf("found no ACK Comment for %q close to %v", filterExpr, ackTime) + return nil, fmt.Errorf("found no ACK Comment for %q with %v close to %v", filterExpr, filterVars, ackTime) } return &objQueriesResults[0].Attrs, nil @@ -177,6 +177,8 @@ func (client *Client) fetchAcknowledgementComment(host, service string, ackTime // Icinga 2 Objects API. The callback function or a later caller must decide if this object should be replayed. 
func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallbackFn func(attrs HostServiceRuntimeAttributes, host, service string)) { var ( + logger = client.Logger.With(zap.String("object type", objType)) + jsonRaw io.ReadCloser err error ) @@ -186,13 +188,13 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba jsonRaw, err = client.queryObjectsApiQuery(objType, map[string]any{"filter": filterExpr}) } if err != nil { - client.Logger.Errorf("Quering %ss from API failed, %v", objType, err) + logger.Errorw("Querying API failed", zap.Error(err)) return } objQueriesResults, err := extractObjectQueriesResult[HostServiceRuntimeAttributes](jsonRaw) if err != nil { - client.Logger.Errorf("Parsing %ss from API failed, %v", objType, err) + logger.Errorw("Parsing API response failed", zap.Error(err)) return } @@ -200,11 +202,11 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba return } - client.Logger.Debugf("Querying %ss from API resulted in %d state changes for optional replay", objType, len(objQueriesResults)) + logger.Debugw("Querying API resulted in state changes", zap.Int("changes", len(objQueriesResults))) for _, objQueriesResult := range objQueriesResults { if client.Ctx.Err() != nil { - client.Logger.Warnf("Stopping %s API response processing as context is finished", objType) + logger.Warnw("Stopping API response processing as context is finished", zap.Error(client.Ctx.Err())) return } @@ -218,7 +220,7 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba serviceName = objQueriesResult.Attrs.Name default: - client.Logger.Errorf("Querying API delivered a %q object when expecting %s", objQueriesResult.Type, objType) + logger.Errorw("Querying API delivered a wrong object type", zap.String("result type", objQueriesResult.Type)) continue } @@ -231,7 +233,7 @@ func (client *Client) checkMissedStateChanges(objType string) { client.checkMissedChanges(objType, "", 
func(attrs HostServiceRuntimeAttributes, host, service string) { ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) if err != nil { - client.Logger.Errorf("Failed to construct Event from %s API: %v", objType, err) + client.Logger.Errorw("Failed to construct Event from API", zap.String("object type", objType), zap.Error(err)) return } @@ -248,15 +250,17 @@ func (client *Client) checkMissedStateChanges(objType string) { func (client *Client) checkMissedAcknowledgements(objType string) { filterExpr := fmt.Sprintf("%s.acknowledgement", objType) client.checkMissedChanges(objType, filterExpr, func(attrs HostServiceRuntimeAttributes, host, service string) { + logger := client.Logger.With(zap.String("object type", objType)) + ackComment, err := client.fetchAcknowledgementComment(host, service, attrs.AcknowledgementLastChange.Time) if err != nil { - client.Logger.Errorf("Cannot fetch ACK Comment for Acknowledgement, %v", err) + logger.Errorw("Cannot fetch ACK Comment for Acknowledgement", zap.Error(err)) return } ev, err := client.buildAcknowledgementEvent(host, service, ackComment.Author, ackComment.Text) if err != nil { - client.Logger.Errorf("Failed to construct Event from Acknowledgement %s API: %v", objType, err) + logger.Errorw("Failed to construct Event from Acknowledgement API", zap.Error(err)) return } diff --git a/internal/eventstream/util.go b/internal/eventstream/util.go index 89956d88..94c7e8b2 100644 --- a/internal/eventstream/util.go +++ b/internal/eventstream/util.go @@ -132,8 +132,6 @@ func makeProcessEvent( return } - l.Debugw("Processing incident event") - if err := currentIncident.ProcessEvent(ctx, ev, created); err != nil { logger.Errorw("Failed to process current incident", zap.Error(err)) return From 06534e071f71c440c2911f84288b649b897f1b52 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 30 Oct 2023 15:05:52 +0100 Subject: [PATCH 31/65] eventstream: remove CheckResult.Command As documented, the
CheckResult Command might either be an array of strings or a string. Only the first case was supported and the second, which only occurred once in my test instance, resulted in errors. However, as the Command field is not even needed, it can be removed. --- internal/eventstream/api_responses.go | 1 - internal/eventstream/api_responses_test.go | 73 ++++++++++++---------- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/internal/eventstream/api_responses.go b/internal/eventstream/api_responses.go index 5712254a..380fb894 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/eventstream/api_responses.go @@ -46,7 +46,6 @@ type CheckResult struct { ExitStatus int `json:"exit_status"` Output string `json:"output"` State int `json:"state"` - Command []string `json:"command"` ExecutionStart Icinga2Time `json:"execution_start"` ExecutionEnd Icinga2Time `json:"execution_end"` } diff --git a/internal/eventstream/api_responses_test.go b/internal/eventstream/api_responses_test.go index 07a12da9..6a3832fb 100644 --- a/internal/eventstream/api_responses_test.go +++ b/internal/eventstream/api_responses_test.go @@ -162,15 +162,9 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Groups: []string{"app-network", "department-dev", "env-qa", "location-rome"}, State: 0, LastCheckResult: CheckResult{ - ExitStatus: 0, - Output: "If you think last Tuesday was a drag, wait till you see what happens tomorrow!", - State: 0, - Command: []string{ - "/bin/bash", - "-c", - "/usr/games/fortune; exit $0", - "0", - }, + ExitStatus: 0, + Output: "If you think last Tuesday was a drag, wait till you see what happens tomorrow!", + State: 0, ExecutionStart: Icinga2Time{time.UnixMicro(1697459643863147)}, ExecutionEnd: Icinga2Time{time.UnixMicro(1697459643868893)}, }, @@ -195,13 +189,9 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Groups: []string{}, State: 2, LastCheckResult: CheckResult{ - ExitStatus: 2, - Output: "connect to address 127.0.0.1 and 
port 22: Connection refused", - State: 2, - Command: []string{ - "/usr/lib/nagios/plugins/check_ssh", - "127.0.0.1", - }, + ExitStatus: 2, + Output: "connect to address 127.0.0.1 and port 22: Connection refused", + State: 2, ExecutionStart: Icinga2Time{time.UnixMicro(1697460711130247)}, ExecutionEnd: Icinga2Time{time.UnixMicro(1697460711134875)}, }, @@ -212,6 +202,33 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { }, }, }, + { + // $ curl -k -s -u root:icinga 'https://localhost:5665/v1/objects/services' | jq -c '[.results[] | select(.attrs.last_check_result.command|type=="string")][0]' + name: "service-single-command", + jsonData: `{"attrs":{"__name":"docker-master!icinga","acknowledgement":0,"acknowledgement_expiry":0,"acknowledgement_last_change":0,"action_url":"","active":true,"check_attempt":1,"check_command":"icinga","check_interval":60,"check_period":"","check_timeout":null,"command_endpoint":"","display_name":"icinga","downtime_depth":0,"enable_active_checks":true,"enable_event_handler":true,"enable_flapping":false,"enable_notifications":true,"enable_passive_checks":true,"enable_perfdata":true,"event_command":"","executions":null,"flapping":false,"flapping_current":0,"flapping_ignore_states":null,"flapping_last_change":0,"flapping_threshold":0,"flapping_threshold_high":30,"flapping_threshold_low":25,"force_next_check":false,"force_next_notification":false,"groups":[],"ha_mode":0,"handled":false,"host_name":"docker-master","icon_image":"","icon_image_alt":"","last_check":1698673636.071483,"last_check_result":{"active":true,"check_source":"docker-master","command":"icinga","execution_end":1698673636.071483,"execution_start":1698673636.068106,"exit_status":0,"output":"Icinga 2 has been running for 26 seconds. 
Version: v2.14.0-35-g31b1294ac","performance_data":[{"counter":false,"crit":null,"label":"api_num_conn_endpoints","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"api_num_endpoints","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"api_num_http_clients","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"api_num_json_rpc_anonymous_clients","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"api_num_json_rpc_relay_queue_item_rate","max":null,"min":null,"type":"PerfdataValue","unit":"","value":186.86666666666667,"warn":null},{"counter":false,"crit":null,"label":"api_num_json_rpc_relay_queue_items","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"api_num_json_rpc_sync_queue_item_rate","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"api_num_json_rpc_sync_queue_items","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"api_num_json_rpc_work_queue_item_rate","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"api_num_not_conn_endpoints","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"checkercomponent_checker_idle","max":null,"min":null,"type":"PerfdataValue","unit":"","value":4020,"warn":null},{"counter":false,"crit":null,"label":"checkercomponent_checker_pending","max":null,"min":null,"type":"PerfdataValue","unit":"","value":1,"warn":null},{"counter":false,"crit":null,"label":"idomysqlconnection_ido-mysql_queries_rate","max":null,"min":null,"type":"PerfdataValue","unit":"","value":1526.9166
666666667,"warn":null},{"counter":false,"crit":null,"label":"idomysqlconnection_ido-mysql_queries_1min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":91615,"warn":null},{"counter":false,"crit":null,"label":"idomysqlconnection_ido-mysql_queries_5mins","max":null,"min":null,"type":"PerfdataValue","unit":"","value":91615,"warn":null},{"counter":false,"crit":null,"label":"idomysqlconnection_ido-mysql_queries_15mins","max":null,"min":null,"type":"PerfdataValue","unit":"","value":91615,"warn":null},{"counter":false,"crit":null,"label":"idomysqlconnection_ido-mysql_query_queue_items","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"idomysqlconnection_ido-mysql_query_queue_item_rate","max":null,"min":null,"type":"PerfdataValue","unit":"","value":381.5833333333333,"warn":null},{"counter":false,"crit":null,"label":"idopgsqlconnection_ido-pgsql_queries_rate","max":null,"min":null,"type":"PerfdataValue","unit":"","value":1527.15,"warn":null},{"counter":false,"crit":null,"label":"idopgsqlconnection_ido-pgsql_queries_1min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":91629,"warn":null},{"counter":false,"crit":null,"label":"idopgsqlconnection_ido-pgsql_queries_5mins","max":null,"min":null,"type":"PerfdataValue","unit":"","value":91629,"warn":null},{"counter":false,"crit":null,"label":"idopgsqlconnection_ido-pgsql_queries_15mins","max":null,"min":null,"type":"PerfdataValue","unit":"","value":91629,"warn":null},{"counter":false,"crit":null,"label":"idopgsqlconnection_ido-pgsql_query_queue_items","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"idopgsqlconnection_ido-pgsql_query_queue_item_rate","max":null,"min":null,"type":"PerfdataValue","unit":"","value":381.56666666666666,"warn":null},{"counter":false,"crit":null,"label":"active_host_checks","max":null,"min":null,"type":"PerfdataValue","unit":"","value":16.2867302
97242745,"warn":null},{"counter":false,"crit":null,"label":"passive_host_checks","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"active_host_checks_1min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":451,"warn":null},{"counter":false,"crit":null,"label":"passive_host_checks_1min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"active_host_checks_5min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":451,"warn":null},{"counter":false,"crit":null,"label":"passive_host_checks_5min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"active_host_checks_15min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":451,"warn":null},{"counter":false,"crit":null,"label":"passive_host_checks_15min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"active_service_checks","max":null,"min":null,"type":"PerfdataValue","unit":"","value":47.34161464023706,"warn":null},{"counter":false,"crit":null,"label":"passive_service_checks","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"active_service_checks_1min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":1295,"warn":null},{"counter":false,"crit":null,"label":"passive_service_checks_1min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"active_service_checks_5min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":1295,"warn":null},{"counter":false,"crit":null,"label":"passive_service_checks_5min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"active_service_checks_15min","max":null,"min":null,"type":"Perfdata
Value","unit":"","value":1295,"warn":null},{"counter":false,"crit":null,"label":"passive_service_checks_15min","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"current_pending_callbacks","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"current_concurrent_checks","max":null,"min":null,"type":"PerfdataValue","unit":"","value":68,"warn":null},{"counter":false,"crit":null,"label":"remote_check_queue","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"min_latency","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0.00010800361633300781,"warn":null},{"counter":false,"crit":null,"label":"max_latency","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0.003133535385131836,"warn":null},{"counter":false,"crit":null,"label":"avg_latency","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0.0004072719851866463,"warn":null},{"counter":false,"crit":null,"label":"min_execution_time","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0.0009090900421142578,"warn":null},{"counter":false,"crit":null,"label":"max_execution_time","max":null,"min":null,"type":"PerfdataValue","unit":"","value":4.142040014266968,"warn":null},{"counter":false,"crit":null,"label":"avg_execution_time","max":null,"min":null,"type":"PerfdataValue","unit":"","value":1.3660419934632761,"warn":null},{"counter":false,"crit":null,"label":"num_services_ok","max":null,"min":null,"type":"PerfdataValue","unit":"","value":1972,"warn":null},{"counter":false,"crit":null,"label":"num_services_warning","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"num_services_critical","max":null,"min":null,"type":"PerfdataValue","unit":"","value":47,"warn":null},{"counter":false,"crit":null,"label":"num_services_unknown","max":
null,"min":null,"type":"PerfdataValue","unit":"","value":1001,"warn":null},{"counter":false,"crit":null,"label":"num_services_pending","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"num_services_unreachable","max":null,"min":null,"type":"PerfdataValue","unit":"","value":138,"warn":null},{"counter":false,"crit":null,"label":"num_services_flapping","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"num_services_in_downtime","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"num_services_acknowledged","max":null,"min":null,"type":"PerfdataValue","unit":"","value":2,"warn":null},{"counter":false,"crit":null,"label":"num_services_handled","max":null,"min":null,"type":"PerfdataValue","unit":"","value":149,"warn":null},{"counter":false,"crit":null,"label":"num_services_problem","max":null,"min":null,"type":"PerfdataValue","unit":"","value":1048,"warn":null},{"counter":false,"crit":null,"label":"uptime","max":null,"min":null,"type":"PerfdataValue","unit":"","value":26.343533039093018,"warn":null},{"counter":false,"crit":null,"label":"num_hosts_up","max":null,"min":null,"type":"PerfdataValue","unit":"","value":952,"warn":null},{"counter":false,"crit":null,"label":"num_hosts_down","max":null,"min":null,"type":"PerfdataValue","unit":"","value":49,"warn":null},{"counter":false,"crit":null,"label":"num_hosts_pending","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"num_hosts_unreachable","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"num_hosts_flapping","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"num_hosts_in_downtime","max":null,"min":null,"type":"PerfdataValue","unit":"","value
":0,"warn":null},{"counter":false,"crit":null,"label":"num_hosts_acknowledged","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"num_hosts_handled","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"num_hosts_problem","max":null,"min":null,"type":"PerfdataValue","unit":"","value":49,"warn":null},{"counter":false,"crit":null,"label":"last_messages_sent","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"last_messages_received","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"sum_messages_sent_per_second","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"sum_messages_received_per_second","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"sum_bytes_sent_per_second","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null},{"counter":false,"crit":null,"label":"sum_bytes_received_per_second","max":null,"min":null,"type":"PerfdataValue","unit":"","value":0,"warn":null}],"previous_hard_state":99,"schedule_end":1698673636.071483,"schedule_start":1698673636.0680327,"scheduling_source":"docker-master","state":0,"ttl":0,"type":"CheckResult","vars_after":{"attempt":1,"reachable":true,"state":0,"state_type":1},"vars_before":{"attempt":1,"reachable":true,"state":0,"state_type":1}},"last_hard_state":0,"last_hard_state_change":1697704135.75631,"last_reachable":true,"last_state":0,"last_state_change":1697704135.75631,"last_state_critical":0,"last_state_ok":1698673636.071483,"last_state_type":1,"last_state_unknown":0,"last_state_unreachable":0,"last_state_warning":0,"max_check_attempts":5,"name":"icinga","next_check":1698673695.12149,"next_update":1698673755.1283903,"notes":"",
"notes_url":"","original_attributes":null,"package":"_etc","paused":false,"previous_state_change":1697704135.75631,"problem":false,"retry_interval":30,"severity":0,"source_location":{"first_column":1,"first_line":73,"last_column":22,"last_line":73,"path":"/etc/icinga2/conf.d/services.conf"},"state":0,"state_type":1,"templates":["icinga","generic-service"],"type":"Service","vars":null,"version":0,"volatile":false,"zone":""},"joins":{},"meta":{},"name":"docker-master!icinga","type":"Service"}`, + resp: &ObjectQueriesResult[HostServiceRuntimeAttributes]{}, + expected: &ObjectQueriesResult[HostServiceRuntimeAttributes]{ + Name: "docker-master!icinga", + Type: "Service", + Attrs: HostServiceRuntimeAttributes{ + Name: "icinga", + Host: "docker-master", + Groups: []string{}, + State: 0, + LastCheckResult: CheckResult{ + ExitStatus: 0, + Output: "Icinga 2 has been running for 26 seconds. Version: v2.14.0-35-g31b1294ac", + State: 0, + ExecutionStart: Icinga2Time{time.UnixMicro(1698673636068106)}, + ExecutionEnd: Icinga2Time{time.UnixMicro(1698673636071483)}, + }, + LastStateChange: Icinga2Time{time.UnixMicro(1697704135756310)}, + DowntimeDepth: 0, + Acknowledgement: 0, + AcknowledgementLastChange: Icinga2Time{time.UnixMicro(0)}, + }, + }, + }, } for _, test := range tests { @@ -253,15 +270,9 @@ func TestApiResponseUnmarshal(t *testing.T) { State: 1, StateType: 0, CheckResult: CheckResult{ - ExitStatus: 2, - Output: "If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway", - State: 2, - Command: []string{ - "/bin/bash", - "-c", - "/usr/games/fortune; exit $0", - "2", - }, + ExitStatus: 2, + Output: "If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway", + State: 2, ExecutionStart: Icinga2Time{time.UnixMicro(1697188278194409)}, ExecutionEnd: Icinga2Time{time.UnixMicro(1697188278202986)}, }, @@ -279,15 +290,9 @@ func TestApiResponseUnmarshal(t *testing.T) { State: 2, StateType: 0, CheckResult: CheckResult{ - 
ExitStatus: 2, - Output: "You're growing out of some of your problems, but there are others that\nyou're growing into.", - State: 2, - Command: []string{ - "/bin/bash", - "-c", - "/usr/games/fortune; exit $0", - "2", - }, + ExitStatus: 2, + Output: "You're growing out of some of your problems, but there are others that\nyou're growing into.", + State: 2, ExecutionStart: Icinga2Time{time.UnixMicro(1697184778600973)}, ExecutionEnd: Icinga2Time{time.UnixMicro(1697184778611465)}, }, From f5e67251e8216b871d4e9fff6ecaf8749fe35af7 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 30 Oct 2023 15:52:56 +0100 Subject: [PATCH 32/65] README.md: document Event Stream usage --- README.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 0a4b46e9..6aceecf0 100644 --- a/README.md +++ b/README.md @@ -32,17 +32,21 @@ The `listener_password_hash` is a [PHP `password_hash`](https://www.php.net/manu In the example above, this is "correct horse battery staple". This mimics Icinga Web 2's behavior, as stated in [its documentation](https://icinga.com/docs/icinga-web/latest/doc/20-Advanced-Topics/#manual-user-creation-for-database-authentication-backend). +Currently, there are two ways how notifications get communicated between Icinga 2 and Icinga Notifications. +Please select only one, whereby the first is recommended: + +* Icinga Notifications can pull those from the Icinga 2 API when being configured in the YAML configuration file. + For each `source`, as inserted in the database above, an `icinga2-apis` endpoint must be defined. +* Otherwise, Icinga 2 can push the notifications to the Icinga Notification daemon. + Therefore, you need to copy the [Icinga 2 config](icinga2.conf) to `/etc/icinga2/features-enabled` on your master node(s) and restart the Icinga 2 service. + At the top of this file, you will find multiple configurations options that can be set in `/etc/icinga2/constants.conf`. 
+ There are also Icinga2 `EventCommand` definitions in this file that will automatically match all your **checkables**, which may not work properly if the configuration already uses event commands for something else. + Then, you can launch the daemon with the following command. ```go go run ./cmd/icinga-notifications-daemon --config config.yml ``` -Last but not least, in order for the daemon to receive events from Icinga 2, you need to copy the [Icinga 2 config](icinga2.conf) -to `/etc/icinga2/features-enabled` on your master node(s) and restart the Icinga 2 service. At the top of this file, -you will find multiple configurations options that can be set in `/etc/icinga2/constants.conf`. There are also Icinga2 -`EventCommand` definitions in this file that will automatically match all your **checkables**, which may not work -properly if the configuration already uses event commands for something else. - ## License Icinga Notifications is licensed under the terms of the [GNU General Public License Version 2](LICENSE). From 7dd8c9434140234915be7610126d5f5fd2538e62 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Tue, 31 Oct 2023 16:28:34 +0100 Subject: [PATCH 33/65] eventstream: rework Client's channels This change started by creating two separate channels for events from the Event Stream API and from the replay phase. During debugging, I came across multiple bugs, all of which are more or less addressed here. - I used buffered channels to pass events from the producers to the single dispatcher consumer, even though I should have known better. When the last replay producer has finished, another channel is used to communicate this state change. Because of my lazy buffered channel hack, the last events were raced by the finish signal. - After restoring (channel processing) order, the producers need a way to quickly exit when the consumer has finished. Thus, a select statement - which either sends the event or returns once the context's Done channel is closed - was introduced to all producers.
- Some safeguard checks were introduced, which, e.g., detected the channel race error listed above. - Somehow during a prior refactoring, the Client.fetchServiceGroups method was broken and queried the host groups instead of, as its name says, the service groups. - My Docker-based testing environment sends SIGTERM instead of SIGINT, which, for other reasons, does not even reach the binary. Now SIGTERM is honored for the main context as well. - Some documentation, especially regarding error messages, had either typos or grammatical mistakes. --- internal/eventstream/client.go | 70 +++++++++++++++++------------- internal/eventstream/client_api.go | 30 ++++++++----- 2 files changed, 58 insertions(+), 42 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 9e914522..fde49a94 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -4,6 +4,7 @@ import ( "context" "crypto/tls" "crypto/x509" + "errors" "fmt" "github.com/icinga/icinga-notifications/internal/config" "github.com/icinga/icinga-notifications/internal/daemon" @@ -22,12 +23,6 @@ import ( // This file contains the main resp. common methods for the Client. -// outgoingEvent is a wrapper around an event.Event and its producer's origin to be sent to the eventDispatcher. -type outgoingEvent struct { - event *event.Event - fromEventStream bool -} - // Client for the Icinga 2 Event Stream API with extended support for other Icinga 2 APIs to gather additional // information and allow a replay in case of a connection loss. type Client struct { @@ -49,8 +44,11 @@ type Client struct { // Logger to log to. Logger *logging.Logger - // eventDispatch communicates Events to be processed between producer and consumer. - eventDispatch chan *outgoingEvent + // eventDispatcherEventStream communicates Events to be processed from the Event Stream API.
+ eventDispatcherEventStream chan *event.Event + // eventDispatcherReplay communicates Events to be processed from the Icinga 2 API replay during replay phase. + eventDispatcherReplay chan *event.Event + // replayTrigger signals the eventDispatcher method that the replay phase is finished. replayTrigger chan struct{} // replayPhase indicates that Events will be cached as the Event Stream Client is in the replay phase. @@ -193,22 +191,22 @@ func (client *Client) buildHostServiceEvent(result CheckResult, state int, host, if service != "" { switch state { - case 0: + case 0: // OK eventSeverity = event.SeverityOK - case 1: + case 1: // WARNING eventSeverity = event.SeverityWarning - case 2: + case 2: // CRITICAL eventSeverity = event.SeverityCrit - default: + default: // UNKNOWN or faulty eventSeverity = event.SeverityErr } } else { switch state { - case 0: + case 0: // UP eventSeverity = event.SeverityOK - case 1: + case 1: // DOWN eventSeverity = event.SeverityCrit - default: + default: // faulty eventSeverity = event.SeverityErr } } @@ -249,7 +247,7 @@ func (client *Client) eventDispatcher() { for { select { case <-client.Ctx.Done(): - client.Logger.Warnw("Closing event dispatcher as context is done", zap.Error(client.Ctx.Err())) + client.Logger.Warnw("Closing event dispatcher as its context is done", zap.Error(client.Ctx.Err())) return case <-client.replayTrigger: @@ -260,12 +258,18 @@ func (client *Client) eventDispatcher() { client.replayPhase.Store(false) replayBuffer = []*event.Event{} - case ev := <-client.eventDispatch: - if client.replayPhase.Load() && ev.fromEventStream { - replayBuffer = append(replayBuffer, ev.event) + case ev := <-client.eventDispatcherEventStream: + if client.replayPhase.Load() { + replayBuffer = append(replayBuffer, ev) } else { - client.CallbackFn(ev.event) + client.CallbackFn(ev) + } + + case ev := <-client.eventDispatcherReplay: + if !client.replayPhase.Load() { + client.Logger.Errorw("Dispatcher received replay event during normal 
operation", zap.Stringer("event", ev)) } + client.CallbackFn(ev) } } } @@ -276,7 +280,10 @@ func (client *Client) eventDispatcher() { // all of those have finished, the replayTrigger will be used to indicate that the buffered Events should be replayed. func (client *Client) enterReplayPhase() { client.Logger.Info("Entering replay phase to replay stored events first") - client.replayPhase.Store(true) + if !client.replayPhase.CompareAndSwap(false, true) { + client.Logger.Error("The Event Stream Client is already in the replay phase") + return + } queryFns := []func(string){client.checkMissedAcknowledgements, client.checkMissedStateChanges} objTypes := []string{"host", "service"} @@ -294,7 +301,9 @@ func (client *Client) enterReplayPhase() { } go func() { + startTime := time.Now() replayWg.Wait() + client.Logger.Debugw("All replay phase workers have finished", zap.Duration("duration", time.Since(startTime))) client.replayTrigger <- struct{}{} }() } @@ -305,22 +314,23 @@ func (client *Client) enterReplayPhase() { // loop takes care of reconnections, all those events will be logged while generated Events will be dispatched to the // callback function. func (client *Client) Process() { - // These two channels will be used to communicate the Events and are crucial. As there are multiple producers and - // only one consumer, eventDispatcher, there is no ideal closer. However, producers and the consumer will be - // finished by the Client's context. When this happens, the main application should either be stopped or the Client - // is restarted, and we can hope for the GC. To make sure that nothing gets stuck, make the event channel buffered. 
- client.eventDispatch = make(chan *outgoingEvent, 1024) + client.eventDispatcherEventStream = make(chan *event.Event) + client.eventDispatcherReplay = make(chan *event.Event) client.replayTrigger = make(chan struct{}) - defer client.Logger.Info("Event Stream Client has stopped") - go client.eventDispatcher() for { err := client.listenEventStream() - if err != nil { + switch { + case errors.Is(err, context.Canceled): + client.Logger.Warnw("Stopping Event Stream Client as its context is done", zap.Error(err)) + return + + case err != nil: client.Logger.Errorw("Event Stream processing failed", zap.Error(err)) - } else { + + default: client.Logger.Warn("Event Stream closed stream; maybe Icinga 2 is reloading") } } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 3b5a54df..21bcf460 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -94,7 +94,7 @@ func (client *Client) queryObjectsApiQuery(objType string, query map[string]any) }) } -// fetchHostGroup fetches all Host Groups for this host. +// fetchHostGroups fetches all Host Groups for this host. func (client *Client) fetchHostGroups(host string) ([]string, error) { jsonRaw, err := client.queryObjectsApiDirect("host", host) if err != nil { @@ -114,7 +114,7 @@ func (client *Client) fetchHostGroups(host string) ([]string, error) { // fetchServiceGroups fetches all Service Groups for this service on this host. func (client *Client) fetchServiceGroups(host, service string) ([]string, error) { - jsonRaw, err := client.queryObjectsApiDirect("host", host) + jsonRaw, err := client.queryObjectsApiDirect("service", host+"!"+service) if err != nil { return nil, err } @@ -231,15 +231,18 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba // checkMissedStateChanges fetches all objects of the requested type and feeds them into the handler. 
func (client *Client) checkMissedStateChanges(objType string) { client.checkMissedChanges(objType, "", func(attrs HostServiceRuntimeAttributes, host, service string) { + logger := client.Logger.With(zap.String("object type", objType)) + ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) if err != nil { - client.Logger.Errorw("Failed to construct Event from API", zap.String("object type", objType), zap.Error(err)) + logger.Errorw("Failed to construct Event from API", zap.Error(err)) return } - client.eventDispatch <- &outgoingEvent{ - event: ev, - fromEventStream: false, + select { + case <-client.Ctx.Done(): + logger.Warnw("Cannot dispatch replayed event as context is finished", zap.Error(client.Ctx.Err())) + case client.eventDispatcherReplay <- ev: } }) } @@ -264,9 +267,10 @@ func (client *Client) checkMissedAcknowledgements(objType string) { return } - client.eventDispatch <- &outgoingEvent{ - event: ev, - fromEventStream: false, + select { + case <-client.Ctx.Done(): + logger.Warnw("Cannot dispatch replayed event as context is finished", zap.Error(client.Ctx.Err())) + case client.eventDispatcherReplay <- ev: } }) } @@ -356,9 +360,11 @@ func (client *Client) listenEventStream() error { return err } - client.eventDispatch <- &outgoingEvent{ - event: ev, - fromEventStream: true, + select { + case <-client.Ctx.Done(): + client.Logger.Warnw("Cannot dispatch Event Stream event as context is finished", zap.Error(client.Ctx.Err())) + return client.Ctx.Err() + case client.eventDispatcherEventStream <- ev: } } return lineScanner.Err() From 8e404e00bda20627cfbbcc6260201773c4983e5d Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 2 Nov 2023 10:38:22 +0100 Subject: [PATCH 34/65] eventstream: Event Stream connection timeout Instead of only setting the specific Dialer timeout, a wrapped context which will be closed after three seconds in the connection phase will be used. 
By doing so, timeouts in all connection layers are addressed. --- internal/eventstream/client.go | 11 ------ internal/eventstream/client_api.go | 60 +++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index fde49a94..69119be4 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -12,7 +12,6 @@ import ( "github.com/icinga/icingadb/pkg/icingadb" "github.com/icinga/icingadb/pkg/logging" "go.uber.org/zap" - "net" "net/http" "net/url" "os" @@ -75,16 +74,6 @@ func NewClientsFromConfig( ApiBasicAuthUser: icinga2Api.AuthUser, ApiBasicAuthPass: icinga2Api.AuthPass, ApiHttpTransport: http.Transport{ - // Limit the initial Dial timeout to enable a fast retry when the Icinga 2 API is offline. Due to the - // need for very long-lived connections against the Event Stream API afterward, Client.Timeout would - // limit the whole connection, which would be fatal. - // - // Check the "Client Timeout" section of the following (slightly outdated) blog post: - // https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/ - DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { - dialer := &net.Dialer{Timeout: 3 * time.Second} - return dialer.DialContext(ctx, network, addr) - }, TLSClientConfig: &tls.Config{ MinVersion: tls.VersionTLS13, }, diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 21bcf460..518f3122 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -3,6 +3,7 @@ package eventstream import ( "bufio" "bytes" + "context" "crypto/rand" "encoding/json" "fmt" @@ -305,33 +306,58 @@ func (client *Client) listenEventStream() error { if err != nil { return err } - req, err := http.NewRequestWithContext(client.Ctx, http.MethodPost, apiUrl, bytes.NewReader(reqBody)) - if err != nil { - return err - } - 
req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) - req.Header.Set("Accept", "application/json") - req.Header.Set("Content-Type", "application/json") + var response *http.Response +connectionLoop: + for { + // Sub-context which might get canceled early if connecting takes to long. + // The reqCancel function will be called in the select below or when leaving the function, mostly because its + // parent context, client.Ctx, was finished before. + reqCtx, reqCancel := context.WithCancel(client.Ctx) + defer reqCancel() - httpClient := &http.Client{Transport: &client.ApiHttpTransport} + req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, apiUrl, bytes.NewReader(reqBody)) + if err != nil { + return err + } - var res *http.Response - for { - client.Logger.Info("Try to establish an Event Stream API connection") - res, err = httpClient.Do(req) - if err == nil { - break + req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Type", "application/json") + + resCh := make(chan *http.Response) + + go func() { + client.Logger.Info("Try to establish an Event Stream API connection") + httpClient := &http.Client{Transport: &client.ApiHttpTransport} + + res, err := httpClient.Do(req) + if err != nil { + client.Logger.Warnw("Establishing an Event Stream API connection failed; will be retried", zap.Error(err)) + close(resCh) + return + } + resCh <- res + }() + + select { + case res, ok := <-resCh: + if ok { + response = res + break connectionLoop + } + + case <-time.After(3 * time.Second): } - client.Logger.Warnw("Establishing an Event Stream API connection failed; will be retried", zap.Error(err)) + reqCancel() } - defer func() { _ = res.Body.Close() }() + defer func() { _ = response.Body.Close() }() client.enterReplayPhase() client.Logger.Info("Start listening on Icinga 2 Event Stream..") - lineScanner := bufio.NewScanner(res.Body) + lineScanner := 
bufio.NewScanner(response.Body) for lineScanner.Scan() { rawJson := lineScanner.Bytes() From 3b93a1589741c0e0b3bd08fc3782aed58d43c549 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 2 Nov 2023 13:46:25 +0100 Subject: [PATCH 35/65] eventstream: detect outdated events during replay During a long running replay, there might be contradicting events buffered from the Event Stream, e.g., a CRITICAL followed by an OK. This flapping state can be detected internally and then be dropped. By creating a hash over the identifying fields of an event.Event, related events can be grouped and processed based on their latest time stamp generated by the Icinga 2 API. --- internal/eventstream/client.go | 76 +++++++++++++++++++++++++----- internal/eventstream/client_api.go | 15 ++++-- 2 files changed, 74 insertions(+), 17 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 69119be4..c801d332 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -2,8 +2,10 @@ package eventstream import ( "context" + "crypto/sha256" "crypto/tls" "crypto/x509" + "encoding/binary" "errors" "fmt" "github.com/icinga/icinga-notifications/internal/config" @@ -22,6 +24,12 @@ import ( // This file contains the main resp. common methods for the Client. +// eventMsg is an internal struct for passing events with additional information from producers to the dispatcher. +type eventMsg struct { + event *event.Event + apiTime time.Time +} + // Client for the Icinga 2 Event Stream API with extended support for other Icinga 2 APIs to gather additional // information and allow a replay in case of a connection loss. type Client struct { @@ -44,9 +52,9 @@ type Client struct { Logger *logging.Logger // eventDispatcherEventStream communicates Events to be processed from the Event Stream API. 
- eventDispatcherEventStream chan *event.Event + eventDispatcherEventStream chan *eventMsg // eventDispatcherReplay communicates Events to be processed from the Icinga 2 API replay during replay phase. - eventDispatcherReplay chan *event.Event + eventDispatcherReplay chan *eventMsg // replayTrigger signals the eventDispatcher method that the replay phase is finished. replayTrigger chan struct{} @@ -231,7 +239,32 @@ func (client *Client) buildAcknowledgementEvent(host, service, author, comment s // When the Client is in the replay phase, events from the Event Stream API will be cached until the replay phase has // finished, while replayed events will be delivered directly. func (client *Client) eventDispatcher() { - var replayBuffer []*event.Event + var ( + // replayBuffer holds Event Stream events to be replayed after the replay phase has finished. + replayBuffer = make([]*event.Event, 0) + // replayCache maps eventHash(ev) to API time to skip replaying outdated Event Stream events. + replayCache = make(map[[sha256.Size]byte]time.Time) + ) + + // eventHash maps a subset of an event.Event to a hash. This is necessary for the replayCache below. As flapping + // events should be ignored, only some of the event fields will be encoded. By excluding some, e.g., the severity, + // events concerning the same host or service are grouped anyway. + eventHash := func(ev *event.Event) [sha256.Size]byte { + h := sha256.New() + _ = binary.Write(h, binary.BigEndian, ev.SourceId) + _, _ = fmt.Fprint(h, ev.Name) + _, _ = fmt.Fprint(h, ev.Type) + return [sha256.Size]byte(h.Sum(nil)) + } + + // eventHashUpdate updates the replayCache if this eventMsg seems to be the latest of its kind. 
+ eventHashUpdate := func(ev *eventMsg) { + h := eventHash(ev.event) + ts, ok := replayCache[h] + if !ok || ev.apiTime.After(ts) { + replayCache[h] = ev.apiTime + } + } for { select { @@ -240,25 +273,42 @@ func (client *Client) eventDispatcher() { return case <-client.replayTrigger: + skipCounter := 0 for _, ev := range replayBuffer { + ts, ok := replayCache[eventHash(ev)] + if ok && ev.Time.Before(ts) { + client.Logger.Debugw("Skip replaying outdated Event Stream event", zap.Stringer("event", ev), + zap.Time("event timestamp", ev.Time), zap.Time("cache timestamp", ts)) + skipCounter++ + continue + } + client.CallbackFn(ev) } - client.Logger.Infow("Finished replay phase, returning to normal operation", zap.Int("cached events", len(replayBuffer))) + client.Logger.Infow("Finished replay phase, returning to normal operation", + zap.Int("cached events", len(replayBuffer)), zap.Int("skipped events", skipCounter)) + + replayBuffer = make([]*event.Event, 0) + replayCache = make(map[[sha256.Size]byte]time.Time) client.replayPhase.Store(false) - replayBuffer = []*event.Event{} case ev := <-client.eventDispatcherEventStream: - if client.replayPhase.Load() { - replayBuffer = append(replayBuffer, ev) - } else { - client.CallbackFn(ev) + if !client.replayPhase.Load() { + client.CallbackFn(ev.event) + continue } + replayBuffer = append(replayBuffer, ev.event) + eventHashUpdate(ev) + case ev := <-client.eventDispatcherReplay: if !client.replayPhase.Load() { - client.Logger.Errorw("Dispatcher received replay event during normal operation", zap.Stringer("event", ev)) + client.Logger.Errorw("Dispatcher received replay event during normal operation", zap.Stringer("event", ev.event)) + continue } - client.CallbackFn(ev) + + client.CallbackFn(ev.event) + eventHashUpdate(ev) } } } @@ -303,8 +353,8 @@ func (client *Client) enterReplayPhase() { // loop takes care of reconnections, all those events will be logged while generated Events will be dispatched to the // callback function. 
func (client *Client) Process() { - client.eventDispatcherEventStream = make(chan *event.Event) - client.eventDispatcherReplay = make(chan *event.Event) + client.eventDispatcherEventStream = make(chan *eventMsg) + client.eventDispatcherReplay = make(chan *eventMsg) client.replayTrigger = make(chan struct{}) go client.eventDispatcher() diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 518f3122..a2360c4d 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -243,7 +243,7 @@ func (client *Client) checkMissedStateChanges(objType string) { select { case <-client.Ctx.Done(): logger.Warnw("Cannot dispatch replayed event as context is finished", zap.Error(client.Ctx.Err())) - case client.eventDispatcherReplay <- ev: + case client.eventDispatcherReplay <- &eventMsg{ev, attrs.LastStateChange.Time}: } }) } @@ -271,7 +271,7 @@ func (client *Client) checkMissedAcknowledgements(objType string) { select { case <-client.Ctx.Done(): logger.Warnw("Cannot dispatch replayed event as context is finished", zap.Error(client.Ctx.Err())) - case client.eventDispatcherReplay <- ev: + case client.eventDispatcherReplay <- &eventMsg{ev, attrs.AcknowledgementLastChange.Time}: } }) } @@ -366,12 +366,19 @@ connectionLoop: return err } - var ev *event.Event + var ( + ev *event.Event + evTime time.Time + ) switch respT := resp.(type) { case *StateChange: ev, err = client.buildHostServiceEvent(respT.CheckResult, respT.State, respT.Host, respT.Service) + evTime = respT.Timestamp.Time + case *AcknowledgementSet: ev, err = client.buildAcknowledgementEvent(respT.Host, respT.Service, respT.Author, respT.Comment) + evTime = respT.Timestamp.Time + // case *AcknowledgementCleared: // case *CommentAdded: // case *CommentRemoved: @@ -390,7 +397,7 @@ connectionLoop: case <-client.Ctx.Done(): client.Logger.Warnw("Cannot dispatch Event Stream event as context is finished", zap.Error(client.Ctx.Err())) return client.Ctx.Err() - case 
client.eventDispatcherEventStream <- &eventMsg{ev, evTime}: } } return lineScanner.Err() From 99ddf20cf1ed081a64061e7f1afedb213e41317d Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 2 Nov 2023 14:34:17 +0100 Subject: [PATCH 36/65] eventstream: Icinga 2 API query fixes First, the HTTP Client against the Icinga 2 API is only limited by the Client's context. However, during a connection loss this context will not be closed and timing out might take unnecessarily long. As all those requests are short-lived API queries, a total timeout of three seconds was set. Another bug was a missing URL encoding for the object name which causes issues with "example-host!disk /". The trailing slash was still part of the joined URL in its unescaped form. --- internal/eventstream/client_api.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index a2360c4d..aaad4483 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -54,7 +54,10 @@ func (client *Client) queryObjectsApi(urlPaths []string, method string, body io. req.Header.Set(k, v) } - httpClient := &http.Client{Transport: &client.ApiHttpTransport} + httpClient := &http.Client{ + Transport: &client.ApiHttpTransport, + Timeout: 3 * time.Second, + } res, err := httpClient.Do(req) if err != nil { return nil, err @@ -71,7 +74,7 @@ func (client *Client) queryObjectsApi(urlPaths []string, method string, body io. // queryObjectsApiDirect performs a direct resp. "fast" API query against an object, optionally identified by its name. 
func (client *Client) queryObjectsApiDirect(objType, objName string) (io.ReadCloser, error) { return client.queryObjectsApi( - []string{"/v1/objects/", objType + "s/", objName}, + []string{"/v1/objects/", objType + "s/", url.PathEscape(objName)}, http.MethodGet, nil, map[string]string{"Accept": "application/json"}) From a0da9990c227a9c72d1dfbadc4134b77c6929339 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 2 Nov 2023 16:28:28 +0100 Subject: [PATCH 37/65] eventstream: group replay goroutines in errgroup By switching from a WaitGroup to an errgroup, a quicker exit was possible with the groups' custom context. --- internal/eventstream/client.go | 26 ++++++----- internal/eventstream/client_api.go | 74 ++++++++++++++---------------- 2 files changed, 49 insertions(+), 51 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index c801d332..e02c0a4a 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -14,10 +14,10 @@ import ( "github.com/icinga/icingadb/pkg/icingadb" "github.com/icinga/icingadb/pkg/logging" "go.uber.org/zap" + "golang.org/x/sync/errgroup" "net/http" "net/url" "os" - "sync" "sync/atomic" "time" ) @@ -324,25 +324,29 @@ func (client *Client) enterReplayPhase() { return } - queryFns := []func(string){client.checkMissedAcknowledgements, client.checkMissedStateChanges} + queryFns := []func(string, context.Context) error{client.checkMissedAcknowledgements, client.checkMissedStateChanges} objTypes := []string{"host", "service"} - var replayWg sync.WaitGroup - replayWg.Add(len(queryFns) * len(objTypes)) - + group, groupCtx := errgroup.WithContext(client.Ctx) for _, fn := range queryFns { for _, objType := range objTypes { - go func(fn func(string), objType string) { - fn(objType) - replayWg.Done() - }(fn, objType) + fn, objType := fn, objType // https://go.dev/doc/faq#closures_and_goroutines + group.Go(func() error { + return fn(objType, groupCtx) + }) } } go func() { startTime := 
time.Now() - replayWg.Wait() - client.Logger.Debugw("All replay phase workers have finished", zap.Duration("duration", time.Since(startTime))) + + err := group.Wait() + if err != nil { + client.Logger.Errorw("Replaying the API resulted in errors", zap.Error(err), zap.Duration("duration", time.Since(startTime))) + } else { + client.Logger.Debugw("All replay phase workers have finished", zap.Duration("duration", time.Since(startTime))) + } + client.replayTrigger <- struct{}{} }() } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index aaad4483..d1ba09ad 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -178,42 +178,37 @@ func (client *Client) fetchAcknowledgementComment(host, service string, ackTime // If a filterExpr is given (non-empty string), it will be used for the query. Otherwise, all objects will be requested. // // The callback function will be called f.e. object of the objType (i.e. "host" or "service") being retrieved from the -// Icinga 2 Objects API. The callback function or a later caller must decide if this object should be replayed. -func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallbackFn func(attrs HostServiceRuntimeAttributes, host, service string)) { - var ( - logger = client.Logger.With(zap.String("object type", objType)) - - jsonRaw io.ReadCloser - err error - ) +// Icinga 2 Objects API sequentially. The callback function or a later caller decides if this object should be replayed. 
+func (client *Client) checkMissedChanges( + objType, filterExpr string, + attrsCallbackFn func(attrs HostServiceRuntimeAttributes, host, service string) error, +) (err error) { + logger := client.Logger.With(zap.String("object type", objType), zap.String("filter expr", filterExpr)) + + defer func() { + if err != nil { + logger.Errorw("Querying API for replay failed", zap.Error(err)) + } + }() + + var jsonRaw io.ReadCloser if filterExpr == "" { jsonRaw, err = client.queryObjectsApiDirect(objType, "") } else { jsonRaw, err = client.queryObjectsApiQuery(objType, map[string]any{"filter": filterExpr}) } if err != nil { - logger.Errorw("Querying API failed", zap.Error(err)) return } objQueriesResults, err := extractObjectQueriesResult[HostServiceRuntimeAttributes](jsonRaw) if err != nil { - logger.Errorw("Parsing API response failed", zap.Error(err)) - return - } - - if len(objQueriesResults) == 0 { return } logger.Debugw("Querying API resulted in state changes", zap.Int("changes", len(objQueriesResults))) for _, objQueriesResult := range objQueriesResults { - if client.Ctx.Err() != nil { - logger.Warnw("Stopping API response processing as context is finished", zap.Error(client.Ctx.Err())) - return - } - var hostName, serviceName string switch objQueriesResult.Type { case "Host": @@ -224,29 +219,31 @@ func (client *Client) checkMissedChanges(objType, filterExpr string, attrsCallba serviceName = objQueriesResult.Attrs.Name default: - logger.Errorw("Querying API delivered a wrong object type", zap.String("result type", objQueriesResult.Type)) - continue + err = fmt.Errorf("querying API delivered a wrong object type %q", objQueriesResult.Type) + return } - attrsCallbackFn(objQueriesResult.Attrs, hostName, serviceName) + err = attrsCallbackFn(objQueriesResult.Attrs, hostName, serviceName) + if err != nil { + return + } } + return } // checkMissedStateChanges fetches all objects of the requested type and feeds them into the handler. 
-func (client *Client) checkMissedStateChanges(objType string) { - client.checkMissedChanges(objType, "", func(attrs HostServiceRuntimeAttributes, host, service string) { - logger := client.Logger.With(zap.String("object type", objType)) - +func (client *Client) checkMissedStateChanges(objType string, ctx context.Context) error { + return client.checkMissedChanges(objType, "", func(attrs HostServiceRuntimeAttributes, host, service string) error { ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) if err != nil { - logger.Errorw("Failed to construct Event from API", zap.Error(err)) - return + return fmt.Errorf("failed to construct Event from API, %w", err) } select { - case <-client.Ctx.Done(): - logger.Warnw("Cannot dispatch replayed event as context is finished", zap.Error(client.Ctx.Err())) + case <-ctx.Done(): + return ctx.Err() case client.eventDispatcherReplay <- &eventMsg{ev, attrs.LastStateChange.Time}: + return nil } }) } @@ -254,27 +251,24 @@ func (client *Client) checkMissedStateChanges(objType string) { // checkMissedAcknowledgements fetches all Host or Service Acknowledgements and feeds them into the handler. // // Currently only active acknowledgements are being processed. 
-func (client *Client) checkMissedAcknowledgements(objType string) { +func (client *Client) checkMissedAcknowledgements(objType string, ctx context.Context) error { filterExpr := fmt.Sprintf("%s.acknowledgement", objType) - client.checkMissedChanges(objType, filterExpr, func(attrs HostServiceRuntimeAttributes, host, service string) { - logger := client.Logger.With(zap.String("object type", objType)) - + return client.checkMissedChanges(objType, filterExpr, func(attrs HostServiceRuntimeAttributes, host, service string) error { ackComment, err := client.fetchAcknowledgementComment(host, service, attrs.AcknowledgementLastChange.Time) if err != nil { - logger.Errorw("Cannot fetch ACK Comment for Acknowledgement", zap.Error(err)) - return + return fmt.Errorf("cannot fetch ACK Comment for Acknowledgement, %w", err) } ev, err := client.buildAcknowledgementEvent(host, service, ackComment.Author, ackComment.Text) if err != nil { - logger.Errorw("Failed to construct Event from Acknowledgement API", zap.Error(err)) - return + return fmt.Errorf("failed to construct Event from Acknowledgement API, %w", err) } select { - case <-client.Ctx.Done(): - logger.Warnw("Cannot dispatch replayed event as context is finished", zap.Error(client.Ctx.Err())) + case <-ctx.Done(): + return ctx.Err() case client.eventDispatcherReplay <- &eventMsg{ev, attrs.AcknowledgementLastChange.Time}: + return nil } }) } From baa205b58afb7cb61c8acbd42ddddd2312b51f1d Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 3 Nov 2023 11:12:45 +0100 Subject: [PATCH 38/65] eventstream: Icinga 2-compatible TLS configuration --- internal/eventstream/client.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index e02c0a4a..76a40d6b 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -82,8 +82,20 @@ func NewClientsFromConfig( ApiBasicAuthUser: icinga2Api.AuthUser, ApiBasicAuthPass: 
icinga2Api.AuthPass, ApiHttpTransport: http.Transport{ + // Hardened TLS config adjusted to Icinga 2's configuration: + // - https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-apilistener + // - https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#security + // - https://ssl-config.mozilla.org/#server=go&config=intermediate TLSClientConfig: &tls.Config{ - MinVersion: tls.VersionTLS13, + MinVersion: tls.VersionTLS12, + CipherSuites: []uint16{ + tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, + tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, + tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, + tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, + tls.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305, + }, }, }, From 330aea0950b5e656a5b482e2e76d16fc2e68dc0b Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 3 Nov 2023 12:23:55 +0100 Subject: [PATCH 39/65] eventstream: build Event URL more carefully Unfortunately, using url.Values was not possible for the query arguments as it encodes spaces to a plus sign, which Icinga Web rejects. --- internal/eventstream/client.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 76a40d6b..e2136fde 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -145,14 +145,21 @@ func NewClientsFromConfig( func (client *Client) buildCommonEvent(host, service string) (*event.Event, error) { var ( eventName string - eventUrlSuffix string + eventUrl *url.URL eventTags map[string]string eventExtraTags = make(map[string]string) ) + eventUrl, err := url.Parse(client.IcingaWebRoot) + if err != nil { + return nil, err + } + if service != "" { eventName = host + "!" 
+ service - eventUrlSuffix = "/icingadb/service?name=" + url.PathEscape(service) + "&host.name=" + url.PathEscape(host) + + eventUrl = eventUrl.JoinPath("/icingadb/service") + eventUrl.RawQuery = "name=" + url.PathEscape(service) + "&host.name=" + url.PathEscape(host) eventTags = map[string]string{ "host": host, @@ -168,7 +175,9 @@ func (client *Client) buildCommonEvent(host, service string) (*event.Event, erro } } else { eventName = host - eventUrlSuffix = "/icingadb/host?name=" + url.PathEscape(host) + + eventUrl = eventUrl.JoinPath("/icingadb/host") + eventUrl.RawQuery = "name=" + url.PathEscape(host) eventTags = map[string]string{ "host": host, @@ -187,7 +196,7 @@ func (client *Client) buildCommonEvent(host, service string) (*event.Event, erro Time: time.Now(), SourceId: client.IcingaNotificationsEventSourceId, Name: eventName, - URL: client.IcingaWebRoot + eventUrlSuffix, + URL: eventUrl.String(), Tags: eventTags, ExtraTags: eventExtraTags, }, nil From f3d9f085003e45cac3f3ab4cb3e05820d88ae968 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 3 Nov 2023 15:06:12 +0100 Subject: [PATCH 40/65] eventstream: refactor Event Stream connection By both extracting and refactoring the Event Stream connection logic, multiple issues were addressed at once. - The defer, which might fill up the call stack, was removed and the cancel function is either called or returned to be called via a defer there. - By extracting the logic into an own method, it is easier to return. - A rising but limited delay was introduced against API flooding. - It was ensured that the connection goroutine will always exit. 
--- internal/eventstream/client_api.go | 103 ++++++++++++++++++----------- 1 file changed, 65 insertions(+), 38 deletions(-) diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index d1ba09ad..4bab0aab 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -10,6 +10,7 @@ import ( "github.com/icinga/icinga-notifications/internal/event" "go.uber.org/zap" "io" + "math" "net/http" "net/url" "slices" @@ -273,49 +274,38 @@ func (client *Client) checkMissedAcknowledgements(objType string, ctx context.Co }) } -// listenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. +// connectEventStream connects to the EventStream within an infinite loop until a connection was established. // -// In case of a parsing or handling error, this error will be returned. If the server closes the connection, nil will -// be returned. -func (client *Client) listenEventStream() error { - queueNameRndBuff := make([]byte, 16) - _, _ = rand.Read(queueNameRndBuff) - - reqBody, err := json.Marshal(map[string]any{ - "queue": fmt.Sprintf("icinga-notifications-%x", queueNameRndBuff), - "types": []string{ - typeStateChange, - typeAcknowledgementSet, - // typeAcknowledgementCleared, - // typeCommentAdded, - // typeCommentRemoved, - // typeDowntimeAdded, - // typeDowntimeRemoved, - // typeDowntimeStarted, - // typeDowntimeTriggered, - }, - }) - if err != nil { - return err - } - +// The esTypes is a string array of required Event Stream types. +// +// An error will be returned if reconnecting resp. retrying the (almost) same thing will not help fix it. 
+func (client *Client) connectEventStream(esTypes []string) (*http.Response, context.CancelFunc, error) { apiUrl, err := url.JoinPath(client.ApiHost, "/v1/events") if err != nil { - return err + return nil, nil, err } - var response *http.Response -connectionLoop: - for { + for i := 0; ; i++ { + // Always ensure an unique queue name to ensure no conflicts might occur. + queueNameRndBuff := make([]byte, 16) + _, _ = rand.Read(queueNameRndBuff) + + reqBody, err := json.Marshal(map[string]any{ + "queue": fmt.Sprintf("icinga-notifications-%x", queueNameRndBuff), + "types": esTypes, + }) + if err != nil { + return nil, nil, err + } + // Sub-context which might get canceled early if connecting takes to long. - // The reqCancel function will be called in the select below or when leaving the function, mostly because its - // parent context, client.Ctx, was finished before. + // The reqCancel function will be called after the select below or when leaving the function with an error. reqCtx, reqCancel := context.WithCancel(client.Ctx) - defer reqCancel() req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, apiUrl, bytes.NewReader(reqBody)) if err != nil { - return err + reqCancel() + return nil, nil, err } req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) @@ -325,29 +315,66 @@ connectionLoop: resCh := make(chan *http.Response) go func() { + defer close(resCh) + client.Logger.Info("Try to establish an Event Stream API connection") httpClient := &http.Client{Transport: &client.ApiHttpTransport} - res, err := httpClient.Do(req) if err != nil { client.Logger.Warnw("Establishing an Event Stream API connection failed; will be retried", zap.Error(err)) - close(resCh) return } - resCh <- res + + select { + case resCh <- res: + + case <-reqCtx.Done(): + // This case might happen when this httpClient.Do and the time.After in the select below finish at round + // about the exact same time, but httpClient.Do was slightly faster than reqCancel(). 
+ _ = res.Body.Close() + } }() select { case res, ok := <-resCh: if ok { - response = res - break connectionLoop + return res, reqCancel, nil } case <-time.After(3 * time.Second): } reqCancel() + + // Rate limit API reconnections: slow down for successive failed attempts but limit to three minutes. + // 1s, 2s, 4s, 8s, 16s, 32s, 1m4s, 2m8s, 3m, 3m, 3m, ... + select { + case <-time.After(min(3*time.Minute, time.Duration(math.Exp2(float64(i)))*time.Second)): + case <-client.Ctx.Done(): + return nil, client.Ctx.Err() + } + } +} + +// listenEventStream subscribes to the Icinga 2 API Event Stream and handles received objects. +// +// In case of a parsing or handling error, this error will be returned. If the server closes the connection, nil will +// be returned. +func (client *Client) listenEventStream() error { + response, cancel, err := client.connectEventStream([]string{ + typeStateChange, + typeAcknowledgementSet, + // typeAcknowledgementCleared, + // typeCommentAdded, + // typeCommentRemoved, + // typeDowntimeAdded, + // typeDowntimeRemoved, + // typeDowntimeStarted, + // typeDowntimeTriggered, + }) + if err != nil { + return err } + defer cancel() defer func() { _ = response.Body.Close() }() client.enterReplayPhase() From a656dda8176f8ea39b589635dc6698b5c751ea5d Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 3 Nov 2023 15:21:00 +0100 Subject: [PATCH 41/65] eventstream: context.Context as first parameter --- internal/eventstream/client.go | 4 ++-- internal/eventstream/client_api.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index e2136fde..fc0f8004 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -345,7 +345,7 @@ func (client *Client) enterReplayPhase() { return } - queryFns := []func(string, context.Context) error{client.checkMissedAcknowledgements, client.checkMissedStateChanges} + queryFns := []func(context.Context, string) 
error{client.checkMissedAcknowledgements, client.checkMissedStateChanges} objTypes := []string{"host", "service"} group, groupCtx := errgroup.WithContext(client.Ctx) @@ -353,7 +353,7 @@ func (client *Client) enterReplayPhase() { for _, objType := range objTypes { fn, objType := fn, objType // https://go.dev/doc/faq#closures_and_goroutines group.Go(func() error { - return fn(objType, groupCtx) + return fn(groupCtx, objType) }) } } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 4bab0aab..3cdc9641 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -233,7 +233,7 @@ func (client *Client) checkMissedChanges( } // checkMissedStateChanges fetches all objects of the requested type and feeds them into the handler. -func (client *Client) checkMissedStateChanges(objType string, ctx context.Context) error { +func (client *Client) checkMissedStateChanges(ctx context.Context, objType string) error { return client.checkMissedChanges(objType, "", func(attrs HostServiceRuntimeAttributes, host, service string) error { ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) if err != nil { @@ -252,7 +252,7 @@ func (client *Client) checkMissedStateChanges(objType string, ctx context.Contex // checkMissedAcknowledgements fetches all Host or Service Acknowledgements and feeds them into the handler. // // Currently only active acknowledgements are being processed. 
-func (client *Client) checkMissedAcknowledgements(objType string, ctx context.Context) error { +func (client *Client) checkMissedAcknowledgements(ctx context.Context, objType string) error { filterExpr := fmt.Sprintf("%s.acknowledgement", objType) return client.checkMissedChanges(objType, filterExpr, func(attrs HostServiceRuntimeAttributes, host, service string) error { ackComment, err := client.fetchAcknowledgementComment(host, service, attrs.AcknowledgementLastChange.Time) From 0c05cde5bc06facc5af0c7b5aaba06f2ff870a1d Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 3 Nov 2023 15:43:43 +0100 Subject: [PATCH 42/65] eventstream: base replay cache on event name As now all elements are comparable, creating an obscure hashed key is no longer necessary. At least debugging is now easier. --- internal/eventstream/client.go | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index fc0f8004..79e424e4 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -2,10 +2,8 @@ package eventstream import ( "context" - "crypto/sha256" "crypto/tls" "crypto/x509" - "encoding/binary" "errors" "fmt" "github.com/icinga/icinga-notifications/internal/config" @@ -263,27 +261,15 @@ func (client *Client) eventDispatcher() { var ( // replayBuffer holds Event Stream events to be replayed after the replay phase has finished. replayBuffer = make([]*event.Event, 0) - // replayCache maps eventHash(ev) to API time to skip replaying outdated Event Stream events. - replayCache = make(map[[sha256.Size]byte]time.Time) + // replayCache maps event.Events.Name to API time to skip replaying outdated events. + replayCache = make(map[string]time.Time) ) - // eventHash maps a subset of an event.Event to a hash. This is necessary for the replayCache below. As flapping - // events should be ignored, only some of the event fields will be encoded. 
By excluding some, e.g., the severity, - // events concerning the same host or service are grouped anyway. - eventHash := func(ev *event.Event) [sha256.Size]byte { - h := sha256.New() - _ = binary.Write(h, binary.BigEndian, ev.SourceId) - _, _ = fmt.Fprint(h, ev.Name) - _, _ = fmt.Fprint(h, ev.Type) - return [sha256.Size]byte(h.Sum(nil)) - } - - // eventHashUpdate updates the replayCache if this eventMsg seems to be the latest of its kind. - eventHashUpdate := func(ev *eventMsg) { - h := eventHash(ev.event) - ts, ok := replayCache[h] + // replayCacheUpdate updates the replayCache if this eventMsg seems to be the latest of its kind. + replayCacheUpdate := func(ev *eventMsg) { + ts, ok := replayCache[ev.event.Name] if !ok || ev.apiTime.After(ts) { - replayCache[h] = ev.apiTime + replayCache[ev.event.Name] = ev.apiTime } } @@ -296,7 +282,7 @@ func (client *Client) eventDispatcher() { case <-client.replayTrigger: skipCounter := 0 for _, ev := range replayBuffer { - ts, ok := replayCache[eventHash(ev)] + ts, ok := replayCache[ev.Name] if ok && ev.Time.Before(ts) { client.Logger.Debugw("Skip replaying outdated Event Stream event", zap.Stringer("event", ev), zap.Time("event timestamp", ev.Time), zap.Time("cache timestamp", ts)) @@ -310,7 +296,7 @@ func (client *Client) eventDispatcher() { zap.Int("cached events", len(replayBuffer)), zap.Int("skipped events", skipCounter)) replayBuffer = make([]*event.Event, 0) - replayCache = make(map[[sha256.Size]byte]time.Time) + replayCache = make(map[string]time.Time) client.replayPhase.Store(false) case ev := <-client.eventDispatcherEventStream: @@ -320,7 +306,7 @@ func (client *Client) eventDispatcher() { } replayBuffer = append(replayBuffer, ev.event) - eventHashUpdate(ev) + replayCacheUpdate(ev) case ev := <-client.eventDispatcherReplay: if !client.replayPhase.Load() { @@ -329,7 +315,7 @@ func (client *Client) eventDispatcher() { } client.CallbackFn(ev.event) - eventHashUpdate(ev) + replayCacheUpdate(ev) } } } From 
3c4f9468a05d4726f471353412f8067ca9b56e58 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 3 Nov 2023 17:02:24 +0100 Subject: [PATCH 43/65] eventstream: mimic PHP's rawurlencode function As Icinga Web uses rawurldecode instead of urldecode, the space to plus substitution cannot be used. --- internal/eventstream/client.go | 4 ++-- internal/eventstream/client_api.go | 2 +- internal/eventstream/util.go | 14 ++++++++++++++ internal/eventstream/util_test.go | 26 ++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 internal/eventstream/util_test.go diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 79e424e4..55ebff69 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -157,7 +157,7 @@ func (client *Client) buildCommonEvent(host, service string) (*event.Event, erro eventName = host + "!" + service eventUrl = eventUrl.JoinPath("/icingadb/service") - eventUrl.RawQuery = "name=" + url.PathEscape(service) + "&host.name=" + url.PathEscape(host) + eventUrl.RawQuery = "name=" + rawurlencode(service) + "&host.name=" + rawurlencode(host) eventTags = map[string]string{ "host": host, @@ -175,7 +175,7 @@ func (client *Client) buildCommonEvent(host, service string) (*event.Event, erro eventName = host eventUrl = eventUrl.JoinPath("/icingadb/host") - eventUrl.RawQuery = "name=" + url.PathEscape(host) + eventUrl.RawQuery = "name=" + rawurlencode(host) eventTags = map[string]string{ "host": host, diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 3cdc9641..6f7dcfc9 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -350,7 +350,7 @@ func (client *Client) connectEventStream(esTypes []string) (*http.Response, cont select { case <-time.After(min(3*time.Minute, time.Duration(math.Exp2(float64(i)))*time.Second)): case <-client.Ctx.Done(): - return nil, client.Ctx.Err() + return nil, nil,
client.Ctx.Err() } } } diff --git a/internal/eventstream/util.go b/internal/eventstream/util.go index 94c7e8b2..3337f8be 100644 --- a/internal/eventstream/util.go +++ b/internal/eventstream/util.go @@ -9,6 +9,8 @@ import ( "github.com/icinga/icingadb/pkg/icingadb" "github.com/icinga/icingadb/pkg/logging" "go.uber.org/zap" + "net/url" + "strings" ) // ProcessEvent is a copy pasta version of the second half of Listener.ProcessEvent to be removed after #99 has landed. @@ -138,3 +140,15 @@ func makeProcessEvent( } } } + +// rawurlencode mimics PHP's rawurlencode to be used for parameter encoding. +// +// Icinga Web uses rawurldecode instead of urldecode, which, as its main difference, does not honor the plus char ('+') +// as a valid substitution for space (' '). Unfortunately, Go's url.QueryEscape does this very substitution and +// url.PathEscape does a bit too less and has a misleading name on top. +// +// - https://www.php.net/manual/en/function.rawurlencode.php +// - https://github.com/php/php-src/blob/php-8.2.12/ext/standard/url.c#L538 +func rawurlencode(s string) string { + return strings.ReplaceAll(url.QueryEscape(s), "+", "%20") +} diff --git a/internal/eventstream/util_test.go b/internal/eventstream/util_test.go new file mode 100644 index 00000000..2f531106 --- /dev/null +++ b/internal/eventstream/util_test.go @@ -0,0 +1,26 @@ +package eventstream + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestRawurlencode(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + {"empty", "", ""}, + {"printable", "abcABC0123", "abcABC0123"}, + {"space", "foo bar", "foo%20bar"}, + {"plus", "foo+bar", "foo%2Bbar"}, + {"slash", "foo/bar", "foo%2Fbar"}, + {"percent", "foo%bar", "foo%25bar"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, rawurlencode(tt.in)) + }) + } +} From fac71ef0fab0dddde5c7231d791403331440c261 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 6 
Nov 2023 10:38:28 +0100 Subject: [PATCH 44/65] eventstream: replay State and ACK Changes together Instead of querying both state and acknowledgement changes in parallel and risking replaying an ACK before the referenced state change event, acknowledgement events are now being generated only on demand after a state change event was emitted which holds an acknowledgement flag. This allowed some refactoring as now some methods have collided. Furthermore, the replay context - derived from the Client context - is now explicitly passed to all context-bound functions, allowing a strict termination in case of an early replay error. The other code path - Event Stream API - still uses the Client's main context. --- internal/eventstream/client.go | 35 +++---- internal/eventstream/client_api.go | 154 +++++++++++++---------------- 2 files changed, 84 insertions(+), 105 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 55ebff69..124e3057 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -140,7 +140,7 @@ func NewClientsFromConfig( // - Username // - Message // - ID -func (client *Client) buildCommonEvent(host, service string) (*event.Event, error) { +func (client *Client) buildCommonEvent(ctx context.Context, host, service string) (*event.Event, error) { var ( eventName string eventUrl *url.URL @@ -164,7 +164,7 @@ func (client *Client) buildCommonEvent(host, service string) (*event.Event, erro "service": service, } - serviceGroups, err := client.fetchServiceGroups(host, service) + serviceGroups, err := client.fetchHostServiceGroups(ctx, host, service) if err != nil { return nil, err } @@ -182,7 +182,7 @@ func (client *Client) buildCommonEvent(host, service string) (*event.Event, erro } } - hostGroups, err := client.fetchHostGroups(host) + hostGroups, err := client.fetchHostServiceGroups(ctx, host, "") if err != nil { return nil, err } @@ -202,7 +202,7 @@ func (client *Client) buildCommonEvent(host, 
service string) (*event.Event, erro // buildHostServiceEvent constructs an event.Event based on a CheckResult, a Host or Service state, a Host name and an // optional Service name if the Event should represent a Service object. -func (client *Client) buildHostServiceEvent(result CheckResult, state int, host, service string) (*event.Event, error) { +func (client *Client) buildHostServiceEvent(ctx context.Context, result CheckResult, state int, host, service string) (*event.Event, error) { var eventSeverity event.Severity if service != "" { @@ -227,7 +227,7 @@ func (client *Client) buildHostServiceEvent(result CheckResult, state int, host, } } - ev, err := client.buildCommonEvent(host, service) + ev, err := client.buildCommonEvent(ctx, host, service) if err != nil { return nil, err } @@ -240,8 +240,8 @@ func (client *Client) buildHostServiceEvent(result CheckResult, state int, host, } // buildAcknowledgementEvent from the given fields. -func (client *Client) buildAcknowledgementEvent(host, service, author, comment string) (*event.Event, error) { - ev, err := client.buildCommonEvent(host, service) +func (client *Client) buildAcknowledgementEvent(ctx context.Context, host, service, author, comment string) (*event.Event, error) { + ev, err := client.buildCommonEvent(ctx, host, service) if err != nil { return nil, err } @@ -331,17 +331,18 @@ func (client *Client) enterReplayPhase() { return } - queryFns := []func(context.Context, string) error{client.checkMissedAcknowledgements, client.checkMissedStateChanges} - objTypes := []string{"host", "service"} - group, groupCtx := errgroup.WithContext(client.Ctx) - for _, fn := range queryFns { - for _, objType := range objTypes { - fn, objType := fn, objType // https://go.dev/doc/faq#closures_and_goroutines - group.Go(func() error { - return fn(groupCtx, objType) - }) - } + objTypes := []string{"host", "service"} + for _, objType := range objTypes { + objType := objType // https://go.dev/doc/faq#closures_and_goroutines + 
group.Go(func() error { + err := client.checkMissedChanges(groupCtx, objType) + if err != nil { + client.Logger.Errorw("Replaying API events resulted in errors", + zap.String("object type", objType), zap.Error(err)) + } + return err + }) } go func() { diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 6f7dcfc9..67f19753 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -40,12 +40,18 @@ func extractObjectQueriesResult[T Comment | Downtime | HostServiceRuntimeAttribu } // queryObjectsApi performs a configurable HTTP request against the Icinga 2 API and returns its raw response. -func (client *Client) queryObjectsApi(urlPaths []string, method string, body io.Reader, headers map[string]string) (io.ReadCloser, error) { +func (client *Client) queryObjectsApi( + ctx context.Context, + urlPaths []string, + method string, + body io.Reader, + headers map[string]string, +) (io.ReadCloser, error) { apiUrl, err := url.JoinPath(client.ApiHost, urlPaths...) if err != nil { return nil, err } - req, err := http.NewRequestWithContext(client.Ctx, method, apiUrl, body) + req, err := http.NewRequestWithContext(ctx, method, apiUrl, body) if err != nil { return nil, err } @@ -73,8 +79,9 @@ func (client *Client) queryObjectsApi(urlPaths []string, method string, body io. } // queryObjectsApiDirect performs a direct resp. "fast" API query against an object, optionally identified by its name. 
-func (client *Client) queryObjectsApiDirect(objType, objName string) (io.ReadCloser, error) { +func (client *Client) queryObjectsApiDirect(ctx context.Context, objType, objName string) (io.ReadCloser, error) { return client.queryObjectsApi( + ctx, []string{"/v1/objects/", objType + "s/", url.PathEscape(objName)}, http.MethodGet, nil, @@ -82,13 +89,14 @@ func (client *Client) queryObjectsApiDirect(objType, objName string) (io.ReadClo } // queryObjectsApiQuery sends a query to the Icinga 2 API /v1/objects to receive data of the given objType. -func (client *Client) queryObjectsApiQuery(objType string, query map[string]any) (io.ReadCloser, error) { +func (client *Client) queryObjectsApiQuery(ctx context.Context, objType string, query map[string]any) (io.ReadCloser, error) { reqBody, err := json.Marshal(query) if err != nil { return nil, err } return client.queryObjectsApi( + ctx, []string{"/v1/objects/", objType + "s"}, http.MethodPost, bytes.NewReader(reqBody), @@ -99,27 +107,15 @@ func (client *Client) queryObjectsApiQuery(objType string, query map[string]any) }) } -// fetchHostGroups fetches all Host Groups for this host. -func (client *Client) fetchHostGroups(host string) ([]string, error) { - jsonRaw, err := client.queryObjectsApiDirect("host", host) - if err != nil { - return nil, err - } - objQueriesResults, err := extractObjectQueriesResult[HostServiceRuntimeAttributes](jsonRaw) - if err != nil { - return nil, err - } - - if len(objQueriesResults) != 1 { - return nil, fmt.Errorf("expected exactly one result for host %q instead of %d", host, len(objQueriesResults)) +// fetchHostServiceGroups fetches all Host or, if service is not empty, Service groups. +func (client *Client) fetchHostServiceGroups(ctx context.Context, host, service string) ([]string, error) { + objType, objName := "host", host + if service != "" { + objType = "service" + objName += "!" 
+ service } - return objQueriesResults[0].Attrs.Groups, nil -} - -// fetchServiceGroups fetches all Service Groups for this service on this host. -func (client *Client) fetchServiceGroups(host, service string) ([]string, error) { - jsonRaw, err := client.queryObjectsApiDirect("service", host+"!"+service) + jsonRaw, err := client.queryObjectsApiDirect(ctx, objType, objName) if err != nil { return nil, err } @@ -129,7 +125,8 @@ func (client *Client) fetchServiceGroups(host, service string) ([]string, error) } if len(objQueriesResults) != 1 { - return nil, fmt.Errorf("expected exactly one result for service %q instead of %d", host+"!"+service, len(objQueriesResults)) + return nil, fmt.Errorf("expected exactly one result for object type %q and %q instead of %d", + objType, objName, len(objQueriesResults)) } return objQueriesResults[0].Attrs.Groups, nil @@ -141,7 +138,7 @@ func (client *Client) fetchServiceGroups(host, service string) ([]string, error) // closest we can do, is query for Comments with the Acknowledgement Service Type and the host/service name. In addition, // the Host's resp. Service's AcknowledgementLastChange field has NOT the same timestamp as the Comment; there is a // difference of some milliseconds. As there might be even multiple ACK comments, we have to find the closest one. 
-func (client *Client) fetchAcknowledgementComment(host, service string, ackTime time.Time) (*Comment, error) { +func (client *Client) fetchAcknowledgementComment(ctx context.Context, host, service string, ackTime time.Time) (*Comment, error) { filterExpr := "comment.entry_type == 4 && comment.host_name == comment_host_name" filterVars := map[string]string{"comment_host_name": host} if service != "" { @@ -149,7 +146,7 @@ func (client *Client) fetchAcknowledgementComment(host, service string, ackTime filterVars["comment_service_name"] = service } - jsonRaw, err := client.queryObjectsApiQuery("comment", map[string]any{"filter": filterExpr, "filter_vars": filterVars}) + jsonRaw, err := client.queryObjectsApiQuery(ctx, "comment", map[string]any{"filter": filterExpr, "filter_vars": filterVars}) if err != nil { return nil, err } @@ -176,39 +173,20 @@ func (client *Client) fetchAcknowledgementComment(host, service string, ackTime // checkMissedChanges queries for Service or Host objects to handle missed elements. // -// If a filterExpr is given (non-empty string), it will be used for the query. Otherwise, all objects will be requested. -// -// The callback function will be called f.e. object of the objType (i.e. "host" or "service") being retrieved from the -// Icinga 2 Objects API sequentially. The callback function or a later caller decides if this object should be replayed. 
-func (client *Client) checkMissedChanges( - objType, filterExpr string, - attrsCallbackFn func(attrs HostServiceRuntimeAttributes, host, service string) error, -) (err error) { - logger := client.Logger.With(zap.String("object type", objType), zap.String("filter expr", filterExpr)) - - defer func() { - if err != nil { - logger.Errorw("Querying API for replay failed", zap.Error(err)) - } - }() - - var jsonRaw io.ReadCloser - if filterExpr == "" { - jsonRaw, err = client.queryObjectsApiDirect(objType, "") - } else { - jsonRaw, err = client.queryObjectsApiQuery(objType, map[string]any{"filter": filterExpr}) - } +// If the object's acknowledgement field is non-zero, an Acknowledgement Event will be constructed following the Host or +// Service object. +func (client *Client) checkMissedChanges(ctx context.Context, objType string) error { + jsonRaw, err := client.queryObjectsApiDirect(ctx, objType, "") if err != nil { - return + return err } objQueriesResults, err := extractObjectQueriesResult[HostServiceRuntimeAttributes](jsonRaw) if err != nil { - return + return err } - logger.Debugw("Querying API resulted in state changes", zap.Int("changes", len(objQueriesResults))) - + var stateChangeEvents, acknowledgementEvents int for _, objQueriesResult := range objQueriesResults { var hostName, serviceName string switch objQueriesResult.Type { @@ -220,58 +198,58 @@ func (client *Client) checkMissedChanges( serviceName = objQueriesResult.Attrs.Name default: - err = fmt.Errorf("querying API delivered a wrong object type %q", objQueriesResult.Type) - return + return fmt.Errorf("querying API delivered a wrong object type %q", objQueriesResult.Type) } - err = attrsCallbackFn(objQueriesResult.Attrs, hostName, serviceName) + // State change event first + ev, err := client.buildHostServiceEvent( + ctx, + objQueriesResult.Attrs.LastCheckResult, objQueriesResult.Attrs.State, + hostName, serviceName) if err != nil { - return + return fmt.Errorf("failed to construct Event from 
Host/Service response, %w", err) } - } - return -} - -// checkMissedStateChanges fetches all objects of the requested type and feeds them into the handler. -func (client *Client) checkMissedStateChanges(ctx context.Context, objType string) error { - return client.checkMissedChanges(objType, "", func(attrs HostServiceRuntimeAttributes, host, service string) error { - ev, err := client.buildHostServiceEvent(attrs.LastCheckResult, attrs.State, host, service) - if err != nil { - return fmt.Errorf("failed to construct Event from API, %w", err) - } - select { case <-ctx.Done(): return ctx.Err() - case client.eventDispatcherReplay <- &eventMsg{ev, attrs.LastStateChange.Time}: - return nil + case client.eventDispatcherReplay <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time}: + stateChangeEvents++ } - }) -} -// checkMissedAcknowledgements fetches all Host or Service Acknowledgements and feeds them into the handler. -// -// Currently only active acknowledgements are being processed. -func (client *Client) checkMissedAcknowledgements(ctx context.Context, objType string) error { - filterExpr := fmt.Sprintf("%s.acknowledgement", objType) - return client.checkMissedChanges(objType, filterExpr, func(attrs HostServiceRuntimeAttributes, host, service string) error { - ackComment, err := client.fetchAcknowledgementComment(host, service, attrs.AcknowledgementLastChange.Time) - if err != nil { - return fmt.Errorf("cannot fetch ACK Comment for Acknowledgement, %w", err) + // Optional acknowledgement event second + if objQueriesResult.Attrs.Acknowledgement == 0 { + continue } - ev, err := client.buildAcknowledgementEvent(host, service, ackComment.Author, ackComment.Text) + ackComment, err := client.fetchAcknowledgementComment( + ctx, + hostName, serviceName, + objQueriesResult.Attrs.AcknowledgementLastChange.Time) if err != nil { - return fmt.Errorf("failed to construct Event from Acknowledgement API, %w", err) + return fmt.Errorf("fetching acknowledgement comment for %v 
failed, %w", ev, err) } + ev, err = client.buildAcknowledgementEvent( + ctx, + hostName, serviceName, + ackComment.Author, ackComment.Text) + if err != nil { + return fmt.Errorf("failed to construct Event from Acknowledgement response, %w", err) + } select { case <-ctx.Done(): return ctx.Err() - case client.eventDispatcherReplay <- &eventMsg{ev, attrs.AcknowledgementLastChange.Time}: - return nil + case client.eventDispatcherReplay <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time}: + acknowledgementEvents++ } - }) + } + + client.Logger.Infow("Replaying API emitted state changes", + zap.String("object type", objType), + zap.Int("state changes", stateChangeEvents), + zap.Int("acknowledgements", acknowledgementEvents)) + + return nil } // connectEventStream connects to the EventStream within an infinite loop until a connection was established. @@ -396,11 +374,11 @@ func (client *Client) listenEventStream() error { ) switch respT := resp.(type) { case *StateChange: - ev, err = client.buildHostServiceEvent(respT.CheckResult, respT.State, respT.Host, respT.Service) + ev, err = client.buildHostServiceEvent(client.Ctx, respT.CheckResult, respT.State, respT.Host, respT.Service) evTime = respT.Timestamp.Time case *AcknowledgementSet: - ev, err = client.buildAcknowledgementEvent(respT.Host, respT.Service, respT.Author, respT.Comment) + ev, err = client.buildAcknowledgementEvent(client.Ctx, respT.Host, respT.Service, respT.Author, respT.Comment) evTime = respT.Timestamp.Time // case *AcknowledgementCleared: From 0ed4960a9cc646eb1120ce003122bf740bd538ca Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 6 Nov 2023 14:01:42 +0100 Subject: [PATCH 45/65] eventstream: rework replay communication logic Prior, two variables - one signaling channel and an atomic.Bool - were holding and/or communicating if the eventDispatcher was in its replay phase. 
Especially the atomic.Bool variable was accessed both in the producer - enterReplayPhase - as well as the consumer - eventDispatcher. After this rework, the whole logic went into the main worker, previously named eventDispatcher. By using a single channel - replayPhaseRequest -, the worker might now switch to the replay phase. This eases and unifies the internal "API" as all communication with the worker takes place over unidirectional channels. Within the worker, an internal channel is used to communicate replayed events back from the producing goroutines to the worker. Also, when all work is done or a guarding context is closed, this channel is closed, which itself is a signal to switch modes again. This context is also used to cancel already running replay jobs if another one was requested. --- internal/eventstream/client.go | 171 +++++++++++++++++------------ internal/eventstream/client_api.go | 17 ++- 2 files changed, 110 insertions(+), 78 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 124e3057..b09ecbe7 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -16,7 +16,6 @@ import ( "net/http" "net/url" "os" - "sync/atomic" "time" ) @@ -29,7 +28,15 @@ type eventMsg struct { } // Client for the Icinga 2 Event Stream API with extended support for other Icinga 2 APIs to gather additional -// information and allow a replay in case of a connection loss. +// information and a replay either when starting up to catch up the Icinga's state or in case of a connection loss. +// +// Within the icinga-notifications scope, one or multiple Client instances can be generated from the configuration by +// calling NewClientsFromConfig. +// +// A Client must be started by calling its Process method, which blocks until Ctx is marked as done. Reconnections and +// the necessary state replaying from the Icinga 2 API will be taken care off. 
Internally, the Client executes a worker +// within its own goroutine, which dispatches event.Event to the CallbackFn and enforces event.Event order during +// replaying after (re-)connections. type Client struct { // ApiHost et al. configure where and how the Icinga 2 API can be reached. ApiHost string @@ -51,13 +58,8 @@ type Client struct { // eventDispatcherEventStream communicates Events to be processed from the Event Stream API. eventDispatcherEventStream chan *eventMsg - // eventDispatcherReplay communicates Events to be processed from the Icinga 2 API replay during replay phase. - eventDispatcherReplay chan *eventMsg - - // replayTrigger signals the eventDispatcher method that the replay phase is finished. - replayTrigger chan struct{} - // replayPhase indicates that Events will be cached as the Event Stream Client is in the replay phase. - replayPhase atomic.Bool + // replayPhaseRequest requests the main worker to switch to the replay phase and re-request the Icinga 2 API. + replayPhaseRequest chan struct{} } // NewClientsFromConfig returns all Clients defined in the conf.ConfigFile. @@ -253,12 +255,62 @@ func (client *Client) buildAcknowledgementEvent(ctx context.Context, host, servi return ev, nil } -// eventDispatcher receives generated event.Events to be either buffered or directly delivered to the CallbackFn. +// startReplayWorker launches goroutines for replaying the Icinga 2 API state. // -// When the Client is in the replay phase, events from the Event Stream API will be cached until the replay phase has -// finished, while replayed events will be delivered directly. -func (client *Client) eventDispatcher() { +// Each event will be sent to the returned channel. When all launched workers have finished - either because all are +// done or one has failed and the others were interrupted -, the channel will be closed. 
Those workers honor a context +// derived from the Client.Ctx and would either stop when the main context is done or when the returned +// context.CancelFunc is called. +func (client *Client) startReplayWorker() (chan *eventMsg, context.CancelFunc) { + startTime := time.Now() + eventMsgCh := make(chan *eventMsg) + + // Unfortunately, the errgroup context is hidden, that's why another context is necessary. + ctx, cancel := context.WithCancel(client.Ctx) + group, groupCtx := errgroup.WithContext(ctx) + + objTypes := []string{"host", "service"} + for _, objType := range objTypes { + objType := objType // https://go.dev/doc/faq#closures_and_goroutines + group.Go(func() error { + err := client.checkMissedChanges(groupCtx, objType, eventMsgCh) + if err != nil { + client.Logger.Errorw("Replaying API events failed", zap.String("object type", objType), zap.Error(err)) + } + return err + }) + } + + go func() { + err := group.Wait() + if err != nil { + client.Logger.Errorw("Replaying the API failed", zap.Error(err), zap.Duration("duration", time.Since(startTime))) + } else { + client.Logger.Infow("Replaying the API has finished", zap.Duration("duration", time.Since(startTime))) + } + + cancel() + close(eventMsgCh) + }() + + return eventMsgCh, cancel +} + +// worker is the Client's main background worker, taking care of event.Event dispatching and mode switching. +// +// When the Client is in the replay phase, requested by replayPhaseRequest, events from the Event Stream API will +// be cached until the replay phase has finished, while replayed events will be delivered directly. +// +// Communication takes place over the eventDispatcherEventStream and replayPhaseRequest channels. +func (client *Client) worker() { var ( + // replayEventCh emits events generated during the replay phase from the replay worker. It will be closed when + // replaying is finished, which indicates the select below to switch phases. 
When this variable is nil, the + // Client is in the normal operating phase. + replayEventCh chan *eventMsg + // replayCancel cancels, if not nil, the currently running replay worker, e.g., when restarting the replay. + replayCancel context.CancelFunc + // replayBuffer holds Event Stream events to be replayed after the replay phase has finished. replayBuffer = make([]*event.Event, 0) // replayCache maps event.Events.Name to API time to skip replaying outdated events. @@ -276,10 +328,33 @@ func (client *Client) eventDispatcher() { for { select { case <-client.Ctx.Done(): - client.Logger.Warnw("Closing event dispatcher as its context is done", zap.Error(client.Ctx.Err())) + client.Logger.Warnw("Closing down main worker as context is finished", zap.Error(client.Ctx.Err())) return - case <-client.replayTrigger: + case <-client.replayPhaseRequest: + if replayEventCh != nil { + client.Logger.Warn("Replaying was requested while already being in the replay phase; restart replay") + + // Drain the old replay phase producer's channel until it is closed as its context was canceled. 
+ go func(replayEventCh chan *eventMsg) { + for _, ok := <-replayEventCh; ok; { + } + }(replayEventCh) + replayCancel() + } + + client.Logger.Debug("Worker enters replay phase, starting caching Event Stream events") + replayEventCh, replayCancel = client.startReplayWorker() + + case ev, ok := <-replayEventCh: + // Process an incoming event + if ok { + client.CallbackFn(ev.event) + replayCacheUpdate(ev) + break + } + + // The channel was closed - replay and switch modes skipCounter := 0 for _, ev := range replayBuffer { ts, ok := replayCache[ev.Name] @@ -292,73 +367,26 @@ func (client *Client) eventDispatcher() { client.CallbackFn(ev) } - client.Logger.Infow("Finished replay phase, returning to normal operation", + client.Logger.Infow("Worker leaves replay phase, returning to normal operation", zap.Int("cached events", len(replayBuffer)), zap.Int("skipped events", skipCounter)) + replayEventCh, replayCancel = nil, nil replayBuffer = make([]*event.Event, 0) replayCache = make(map[string]time.Time) - client.replayPhase.Store(false) case ev := <-client.eventDispatcherEventStream: - if !client.replayPhase.Load() { - client.CallbackFn(ev.event) - continue - } - - replayBuffer = append(replayBuffer, ev.event) - replayCacheUpdate(ev) - - case ev := <-client.eventDispatcherReplay: - if !client.replayPhase.Load() { - client.Logger.Errorw("Dispatcher received replay event during normal operation", zap.Stringer("event", ev.event)) - continue + // During replay phase, buffer Event Stream events + if replayEventCh != nil { + replayBuffer = append(replayBuffer, ev.event) + replayCacheUpdate(ev) + break } client.CallbackFn(ev.event) - replayCacheUpdate(ev) } } } -// enterReplayPhase enters the replay phase for the initial sync and after reconnections. -// -// This method starts multiple goroutines. First, some workers to query the Icinga 2 Objects API will be launched. 
When -// all of those have finished, the replayTrigger will be used to indicate that the buffered Events should be replayed. -func (client *Client) enterReplayPhase() { - client.Logger.Info("Entering replay phase to replay stored events first") - if !client.replayPhase.CompareAndSwap(false, true) { - client.Logger.Error("The Event Stream Client is already in the replay phase") - return - } - - group, groupCtx := errgroup.WithContext(client.Ctx) - objTypes := []string{"host", "service"} - for _, objType := range objTypes { - objType := objType // https://go.dev/doc/faq#closures_and_goroutines - group.Go(func() error { - err := client.checkMissedChanges(groupCtx, objType) - if err != nil { - client.Logger.Errorw("Replaying API events resulted in errors", - zap.String("object type", objType), zap.Error(err)) - } - return err - }) - } - - go func() { - startTime := time.Now() - - err := group.Wait() - if err != nil { - client.Logger.Errorw("Replaying the API resulted in errors", zap.Error(err), zap.Duration("duration", time.Since(startTime))) - } else { - client.Logger.Debugw("All replay phase workers have finished", zap.Duration("duration", time.Since(startTime))) - } - - client.replayTrigger <- struct{}{} - }() -} - // Process incoming objects and reconnect to the Event Stream with replaying objects if necessary. // // This method blocks as long as the Client runs, which, unless its context is cancelled, is forever. While its internal @@ -366,10 +394,9 @@ func (client *Client) enterReplayPhase() { // callback function. 
func (client *Client) Process() { client.eventDispatcherEventStream = make(chan *eventMsg) - client.eventDispatcherReplay = make(chan *eventMsg) - client.replayTrigger = make(chan struct{}) + client.replayPhaseRequest = make(chan struct{}) - go client.eventDispatcher() + go client.worker() for { err := client.listenEventStream() diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 67f19753..0beab54b 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -171,11 +171,11 @@ func (client *Client) fetchAcknowledgementComment(ctx context.Context, host, ser return &objQueriesResults[0].Attrs, nil } -// checkMissedChanges queries for Service or Host objects to handle missed elements. +// checkMissedChanges queries objType (host, service) from the Icinga 2 API for replaying events. // // If the object's acknowledgement field is non-zero, an Acknowledgement Event will be constructed following the Host or -// Service object. -func (client *Client) checkMissedChanges(ctx context.Context, objType string) error { +// Service object. Each event will be delivered to the channel. 
+func (client *Client) checkMissedChanges(ctx context.Context, objType string, eventCh chan *eventMsg) error { jsonRaw, err := client.queryObjectsApiDirect(ctx, objType, "") if err != nil { return err @@ -212,7 +212,7 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string) er select { case <-ctx.Done(): return ctx.Err() - case client.eventDispatcherReplay <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time}: + case eventCh <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time}: stateChangeEvents++ } @@ -239,7 +239,7 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string) er select { case <-ctx.Done(): return ctx.Err() - case client.eventDispatcherReplay <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time}: + case eventCh <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time}: acknowledgementEvents++ } } @@ -355,7 +355,12 @@ func (client *Client) listenEventStream() error { defer cancel() defer func() { _ = response.Body.Close() }() - client.enterReplayPhase() + select { + case <-client.Ctx.Done(): + client.Logger.Warnw("Cannot request starting replay phase as context is finished", zap.Error(client.Ctx.Err())) + return client.Ctx.Err() + case client.replayPhaseRequest <- struct{}{}: + } client.Logger.Info("Start listening on Icinga 2 Event Stream..") From 435b1eff82d1913ac9cabd366e2742847c575535 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 8 Nov 2023 12:02:31 +0100 Subject: [PATCH 46/65] eventstream: dispatch through queue to callback Instead of directly invoking the callback function for each event to be processed, events are now enqueued into a FIFO-like buffered channel, so the worker does not have to wait for the callback to finish during times of bulk processing. 
--- internal/eventstream/client.go | 44 +++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index b09ecbe7..e7c92b77 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -315,8 +315,43 @@ func (client *Client) worker() { replayBuffer = make([]*event.Event, 0) // replayCache maps event.Events.Name to API time to skip replaying outdated events. replayCache = make(map[string]time.Time) + + // dispatchQueue is a FIFO-like queue for events to be dispatched to the callback function without having to + // wait for the callback to finish, which, as being database-bound, might take some time for bulk phases. + dispatchQueue = make(chan *event.Event, 1<<16) ) + // While the worker's main loop fills the dispatchQueue for outgoing events, this small goroutine drains the + // buffered channel and forwards each request to the callback function. + go func() { + for { + select { + case <-client.Ctx.Done(): + return + + case ev := <-dispatchQueue: + client.CallbackFn(ev) + } + } + }() + + // dispatchEvent enqueues the event to the dispatchQueue while honoring the Client.Ctx. It returns true if + // enqueueing worked and false either if the buffered queue is full for a whole minute or, more likely, the + // context is done. + dispatchEvent := func(ev *event.Event) bool { + select { + case <-client.Ctx.Done(): + return false + + case <-time.After(time.Minute): + client.Logger.Errorw("Abort event enqueueing for dispatching due to a timeout", zap.Stringer("event", ev)) + return false + + case dispatchQueue <- ev: + return true + } + } + // replayCacheUpdate updates the replayCache if this eventMsg seems to be the latest of its kind. 
replayCacheUpdate := func(ev *eventMsg) { ts, ok := replayCache[ev.event.Name] @@ -349,7 +384,7 @@ func (client *Client) worker() { case ev, ok := <-replayEventCh: // Process an incoming event if ok { - client.CallbackFn(ev.event) + _ = dispatchEvent(ev.event) replayCacheUpdate(ev) break } @@ -365,7 +400,10 @@ func (client *Client) worker() { continue } - client.CallbackFn(ev) + if !dispatchEvent(ev) { + client.Logger.Error("Aborting replay as an event could not be enqueued for dispatching") + break + } } client.Logger.Infow("Worker leaves replay phase, returning to normal operation", zap.Int("cached events", len(replayBuffer)), zap.Int("skipped events", skipCounter)) @@ -382,7 +420,7 @@ func (client *Client) worker() { break } - client.CallbackFn(ev.event) + _ = dispatchEvent(ev.event) } } } From 776d884cad0bbf436f1a35518c2ea641edd8d0b5 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 8 Nov 2023 12:30:25 +0100 Subject: [PATCH 47/65] eventstream: ensure HTTP connection reuse Because the same http.Transport is shared between requests, the underlying HTTP connection was already reused in almost all cases. This change also ensures reuse in error cases by reading the response body to completion, and documents this necessity. 
--- internal/eventstream/client_api.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 0beab54b..00427188 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -27,7 +27,10 @@ import ( // // [0] https://github.com/golang/go/issues/49085 func extractObjectQueriesResult[T Comment | Downtime | HostServiceRuntimeAttributes](jsonResp io.ReadCloser) ([]ObjectQueriesResult[T], error) { - defer func() { _ = jsonResp.Close() }() + defer func() { + _, _ = io.Copy(io.Discard, jsonResp) + _ = jsonResp.Close() + }() var objQueriesResults []ObjectQueriesResult[T] err := json.NewDecoder(jsonResp).Decode(&struct { @@ -40,6 +43,8 @@ func extractObjectQueriesResult[T Comment | Downtime | HostServiceRuntimeAttribu } // queryObjectsApi performs a configurable HTTP request against the Icinga 2 API and returns its raw response. +// +// The returned io.ReaderCloser MUST be both read to completion and closed to reuse connections. func (client *Client) queryObjectsApi( ctx context.Context, urlPaths []string, @@ -61,6 +66,7 @@ func (client *Client) queryObjectsApi( req.Header.Set(k, v) } + // The underlying network connection is reused by using client.ApiHttpTransport. 
httpClient := &http.Client{ Transport: &client.ApiHttpTransport, Timeout: 3 * time.Second, @@ -71,6 +77,7 @@ func (client *Client) queryObjectsApi( } if res.StatusCode != http.StatusOK { + _, _ = io.Copy(io.Discard, res.Body) _ = res.Body.Close() return nil, fmt.Errorf("unexpected HTTP status code %d", res.StatusCode) } @@ -180,7 +187,6 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev if err != nil { return err } - objQueriesResults, err := extractObjectQueriesResult[HostServiceRuntimeAttributes](jsonRaw) if err != nil { return err @@ -309,6 +315,7 @@ func (client *Client) connectEventStream(esTypes []string) (*http.Response, cont case <-reqCtx.Done(): // This case might happen when this httpClient.Do and the time.After in the select below finish at round // about the exact same time, but httpClient.Do was slightly faster than reqCancel(). + _, _ = io.Copy(io.Discard, res.Body) _ = res.Body.Close() } }() @@ -352,8 +359,11 @@ func (client *Client) listenEventStream() error { if err != nil { return err } - defer cancel() - defer func() { _ = response.Body.Close() }() + defer func() { + cancel() + + _ = response.Body.Close() + }() select { case <-client.Ctx.Done(): From bd24d1c3cfc9af7c20f811c57240c71c2e2a146a Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 8 Nov 2023 15:34:19 +0100 Subject: [PATCH 48/65] eventstream: rename replay to catch-up phase Besides renaming the phase, the documentation was polished a bit, some log levels and messages were adjusted, and some small cleanup was done. 
--- internal/eventstream/client.go | 150 ++++++++++++++--------------- internal/eventstream/client_api.go | 41 ++++---- internal/eventstream/util.go | 75 +-------------- 3 files changed, 97 insertions(+), 169 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index e7c92b77..523e1ea5 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -4,7 +4,6 @@ import ( "context" "crypto/tls" "crypto/x509" - "errors" "fmt" "github.com/icinga/icinga-notifications/internal/config" "github.com/icinga/icinga-notifications/internal/daemon" @@ -27,16 +26,16 @@ type eventMsg struct { apiTime time.Time } -// Client for the Icinga 2 Event Stream API with extended support for other Icinga 2 APIs to gather additional -// information and a replay either when starting up to catch up the Icinga's state or in case of a connection loss. +// Client for the Icinga 2 Event Stream API with support for other Icinga 2 APIs to gather additional information and +// perform a catch-up of unknown events either when starting up to or in case of a connection loss. // // Within the icinga-notifications scope, one or multiple Client instances can be generated from the configuration by // calling NewClientsFromConfig. // // A Client must be started by calling its Process method, which blocks until Ctx is marked as done. Reconnections and -// the necessary state replaying from the Icinga 2 API will be taken care off. Internally, the Client executes a worker -// within its own goroutine, which dispatches event.Event to the CallbackFn and enforces event.Event order during -// replaying after (re-)connections. +// the necessary state replaying in an internal catch-up-phase from the Icinga 2 API will be taken care off. Internally, +// the Client executes a worker within its own goroutine, which dispatches event.Event to the CallbackFn and enforces +// order during catching up after (re-)connections. type Client struct { // ApiHost et al. 
configure where and how the Icinga 2 API can be reached. ApiHost string @@ -49,7 +48,7 @@ type Client struct { // IcingaWebRoot points to the Icinga Web 2 endpoint for generated URLs. IcingaWebRoot string - // CallbackFn receives generated event.Events. + // CallbackFn receives generated event.Event objects. CallbackFn func(*event.Event) // Ctx for all web requests as well as internal wait loops. Ctx context.Context @@ -58,13 +57,13 @@ type Client struct { // eventDispatcherEventStream communicates Events to be processed from the Event Stream API. eventDispatcherEventStream chan *eventMsg - // replayPhaseRequest requests the main worker to switch to the replay phase and re-request the Icinga 2 API. - replayPhaseRequest chan struct{} + // catchupPhaseRequest requests the main worker to switch to the catch-up-phase to query the API for missed events. + catchupPhaseRequest chan struct{} } // NewClientsFromConfig returns all Clients defined in the conf.ConfigFile. // -// Those are prepared and just needed to be started by calling their Process method. +// Those are prepared and just needed to be started by calling their Client.Process method. 
func NewClientsFromConfig( ctx context.Context, logs *logging.Logging, @@ -76,6 +75,7 @@ func NewClientsFromConfig( for _, icinga2Api := range conf.Icinga2Apis { logger := logs.GetChildLogger(fmt.Sprintf("eventstream-%d", icinga2Api.NotificationsEventSourceId)) + callbackLogger := logs.GetChildLogger(fmt.Sprintf("eventstream-callback-%d", icinga2Api.NotificationsEventSourceId)) client := &Client{ ApiHost: icinga2Api.Host, @@ -102,7 +102,7 @@ func NewClientsFromConfig( IcingaNotificationsEventSourceId: icinga2Api.NotificationsEventSourceId, IcingaWebRoot: conf.Icingaweb2URL, - CallbackFn: makeProcessEvent(ctx, db, logger, logs, runtimeConfig), + CallbackFn: makeProcessEvent(ctx, db, callbackLogger, logs, runtimeConfig), Ctx: ctx, Logger: logger, } @@ -255,13 +255,12 @@ func (client *Client) buildAcknowledgementEvent(ctx context.Context, host, servi return ev, nil } -// startReplayWorker launches goroutines for replaying the Icinga 2 API state. +// startCatchupWorkers launches goroutines for catching up the Icinga 2 API state. // // Each event will be sent to the returned channel. When all launched workers have finished - either because all are // done or one has failed and the others were interrupted -, the channel will be closed. Those workers honor a context -// derived from the Client.Ctx and would either stop when the main context is done or when the returned -// context.CancelFunc is called. -func (client *Client) startReplayWorker() (chan *eventMsg, context.CancelFunc) { +// derived from the Client.Ctx and would either stop when this context is done or when the context.CancelFunc is called. 
+func (client *Client) startCatchupWorkers() (chan *eventMsg, context.CancelFunc) { startTime := time.Now() eventMsgCh := make(chan *eventMsg) @@ -275,7 +274,7 @@ func (client *Client) startReplayWorker() (chan *eventMsg, context.CancelFunc) { group.Go(func() error { err := client.checkMissedChanges(groupCtx, objType, eventMsgCh) if err != nil { - client.Logger.Errorw("Replaying API events failed", zap.String("object type", objType), zap.Error(err)) + client.Logger.Errorw("Catch-up-phase event worker failed", zap.String("object type", objType), zap.Error(err)) } return err }) @@ -284,9 +283,9 @@ func (client *Client) startReplayWorker() (chan *eventMsg, context.CancelFunc) { go func() { err := group.Wait() if err != nil { - client.Logger.Errorw("Replaying the API failed", zap.Error(err), zap.Duration("duration", time.Since(startTime))) + client.Logger.Errorw("Catching up the API failed", zap.Error(err), zap.Duration("duration", time.Since(startTime))) } else { - client.Logger.Infow("Replaying the API has finished", zap.Duration("duration", time.Since(startTime))) + client.Logger.Infow("Catching up the API has finished", zap.Duration("duration", time.Since(startTime))) } cancel() @@ -298,26 +297,26 @@ func (client *Client) startReplayWorker() (chan *eventMsg, context.CancelFunc) { // worker is the Client's main background worker, taking care of event.Event dispatching and mode switching. // -// When the Client is in the replay phase, requested by replayPhaseRequest, events from the Event Stream API will -// be cached until the replay phase has finished, while replayed events will be delivered directly. +// When the Client is in the catch-up-phase, requested by catchupPhaseRequest, events from the Event Stream API will +// be cached until the catch-up-phase has finished, while replayed events will be delivered directly. // -// Communication takes place over the eventDispatcherEventStream and replayPhaseRequest channels. 
+// Communication takes place over the eventDispatcherEventStream and catchupPhaseRequest channels. func (client *Client) worker() { var ( - // replayEventCh emits events generated during the replay phase from the replay worker. It will be closed when - // replaying is finished, which indicates the select below to switch phases. When this variable is nil, the + // catchupEventCh emits events generated during the catch-up-phase from catch-up-workers. It will be closed when + // catching up is done, which indicates the select below to switch phases. When this variable is nil, this // Client is in the normal operating phase. - replayEventCh chan *eventMsg - // replayCancel cancels, if not nil, the currently running replay worker, e.g., when restarting the replay. - replayCancel context.CancelFunc + catchupEventCh chan *eventMsg + // catchupCancel cancels, if not nil, all running catch-up-workers, e.g., when restarting catching-up. + catchupCancel context.CancelFunc - // replayBuffer holds Event Stream events to be replayed after the replay phase has finished. - replayBuffer = make([]*event.Event, 0) - // replayCache maps event.Events.Name to API time to skip replaying outdated events. - replayCache = make(map[string]time.Time) + // catchupBuffer holds Event Stream events to be replayed after the catch-up-phase has finished. + catchupBuffer = make([]*event.Event, 0) + // catchupCache maps event.Events.Name to API time to skip replaying outdated events. + catchupCache = make(map[string]time.Time) // dispatchQueue is a FIFO-like queue for events to be dispatched to the callback function without having to - // wait for the callback to finish, which, as being database-bound, might take some time for bulk phases. + // wait for the callback to finish, which, as being database-bound, might take some time during bulk phases. 
dispatchQueue = make(chan *event.Event, 1<<16) ) @@ -335,8 +334,8 @@ func (client *Client) worker() { } }() - // dispatchEvent enqueues the event to the dispatchQueue while honoring the Client.Ctx. It returns true if - // enqueueing worked and false either if the buffered queue is full for a whole minute or, more likely, the + // dispatchEvent enqueues the event to the dispatchQueue while honoring the Client.Ctx. It returns true when + // enqueueing worked and false either if the buffered queue is stuck for a whole minute or, more likely, the // context is done. dispatchEvent := func(ev *event.Event) bool { select { @@ -352,11 +351,11 @@ func (client *Client) worker() { } } - // replayCacheUpdate updates the replayCache if this eventMsg seems to be the latest of its kind. - replayCacheUpdate := func(ev *eventMsg) { - ts, ok := replayCache[ev.event.Name] + // catchupCacheUpdate updates the catchupCache if this eventMsg seems to be the latest of its kind. + catchupCacheUpdate := func(ev *eventMsg) { + ts, ok := catchupCache[ev.event.Name] if !ok || ev.apiTime.After(ts) { - replayCache[ev.event.Name] = ev.apiTime + catchupCache[ev.event.Name] = ev.apiTime } } @@ -366,33 +365,34 @@ func (client *Client) worker() { client.Logger.Warnw("Closing down main worker as context is finished", zap.Error(client.Ctx.Err())) return - case <-client.replayPhaseRequest: - if replayEventCh != nil { - client.Logger.Warn("Replaying was requested while already being in the replay phase; restart replay") + case <-client.catchupPhaseRequest: + if catchupEventCh != nil { + client.Logger.Warn("Switching to catch-up-phase was requested while already catching up, restarting phase") - // Drain the old replay phase producer's channel until it is closed as its context was canceled. - go func(replayEventCh chan *eventMsg) { - for _, ok := <-replayEventCh; ok; { + // Drain the old catch-up-phase producer channel until it is closed as its context will be canceled. 
+ go func(catchupEventCh chan *eventMsg) { + for _, ok := <-catchupEventCh; ok; { } - }(replayEventCh) - replayCancel() + }(catchupEventCh) + catchupCancel() } - client.Logger.Debug("Worker enters replay phase, starting caching Event Stream events") - replayEventCh, replayCancel = client.startReplayWorker() + client.Logger.Info("Worker enters catch-up-phase, start caching up on Event Stream events") + catchupEventCh, catchupCancel = client.startCatchupWorkers() - case ev, ok := <-replayEventCh: + case ev, ok := <-catchupEventCh: // Process an incoming event if ok { _ = dispatchEvent(ev.event) - replayCacheUpdate(ev) + catchupCacheUpdate(ev) break } - // The channel was closed - replay and switch modes + // The channel was closed - replay cache and switch modes skipCounter := 0 - for _, ev := range replayBuffer { - ts, ok := replayCache[ev.Name] + + for _, ev := range catchupBuffer { + ts, ok := catchupCache[ev.Name] if ok && ev.Time.Before(ts) { client.Logger.Debugw("Skip replaying outdated Event Stream event", zap.Stringer("event", ev), zap.Time("event timestamp", ev.Time), zap.Time("cache timestamp", ts)) @@ -401,22 +401,22 @@ func (client *Client) worker() { } if !dispatchEvent(ev) { - client.Logger.Error("Aborting replay as an event could not be enqueued for dispatching") + client.Logger.Error("Aborting Event Stream replay as an event could not be enqueued for dispatching") break } } - client.Logger.Infow("Worker leaves replay phase, returning to normal operation", - zap.Int("cached events", len(replayBuffer)), zap.Int("skipped events", skipCounter)) + client.Logger.Infow("Worker leaves catch-up-phase, returning to normal operation", + zap.Int("cached events", len(catchupBuffer)), zap.Int("skipped cached events", skipCounter)) - replayEventCh, replayCancel = nil, nil - replayBuffer = make([]*event.Event, 0) - replayCache = make(map[string]time.Time) + catchupEventCh, catchupCancel = nil, nil + catchupBuffer = make([]*event.Event, 0) + catchupCache = 
make(map[string]time.Time) case ev := <-client.eventDispatcherEventStream: - // During replay phase, buffer Event Stream events - if replayEventCh != nil { - replayBuffer = append(replayBuffer, ev.event) - replayCacheUpdate(ev) + // During catch-up-phase, buffer Event Stream events + if catchupEventCh != nil { + catchupBuffer = append(catchupBuffer, ev.event) + catchupCacheUpdate(ev) break } @@ -425,29 +425,23 @@ func (client *Client) worker() { } } -// Process incoming objects and reconnect to the Event Stream with replaying objects if necessary. +// Process incoming events and reconnect to the Event Stream with catching up on missed objects if necessary. // -// This method blocks as long as the Client runs, which, unless its context is cancelled, is forever. While its internal -// loop takes care of reconnections, all those events will be logged while generated Events will be dispatched to the -// callback function. +// This method blocks as long as the Client runs, which, unless Ctx is cancelled, is forever. While its internal loop +// takes care of reconnections, messages are being logged while generated event.Event will be dispatched to the +// CallbackFn function. 
func (client *Client) Process() { client.eventDispatcherEventStream = make(chan *eventMsg) - client.replayPhaseRequest = make(chan struct{}) + client.catchupPhaseRequest = make(chan struct{}) go client.worker() - for { + for client.Ctx.Err() == nil { err := client.listenEventStream() - switch { - case errors.Is(err, context.Canceled): - client.Logger.Warnw("Stopping Event Stream Client as its context is done", zap.Error(err)) - return - - case err != nil: - client.Logger.Errorw("Event Stream processing failed", zap.Error(err)) - - default: - client.Logger.Warn("Event Stream closed stream; maybe Icinga 2 is reloading") + if err != nil { + client.Logger.Errorw("Event Stream processing was interrupted", zap.Error(err)) + } else { + client.Logger.Errorw("Event Stream processing was closed") } } } diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index 00427188..f69a88c2 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -21,12 +21,15 @@ import ( // extractObjectQueriesResult parses a typed ObjectQueriesResult array out of a JSON io.ReaderCloser. // +// The generic type T is currently limited to all later needed types, even when the API might also return other known or +// unknown types. When another type becomes necessary, T can be exceeded. +// // As Go 1.21 does not allow type parameters in methods[0], the logic was extracted into a function transforming the // JSON response - passed as an io.ReaderCloser which will be closed within this function - into the typed response to // be used within the methods below. 
// -// [0] https://github.com/golang/go/issues/49085 -func extractObjectQueriesResult[T Comment | Downtime | HostServiceRuntimeAttributes](jsonResp io.ReadCloser) ([]ObjectQueriesResult[T], error) { +// [0] https://github.com/golang/go/issues/49085 +func extractObjectQueriesResult[T Comment | HostServiceRuntimeAttributes](jsonResp io.ReadCloser) ([]ObjectQueriesResult[T], error) { defer func() { _, _ = io.Copy(io.Discard, jsonResp) _ = jsonResp.Close() @@ -132,8 +135,8 @@ func (client *Client) fetchHostServiceGroups(ctx context.Context, host, service } if len(objQueriesResults) != 1 { - return nil, fmt.Errorf("expected exactly one result for object type %q and %q instead of %d", - objType, objName, len(objQueriesResults)) + return nil, fmt.Errorf("expected exactly one result for %q as object type %q instead of %d", + objName, objType, len(objQueriesResults)) } return objQueriesResults[0].Attrs.Groups, nil @@ -146,6 +149,7 @@ func (client *Client) fetchHostServiceGroups(ctx context.Context, host, service // the Host's resp. Service's AcknowledgementLastChange field has NOT the same timestamp as the Comment; there is a // difference of some milliseconds. As there might be even multiple ACK comments, we have to find the closest one. func (client *Client) fetchAcknowledgementComment(ctx context.Context, host, service string, ackTime time.Time) (*Comment, error) { + // comment.entry_type = 4 is an Acknowledgement comment; Comment.EntryType filterExpr := "comment.entry_type == 4 && comment.host_name == comment_host_name" filterVars := map[string]string{"comment_host_name": host} if service != "" { @@ -178,7 +182,7 @@ func (client *Client) fetchAcknowledgementComment(ctx context.Context, host, ser return &objQueriesResults[0].Attrs, nil } -// checkMissedChanges queries objType (host, service) from the Icinga 2 API for replaying events. +// checkMissedChanges queries objType (host, service) from the Icinga 2 API to catch up on missed events. 
// // If the object's acknowledgement field is non-zero, an Acknowledgement Event will be constructed following the Host or // Service object. Each event will be delivered to the channel. @@ -193,6 +197,13 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev } var stateChangeEvents, acknowledgementEvents int + defer func() { + client.Logger.Debugw("Querying API emitted events", + zap.String("object type", objType), + zap.Int("state changes", stateChangeEvents), + zap.Int("acknowledgements", acknowledgementEvents)) + }() + for _, objQueriesResult := range objQueriesResults { var hostName, serviceName string switch objQueriesResult.Type { @@ -249,12 +260,6 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev acknowledgementEvents++ } } - - client.Logger.Infow("Replaying API emitted state changes", - zap.String("object type", objType), - zap.Int("state changes", stateChangeEvents), - zap.Int("acknowledgements", acknowledgementEvents)) - return nil } @@ -301,22 +306,21 @@ func (client *Client) connectEventStream(esTypes []string) (*http.Response, cont go func() { defer close(resCh) - client.Logger.Info("Try to establish an Event Stream API connection") + client.Logger.Debug("Try to establish an Event Stream API connection") httpClient := &http.Client{Transport: &client.ApiHttpTransport} res, err := httpClient.Do(req) if err != nil { - client.Logger.Warnw("Establishing an Event Stream API connection failed; will be retried", zap.Error(err)) + client.Logger.Warnw("Establishing an Event Stream API connection failed, will be retried", zap.Error(err)) return } select { - case resCh <- res: - case <-reqCtx.Done(): // This case might happen when this httpClient.Do and the time.After in the select below finish at round // about the exact same time, but httpClient.Do was slightly faster than reqCancel(). 
_, _ = io.Copy(io.Discard, res.Body) _ = res.Body.Close() + case resCh <- res: } }() @@ -345,6 +349,7 @@ func (client *Client) connectEventStream(esTypes []string) (*http.Response, cont // In case of a parsing or handling error, this error will be returned. If the server closes the connection, nil will // be returned. func (client *Client) listenEventStream() error { + // Ensure to implement a handler case in the type switch below for each requested type. response, cancel, err := client.connectEventStream([]string{ typeStateChange, typeAcknowledgementSet, @@ -367,12 +372,12 @@ func (client *Client) listenEventStream() error { select { case <-client.Ctx.Done(): - client.Logger.Warnw("Cannot request starting replay phase as context is finished", zap.Error(client.Ctx.Err())) + client.Logger.Warnw("Cannot request catch-up-phase as context is finished", zap.Error(client.Ctx.Err())) return client.Ctx.Err() - case client.replayPhaseRequest <- struct{}{}: + case client.catchupPhaseRequest <- struct{}{}: } - client.Logger.Info("Start listening on Icinga 2 Event Stream..") + client.Logger.Info("Start listening on Icinga 2 Event Stream") lineScanner := bufio.NewScanner(response.Body) for lineScanner.Scan() { diff --git a/internal/eventstream/util.go b/internal/eventstream/util.go index 3337f8be..596d5868 100644 --- a/internal/eventstream/util.go +++ b/internal/eventstream/util.go @@ -13,77 +13,6 @@ import ( "strings" ) -// ProcessEvent is a copy pasta version of the second half of Listener.ProcessEvent to be removed after #99 has landed. 
-func ProcessEvent( - ev *event.Event, - db *icingadb.DB, - logger *logging.Logger, - logs *logging.Logging, - runtimeConfig *config.RuntimeConfig, -) { - ctx := context.Background() - obj, err := object.FromEvent(ctx, db, ev) - if err != nil { - logger.Errorw("Can't sync object", zap.Error(err)) - return - } - - tx, err := db.BeginTxx(ctx, nil) - if err != nil { - logger.Errorw("Can't start a db transaction", zap.Error(err)) - return - } - defer func() { _ = tx.Rollback() }() - - if err := ev.Sync(ctx, tx, db, obj.ID); err != nil { - logger.Errorw("Failed to insert event and fetch its ID", zap.String("event", ev.String()), zap.Error(err)) - return - } - - createIncident := ev.Severity != event.SeverityNone && ev.Severity != event.SeverityOK - currentIncident, created, err := incident.GetCurrent( - ctx, - db, - obj, - logs.GetChildLogger("incident"), - runtimeConfig, - createIncident) - if err != nil { - logger.Errorw("Failed to get current incident", zap.Error(err)) - return - } - - if currentIncident == nil { - if ev.Type == event.TypeAcknowledgement { - logger.Warnf("%q doesn't have active incident. Ignoring acknowledgement event from source %d", obj.DisplayName(), ev.SourceId) - return - } - - if ev.Severity != event.SeverityOK { - logger.Error("non-OK state but no incident was created") - return - } - - logger.Warnw("Ignoring superfluous OK state event from source %d", zap.Int64("source", ev.SourceId), zap.String("object", obj.DisplayName())) - return - } - - logger.Debugf("Processing event %v", ev) - - if err := currentIncident.ProcessEvent(ctx, ev, created); err != nil { - logger.Errorw("Failed to process current incident", zap.Error(err)) - return - } - - if err = tx.Commit(); err != nil { - logger.Errorw( - "Can't commit db transaction", zap.String("object", obj.DisplayName()), - zap.String("incident", currentIncident.String()), zap.Error(err), - ) - return - } -} - // makeProcessEvent creates a closure function to process received events. 
// // This function contains glue code similar to those from Listener.ProcessEvent to check for incidents for the Event @@ -147,8 +76,8 @@ func makeProcessEvent( // as a valid substitution for space (' '). Unfortunately, Go's url.QueryEscape does this very substitution and // url.PathEscape does a bit too less and has a misleading name on top. // -// - https://www.php.net/manual/en/function.rawurlencode.php -// - https://github.com/php/php-src/blob/php-8.2.12/ext/standard/url.c#L538 +// - https://www.php.net/manual/en/function.rawurlencode.php +// - https://github.com/php/php-src/blob/php-8.2.12/ext/standard/url.c#L538 func rawurlencode(s string) string { return strings.ReplaceAll(url.QueryEscape(s), "+", "%20") } From 1a65d9adade35a1ddce521c1e4e4a04640e5b37e Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 16 Nov 2023 10:45:03 +0100 Subject: [PATCH 49/65] eventstream: directly deliver events from worker Instead of using another secondary cache, the events will now be directly delivered. To not block the select and all the channels for too long, the catchupBuffer is replayed one-by-one. --- internal/eventstream/client.go | 64 ++++++++-------------------------- 1 file changed, 15 insertions(+), 49 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 523e1ea5..5bb260f2 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -314,43 +314,8 @@ func (client *Client) worker() { catchupBuffer = make([]*event.Event, 0) // catchupCache maps event.Events.Name to API time to skip replaying outdated events. catchupCache = make(map[string]time.Time) - - // dispatchQueue is a FIFO-like queue for events to be dispatched to the callback function without having to - // wait for the callback to finish, which, as being database-bound, might take some time during bulk phases. 
- dispatchQueue = make(chan *event.Event, 1<<16) ) - // While the worker's main loop fills the dispatchQueue for outgoing events, this small goroutine drains the - // buffered channel and forwards each request to the callback function. - go func() { - for { - select { - case <-client.Ctx.Done(): - return - - case ev := <-dispatchQueue: - client.CallbackFn(ev) - } - } - }() - - // dispatchEvent enqueues the event to the dispatchQueue while honoring the Client.Ctx. It returns true when - // enqueueing worked and false either if the buffered queue is stuck for a whole minute or, more likely, the - // context is done. - dispatchEvent := func(ev *event.Event) bool { - select { - case <-client.Ctx.Done(): - return false - - case <-time.After(time.Minute): - client.Logger.Errorw("Abort event enqueueing for dispatching due to a timeout", zap.Stringer("event", ev)) - return false - - case dispatchQueue <- ev: - return true - } - } - // catchupCacheUpdate updates the catchupCache if this eventMsg seems to be the latest of its kind. catchupCacheUpdate := func(ev *eventMsg) { ts, ok := catchupCache[ev.event.Name] @@ -383,30 +348,31 @@ func (client *Client) worker() { case ev, ok := <-catchupEventCh: // Process an incoming event if ok { - _ = dispatchEvent(ev.event) + client.CallbackFn(ev.event) catchupCacheUpdate(ev) break } - // The channel was closed - replay cache and switch modes - skipCounter := 0 + // The channel is closed, replay cache and eventually switch modes + if len(catchupBuffer) > 0 { + // To not block the select and all channels too long, only one event will be processed per iteration. 
+ ev := catchupBuffer[0] + catchupBuffer = catchupBuffer[1:] - for _, ev := range catchupBuffer { ts, ok := catchupCache[ev.Name] - if ok && ev.Time.Before(ts) { + if !ok { + client.Logger.Debugw("Event to be replayed is not in cache", zap.Stringer("event", ev)) + } else if ev.Time.Before(ts) { client.Logger.Debugw("Skip replaying outdated Event Stream event", zap.Stringer("event", ev), zap.Time("event timestamp", ev.Time), zap.Time("cache timestamp", ts)) - skipCounter++ - continue - } - - if !dispatchEvent(ev) { - client.Logger.Error("Aborting Event Stream replay as an event could not be enqueued for dispatching") break } + + client.CallbackFn(ev) + break } - client.Logger.Infow("Worker leaves catch-up-phase, returning to normal operation", - zap.Int("cached events", len(catchupBuffer)), zap.Int("skipped cached events", skipCounter)) + + client.Logger.Info("Worker leaves catch-up-phase, returning to normal operation") catchupEventCh, catchupCancel = nil, nil catchupBuffer = make([]*event.Event, 0) @@ -420,7 +386,7 @@ func (client *Client) worker() { break } - _ = dispatchEvent(ev.event) + client.CallbackFn(ev.event) } } } From 7890b3a600a70a7b92d19731bddd35fff645d874 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 4 Dec 2023 13:45:28 +0100 Subject: [PATCH 50/65] incident.ProcessEvent: refactor from common code Both Listener.ProcessEvent as well as makeProcessEvent were using roughly the same setup code before calling incident.ProcessEvent. These two very similar code paths are now unified into a common function. 
--- internal/eventstream/client.go | 12 +++++-- internal/eventstream/util.go | 65 ---------------------------------- internal/incident/incidents.go | 44 +++++++++++++++++++++++ internal/listener/listener.go | 41 +-------------------- 4 files changed, 54 insertions(+), 108 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 5bb260f2..76d36ef6 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -8,6 +8,7 @@ import ( "github.com/icinga/icinga-notifications/internal/config" "github.com/icinga/icinga-notifications/internal/daemon" "github.com/icinga/icinga-notifications/internal/event" + "github.com/icinga/icinga-notifications/internal/incident" "github.com/icinga/icingadb/pkg/icingadb" "github.com/icinga/icingadb/pkg/logging" "go.uber.org/zap" @@ -102,9 +103,14 @@ func NewClientsFromConfig( IcingaNotificationsEventSourceId: icinga2Api.NotificationsEventSourceId, IcingaWebRoot: conf.Icingaweb2URL, - CallbackFn: makeProcessEvent(ctx, db, callbackLogger, logs, runtimeConfig), - Ctx: ctx, - Logger: logger, + CallbackFn: func(ev *event.Event) { + err := incident.ProcessEvent(ctx, db, logs, runtimeConfig, ev) + if err != nil { + callbackLogger.Warnw("Cannot process event", zap.Error(err)) + } + }, + Ctx: ctx, + Logger: logger, } if icinga2Api.IcingaCaFile != "" { diff --git a/internal/eventstream/util.go b/internal/eventstream/util.go index 596d5868..06a9082a 100644 --- a/internal/eventstream/util.go +++ b/internal/eventstream/util.go @@ -1,75 +1,10 @@ package eventstream import ( - "context" - "github.com/icinga/icinga-notifications/internal/config" - "github.com/icinga/icinga-notifications/internal/event" - "github.com/icinga/icinga-notifications/internal/incident" - "github.com/icinga/icinga-notifications/internal/object" - "github.com/icinga/icingadb/pkg/icingadb" - "github.com/icinga/icingadb/pkg/logging" - "go.uber.org/zap" "net/url" "strings" ) -// makeProcessEvent creates a closure function 
to process received events. -// -// This function contains glue code similar to those from Listener.ProcessEvent to check for incidents for the Event -// and, if existent, call *Incident.ProcessEvent on this incident. -func makeProcessEvent( - ctx context.Context, - db *icingadb.DB, - logger *logging.Logger, - logs *logging.Logging, - runtimeConfig *config.RuntimeConfig, -) func(*event.Event) { - return func(ev *event.Event) { - obj, err := object.FromEvent(ctx, db, ev) - if err != nil { - logger.Errorw("Cannot sync object", zap.Stringer("event", ev), zap.Error(err)) - return - } - - createIncident := ev.Severity != event.SeverityNone && ev.Severity != event.SeverityOK - currentIncident, created, err := incident.GetCurrent( - ctx, - db, - obj, - logs.GetChildLogger("incident"), - runtimeConfig, - createIncident) - if err != nil { - logger.Errorw("Failed to get current incident", zap.Error(err)) - return - } - - l := logger.With( - zap.String("object", obj.DisplayName()), - zap.Stringer("event", ev), - zap.Stringer("incident", currentIncident), - zap.Bool("created incident", created)) - - if currentIncident == nil { - switch { - case ev.Type == event.TypeAcknowledgement: - l.Warn("Object doesn't have active incident, ignoring acknowledgement event") - case ev.Severity != event.SeverityOK: - l.Error("Cannot process event with a non OK state without a known incident") - default: - l.Warn("Ignoring superfluous OK state event") - } - - return - } - - if err := currentIncident.ProcessEvent(ctx, ev, created); err != nil { - logger.Errorw("Failed to process current incident", zap.Error(err)) - return - } - } -} - // rawurlencode mimics PHP's rawurlencode to be used for parameter encoding. 
// // Icinga Web uses rawurldecode instead of urldecode, which, as its main difference, does not honor the plus char ('+') diff --git a/internal/incident/incidents.go b/internal/incident/incidents.go index 56d0b166..99b633c4 100644 --- a/internal/incident/incidents.go +++ b/internal/incident/incidents.go @@ -4,6 +4,7 @@ import ( "context" "database/sql" "errors" + "fmt" "github.com/icinga/icinga-notifications/internal/config" "github.com/icinga/icinga-notifications/internal/event" "github.com/icinga/icinga-notifications/internal/object" @@ -132,3 +133,46 @@ func GetCurrentIncidents() map[int64]*Incident { } return m } + +// ProcessEvent from an event.Event. +// +// This function first gets this Event's object.Object and its incident.Incident. Then, after performing some safety +// checks, it calls the Incident.ProcessEvent method. +func ProcessEvent( + ctx context.Context, + db *icingadb.DB, + logs *logging.Logging, + runtimeConfig *config.RuntimeConfig, + ev *event.Event, +) error { + obj, err := object.FromEvent(ctx, db, ev) + if err != nil { + return fmt.Errorf("cannot sync event object: %w", err) + } + + createIncident := ev.Severity != event.SeverityNone && ev.Severity != event.SeverityOK + currentIncident, created, err := GetCurrent( + ctx, + db, + obj, + logs.GetChildLogger("incident"), + runtimeConfig, + createIncident) + if err != nil { + return fmt.Errorf("cannot get current incident for %q: %w", obj.DisplayName(), err) + } + + if currentIncident == nil { + switch { + case ev.Type == event.TypeAcknowledgement: + return fmt.Errorf("%q does not have an active incident, ignoring acknowledgement event from source %d", + obj.DisplayName(), ev.SourceId) + case ev.Severity != event.SeverityOK: + panic(fmt.Sprintf("cannot process event %v with a non-OK state %v without a known incident", ev, ev.Severity)) + default: + return fmt.Errorf("ignoring superfluous OK state event from source %d", ev.SourceId) + } + } + + return currentIncident.ProcessEvent(ctx, ev, 
created) +} diff --git a/internal/listener/listener.go b/internal/listener/listener.go index 6ee5b41e..ad626fbb 100644 --- a/internal/listener/listener.go +++ b/internal/listener/listener.go @@ -10,7 +10,6 @@ import ( "github.com/icinga/icinga-notifications/internal/daemon" "github.com/icinga/icinga-notifications/internal/event" "github.com/icinga/icinga-notifications/internal/incident" - "github.com/icinga/icinga-notifications/internal/object" "github.com/icinga/icingadb/pkg/icingadb" "github.com/icinga/icingadb/pkg/logging" "go.uber.org/zap" @@ -148,46 +147,8 @@ func (l *Listener) ProcessEvent(w http.ResponseWriter, req *http.Request) { } } - ctx := context.Background() - obj, err := object.FromEvent(ctx, l.db, &ev) - if err != nil { - l.logger.Errorw("Can't sync object", zap.Error(err)) - abort(http.StatusInternalServerError, &ev, err.Error()) - return - } - - createIncident := ev.Severity != event.SeverityNone && ev.Severity != event.SeverityOK - currentIncident, created, err := incident.GetCurrent(ctx, l.db, obj, l.logs.GetChildLogger("incident"), l.runtimeConfig, createIncident) - if err != nil { - abort(http.StatusInternalServerError, &ev, err.Error()) - return - } - - if currentIncident == nil { - w.WriteHeader(http.StatusNotAcceptable) - - if ev.Type == event.TypeAcknowledgement { - msg := fmt.Sprintf("%q doesn't have active incident. 
Ignoring acknowledgement event from source %d", obj.DisplayName(), ev.SourceId) - _, _ = fmt.Fprintln(w, msg) - - l.logger.Warnln(msg) - return - } - - if ev.Severity != event.SeverityOK { - panic("non-OK state but no incident was created") - } - - msg := fmt.Sprintf("Ignoring superfluous OK state event from source %d", ev.SourceId) - l.logger.Warnw(msg, zap.String("object", obj.DisplayName())) - - _, _ = fmt.Fprintln(w, msg) - return - } - l.logger.Infow("Processing event", zap.String("event", ev.String())) - - err = currentIncident.ProcessEvent(ctx, &ev, created) + err = incident.ProcessEvent(context.Background(), l.db, l.logs, l.runtimeConfig, &ev) if err != nil { abort(http.StatusInternalServerError, &ev, err.Error()) return From deaff4de95bedce56a7a2a69ffb4b22fb5170d79 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 23 Nov 2023 12:58:57 +0100 Subject: [PATCH 51/65] eventstream: hide internal Event Stream context As the prior context handling for the connectEventStream method was a bit unconventional, but the CancelFunc must not get lost, this logic is now hidden within a custom io.ReadCloser. --- internal/eventstream/client_api.go | 45 +++++++++++++++++++----------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index f69a88c2..e76cdbb8 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -263,19 +263,31 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev return nil } -// connectEventStream connects to the EventStream within an infinite loop until a connection was established. +// connectEventStreamReadCloser wraps io.ReadCloser with a context.CancelFunc to be returned in connectEventStream. +type connectEventStreamReadCloser struct { + io.ReadCloser + cancel context.CancelFunc +} + +// Close the internal ReadCloser with canceling the internal http.Request's context first. 
+func (e *connectEventStreamReadCloser) Close() error { + e.cancel() + return e.ReadCloser.Close() +} + +// connectEventStream connects to the EventStream, retries until a connection was established. // // The esTypes is a string array of required Event Stream types. // -// An error will be returned if reconnecting resp. retrying the (almost) same thing will not help fix it. -func (client *Client) connectEventStream(esTypes []string) (*http.Response, context.CancelFunc, error) { +// An error will only be returned if reconnecting - retrying the (almost) same thing - will not help. +func (client *Client) connectEventStream(esTypes []string) (io.ReadCloser, error) { apiUrl, err := url.JoinPath(client.ApiHost, "/v1/events") if err != nil { - return nil, nil, err + return nil, err } for i := 0; ; i++ { - // Always ensure an unique queue name to ensure no conflicts might occur. + // Always ensure an unique queue name to mitigate possible naming conflicts. queueNameRndBuff := make([]byte, 16) _, _ = rand.Read(queueNameRndBuff) @@ -284,17 +296,18 @@ func (client *Client) connectEventStream(esTypes []string) (*http.Response, cont "types": esTypes, }) if err != nil { - return nil, nil, err + return nil, err } // Sub-context which might get canceled early if connecting takes to long. // The reqCancel function will be called after the select below or when leaving the function with an error. + // When leaving the function without an error, it is being called in connectEventStreamReadCloser.Close(). 
reqCtx, reqCancel := context.WithCancel(client.Ctx) req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, apiUrl, bytes.NewReader(reqBody)) if err != nil { reqCancel() - return nil, nil, err + return nil, err } req.SetBasicAuth(client.ApiBasicAuthUser, client.ApiBasicAuthPass) @@ -327,7 +340,11 @@ func (client *Client) connectEventStream(esTypes []string) (*http.Response, cont select { case res, ok := <-resCh: if ok { - return res, reqCancel, nil + esReadCloser := &connectEventStreamReadCloser{ + ReadCloser: res.Body, + cancel: reqCancel, + } + return esReadCloser, nil } case <-time.After(3 * time.Second): @@ -339,7 +356,7 @@ func (client *Client) connectEventStream(esTypes []string) (*http.Response, cont select { case <-time.After(min(3*time.Minute, time.Duration(math.Exp2(float64(i)))*time.Second)): case <-client.Ctx.Done(): - return nil, nil, client.Ctx.Err() + return nil, client.Ctx.Err() } } } @@ -350,7 +367,7 @@ func (client *Client) connectEventStream(esTypes []string) (*http.Response, cont // be returned. func (client *Client) listenEventStream() error { // Ensure to implement a handler case in the type switch below for each requested type. 
- response, cancel, err := client.connectEventStream([]string{ + eventStream, err := client.connectEventStream([]string{ typeStateChange, typeAcknowledgementSet, // typeAcknowledgementCleared, @@ -364,11 +381,7 @@ func (client *Client) listenEventStream() error { if err != nil { return err } - defer func() { - cancel() - - _ = response.Body.Close() - }() + defer func() { _ = eventStream.Close() }() select { case <-client.Ctx.Done(): @@ -379,7 +392,7 @@ func (client *Client) listenEventStream() error { client.Logger.Info("Start listening on Icinga 2 Event Stream") - lineScanner := bufio.NewScanner(response.Body) + lineScanner := bufio.NewScanner(eventStream) for lineScanner.Scan() { rawJson := lineScanner.Bytes() From e0b41447e85a511b0bcba100a13dd67f1434f8a1 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 29 Nov 2023 11:29:05 +0100 Subject: [PATCH 52/65] incident: custom superfluous state change error By introducing ErrSuperfluousStateChange to signal superfluous state changes and returning a wrapped error, those messages can now be suppressed (logged with the debug level) for Event Stream processing. 
--- internal/eventstream/client.go | 14 ++++++++++---- internal/incident/incident.go | 6 ++---- internal/incident/incidents.go | 7 ++++++- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 76d36ef6..79052eb4 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -4,6 +4,7 @@ import ( "context" "crypto/tls" "crypto/x509" + "errors" "fmt" "github.com/icinga/icinga-notifications/internal/config" "github.com/icinga/icinga-notifications/internal/daemon" @@ -76,8 +77,6 @@ func NewClientsFromConfig( for _, icinga2Api := range conf.Icinga2Apis { logger := logs.GetChildLogger(fmt.Sprintf("eventstream-%d", icinga2Api.NotificationsEventSourceId)) - callbackLogger := logs.GetChildLogger(fmt.Sprintf("eventstream-callback-%d", icinga2Api.NotificationsEventSourceId)) - client := &Client{ ApiHost: icinga2Api.Host, ApiBasicAuthUser: icinga2Api.AuthUser, @@ -104,9 +103,16 @@ func NewClientsFromConfig( IcingaWebRoot: conf.Icingaweb2URL, CallbackFn: func(ev *event.Event) { + l := logger.With(zap.Stringer("event", ev)) + err := incident.ProcessEvent(ctx, db, logs, runtimeConfig, ev) - if err != nil { - callbackLogger.Warnw("Cannot process event", zap.Error(err)) + switch { + case errors.Is(err, incident.ErrSuperfluousStateChange): + l.Debugw("Stopped processing event with superfluous state change", zap.Error(err)) + case err != nil: + l.Errorw("Cannot process event", zap.Error(err)) + default: + l.Debug("Successfully processed event over callback") } }, Ctx: ctx, diff --git a/internal/incident/incident.go b/internal/incident/incident.go index b0d60aa6..d4bebb9c 100644 --- a/internal/incident/incident.go +++ b/internal/incident/incident.go @@ -249,10 +249,8 @@ func (i *Incident) processSeverityChangedEvent(ctx context.Context, tx *sqlx.Tx, oldSeverity := i.Severity newSeverity := ev.Severity if oldSeverity == newSeverity { - msg := fmt.Sprintf("Ignoring superfluous %q state 
event from source %d", ev.Severity.String(), ev.SourceId) - i.logger.Warnln(msg) - - return causedByHistoryId, errors.New(msg) + err := fmt.Errorf("%w: %s state event from source %d", ErrSuperfluousStateChange, ev.Severity.String(), ev.SourceId) + return causedByHistoryId, err } i.logger.Infof("Incident severity changed from %s to %s", oldSeverity.String(), newSeverity.String()) diff --git a/internal/incident/incidents.go b/internal/incident/incidents.go index 99b633c4..eba3499d 100644 --- a/internal/incident/incidents.go +++ b/internal/incident/incidents.go @@ -16,6 +16,9 @@ import ( "time" ) +// ErrSuperfluousStateChange indicates a superfluous state change being ignored and stopping further processing. +var ErrSuperfluousStateChange = errors.New("ignoring superfluous state change") + var ( currentIncidents = make(map[*object.Object]*Incident) currentIncidentsMu sync.Mutex @@ -138,6 +141,8 @@ func GetCurrentIncidents() map[int64]*Incident { // // This function first gets this Event's object.Object and its incident.Incident. Then, after performing some safety // checks, it calls the Incident.ProcessEvent method. +// +// The returned error might be wrapped around ErrSuperfluousStateChange. 
func ProcessEvent( ctx context.Context, db *icingadb.DB, @@ -170,7 +175,7 @@ func ProcessEvent( case ev.Severity != event.SeverityOK: panic(fmt.Sprintf("cannot process event %v with a non-OK state %v without a known incident", ev, ev.Severity)) default: - return fmt.Errorf("ignoring superfluous OK state event from source %d", ev.SourceId) + return fmt.Errorf("%w: ok state event from source %d", ErrSuperfluousStateChange, ev.SourceId) } } From e1452dda4b5934b976ae2af0e40d215fbea541be Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 1 Dec 2023 17:02:23 +0100 Subject: [PATCH 53/65] eventstream: use unified logger name with field --- internal/eventstream/client.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index 79052eb4..b44eb674 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -55,7 +55,7 @@ type Client struct { // Ctx for all web requests as well as internal wait loops. Ctx context.Context // Logger to log to. - Logger *logging.Logger + Logger *zap.SugaredLogger // eventDispatcherEventStream communicates Events to be processed from the Event Stream API. 
eventDispatcherEventStream chan *eventMsg @@ -76,7 +76,7 @@ func NewClientsFromConfig( clients := make([]*Client, 0, len(conf.Icinga2Apis)) for _, icinga2Api := range conf.Icinga2Apis { - logger := logs.GetChildLogger(fmt.Sprintf("eventstream-%d", icinga2Api.NotificationsEventSourceId)) + logger := logs.GetChildLogger("eventstream").With(zap.Int64("source-id", icinga2Api.NotificationsEventSourceId)) client := &Client{ ApiHost: icinga2Api.Host, ApiBasicAuthUser: icinga2Api.AuthUser, From be4901bf89cd20c4e4162813a5442857c4d45b89 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 6 Dec 2023 13:27:23 +0100 Subject: [PATCH 54/65] Create Event Stream Clients from Source After #132 got merged and each Source's state is now within the database, the Event Stream's configuration could go there, too. This resulted in some level of refactoring as the data flow logic was now reversed at some points. Especially Golang's non-cyclic imports and the omnipresence of the RuntimeConfig made the "hack" of the eventstream.Launcher necessary to not have an importing edge from config to eventstream. 
--- README.md | 19 +--- cmd/icinga-notifications-daemon/main.go | 21 ++-- config.example.yml | 10 -- internal/config/runtime.go | 25 ++++- internal/config/source.go | 81 +++++++++++--- internal/daemon/config.go | 10 -- internal/eventstream/client.go | 107 ++---------------- internal/eventstream/client_api.go | 4 +- internal/eventstream/launcher.go | 138 ++++++++++++++++++++++++ schema/pgsql/schema.sql | 21 ++-- schema/pgsql/upgrades/022.sql | 22 ++++ 11 files changed, 293 insertions(+), 165 deletions(-) create mode 100644 internal/eventstream/launcher.go create mode 100644 schema/pgsql/upgrades/022.sql diff --git a/README.md b/README.md index 6aceecf0..d07c5cac 100644 --- a/README.md +++ b/README.md @@ -25,22 +25,11 @@ It is required that you have created a new database and imported the [schema](sc Additionally, it also requires you to manually insert items into the **source** table before starting the daemon. ```sql -INSERT INTO source (id, type, name, listener_password_hash) -VALUES (1, 'icinga2', 'Icinga 2', '$2y$10$QU8bJ7cpW1SmoVQ/RndX5O2J5L1PJF7NZ2dlIW7Rv3zUEcbUFg3z2'); +INSERT INTO source + (id, type, name, icinga2_base_url, icinga2_auth_user, icinga2_auth_pass, icinga2_insecure_tls) +VALUES + (1, 'icinga2', 'Local Icinga 2', 'https://localhost:5665', 'root', 'icinga', 'y'); ``` -The `listener_password_hash` is a [PHP `password_hash`](https://www.php.net/manual/en/function.password-hash.php) with the `PASSWORD_DEFAULT` algorithm, currently bcrypt. -In the example above, this is "correct horse battery staple". -This mimics Icinga Web 2's behavior, as stated in [its documentation](https://icinga.com/docs/icinga-web/latest/doc/20-Advanced-Topics/#manual-user-creation-for-database-authentication-backend). - -Currently, there are two ways how notifications get communicated between Icinga 2 and Icinga Notifications. 
-Please select only one, whereby the first is recommended: - -* Icinga Notifications can pull those from the Icinga 2 API when being configured in the YAML configuration file. - For each `source`, as inserted in the database above, an `icinga2-apis` endpoint must be defined. -* Otherwise, Icinga 2 can push the notifications to the Icinga Notification daemon. - Therefore, you need to copy the [Icinga 2 config](icinga2.conf) to `/etc/icinga2/features-enabled` on your master node(s) and restart the Icinga 2 service. - At the top of this file, you will find multiple configurations options that can be set in `/etc/icinga2/constants.conf`. - There are also Icinga2 `EventCommand` definitions in this file that will automatically match all your **checkables**, which may not work properly if the configuration already uses event commands for something else. Then, you can launch the daemon with the following command. ```go diff --git a/cmd/icinga-notifications-daemon/main.go b/cmd/icinga-notifications-daemon/main.go index 891a279f..d8cbacd8 100644 --- a/cmd/icinga-notifications-daemon/main.go +++ b/cmd/icinga-notifications-daemon/main.go @@ -86,11 +86,20 @@ func main() { ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) defer cancel() - runtimeConfig := config.NewRuntimeConfig(db, logs) + esLauncher := &eventstream.Launcher{ + Ctx: ctx, + Logs: logs, + Db: db, + RuntimeConfig: nil, // Will be set below as it is interconnected.. 
+ } + + runtimeConfig := config.NewRuntimeConfig(esLauncher.Launch, logs, db) if err := runtimeConfig.UpdateFromDatabase(ctx); err != nil { logger.Fatalw("failed to load config from database", zap.Error(err)) } + esLauncher.RuntimeConfig = runtimeConfig + go runtimeConfig.PeriodicUpdates(ctx, 1*time.Second) err = incident.LoadOpenIncidents(ctx, db, logs.GetChildLogger("incident"), runtimeConfig) @@ -98,14 +107,8 @@ func main() { logger.Fatalw("Can't load incidents from database", zap.Error(err)) } - esClients, err := eventstream.NewClientsFromConfig(ctx, logs, db, runtimeConfig, conf) - if err != nil { - logger.Fatalw("cannot prepare Event Stream API Clients form config", zap.Error(err)) - } - for _, esClient := range esClients { - go esClient.Process() - } - + // Wait to load open incidents from the database before either starting Event Stream Clients or starting the Listener. + esLauncher.Ready() if err := listener.NewListener(db, runtimeConfig, logs).Run(ctx); err != nil { logger.Errorw("Listener has finished with an error", zap.Error(err)) } else { diff --git a/config.example.yml b/config.example.yml index 934508da..4877f16f 100644 --- a/config.example.yml +++ b/config.example.yml @@ -9,16 +9,6 @@ icingaweb2-url: http://localhost/icingaweb2/ channel-plugin-dir: /usr/libexec/icinga-notifications/channel -icinga2-apis: - - notifications-event-source-id: 1 - host: https://localhost:5665 - auth-user: root - auth-pass: icinga - # The Icinga 2 API CA must either be in the system's CA store, be passed as - # icinga-ca-file or certificate verification can be disabled. 
- # icinga-ca-file: /path/to/icinga-ca.crt - # insecure-tls: true - database: type: pgsql host: /run/postgresql diff --git a/internal/config/runtime.go b/internal/config/runtime.go index ad6dd2e3..ec2b9a35 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -25,6 +25,10 @@ type RuntimeConfig struct { // Accessing it requires a lock that is obtained with RLock() and released with RUnlock(). ConfigSet + // EventStreamLaunchFunc is a callback to launch an Event Stream API Client. + // This became necessary due to circular imports, either with the incident or eventstream package. + EventStreamLaunchFunc func(source *Source) + // pending contains changes to config objects that are to be applied to the embedded live config. pending ConfigSet @@ -36,8 +40,18 @@ type RuntimeConfig struct { mu sync.RWMutex } -func NewRuntimeConfig(db *icingadb.DB, logs *logging.Logging) *RuntimeConfig { - return &RuntimeConfig{db: db, logs: logs, logger: logs.GetChildLogger("runtime-updates")} +func NewRuntimeConfig( + esLaunch func(source *Source), + logs *logging.Logging, + db *icingadb.DB, +) *RuntimeConfig { + return &RuntimeConfig{ + EventStreamLaunchFunc: esLaunch, + + logs: logs, + logger: logs.GetChildLogger("runtime-updates"), + db: db, + } } type ConfigSet struct { @@ -167,9 +181,14 @@ func (r *RuntimeConfig) GetSourceFromCredentials(user, pass string, logger *logg return nil } + if !source.ListenerPasswordHash.Valid { + logger.Debugw("Cannot check credentials for source without a listener_password_hash", zap.Int64("id", sourceId)) + return nil + } + // If either PHP's PASSWORD_DEFAULT changes or Icinga Web 2 starts using something else, e.g., Argon2id, this will // return a descriptive error as the identifier does no longer match the bcrypt "$2y$". 
- err = bcrypt.CompareHashAndPassword([]byte(source.ListenerPasswordHash), []byte(pass)) + err = bcrypt.CompareHashAndPassword([]byte(source.ListenerPasswordHash.String), []byte(pass)) if errors.Is(err, bcrypt.ErrMismatchedHashAndPassword) { logger.Debugw("Invalid password for this source", zap.Int64("id", sourceId)) return nil diff --git a/internal/config/source.go b/internal/config/source.go index a851980f..4597fb16 100644 --- a/internal/config/source.go +++ b/internal/config/source.go @@ -2,17 +2,54 @@ package config import ( "context" + "github.com/icinga/icingadb/pkg/types" "github.com/jmoiron/sqlx" "go.uber.org/zap" ) +// SourceTypeIcinga2 represents the "icinga2" Source Type for Event Stream API sources. +const SourceTypeIcinga2 = "icinga2" + // Source entry within the ConfigSet to describe a source. type Source struct { ID int64 `db:"id"` Type string `db:"type"` Name string `db:"name"` - ListenerPasswordHash string `db:"listener_password_hash"` + ListenerPasswordHash types.String `db:"listener_password_hash"` + + Icinga2BaseURL types.String `db:"icinga2_base_url"` + Icinga2AuthUser types.String `db:"icinga2_auth_user"` + Icinga2AuthPass types.String `db:"icinga2_auth_pass"` + Icinga2CAPem types.String `db:"icinga2_ca_pem"` + Icinga2InsecureTLS types.Bool `db:"icinga2_insecure_tls"` + + // Icinga2SourceConf for Event Stream API sources, only if Source.Type == SourceTypeIcinga2. + Icinga2SourceCancel context.CancelFunc `db:"-" json:"-"` +} + +// fieldEquals checks if this Source's database fields are equal to those of another Source. 
+func (source *Source) fieldEquals(other *Source) bool { + boolEq := func(a, b types.Bool) bool { return (!a.Valid && !b.Valid) || (a.Bool == b.Bool) } + stringEq := func(a, b types.String) bool { return (!a.Valid && !b.Valid) || (a.String == b.String) } + + return source.ID == other.ID && + source.Type == other.Type && + source.Name == other.Name && + stringEq(source.ListenerPasswordHash, other.ListenerPasswordHash) && + stringEq(source.Icinga2BaseURL, other.Icinga2BaseURL) && + stringEq(source.Icinga2AuthUser, other.Icinga2AuthUser) && + stringEq(source.Icinga2AuthPass, other.Icinga2AuthPass) && + stringEq(source.Icinga2CAPem, other.Icinga2CAPem) && + boolEq(source.Icinga2InsecureTLS, other.Icinga2InsecureTLS) +} + +// stop this Source's worker; currently only Icinga Event Stream API Client. +func (source *Source) stop() { + if source.Type == SourceTypeIcinga2 && source.Icinga2SourceCancel != nil { + source.Icinga2SourceCancel() + source.Icinga2SourceCancel = nil + } } func (r *RuntimeConfig) fetchSources(ctx context.Context, tx *sqlx.Tx) error { @@ -34,12 +71,12 @@ func (r *RuntimeConfig) fetchSources(ctx context.Context, tx *sqlx.Tx) error { zap.String("type", s.Type), ) if sourcesById[s.ID] != nil { - sourceLogger.Warnw("ignoring duplicate config for source ID") - } else { - sourcesById[s.ID] = s - - sourceLogger.Debugw("loaded source config") + sourceLogger.Error("Ignoring duplicate config for source ID") + continue } + + sourcesById[s.ID] = s + sourceLogger.Debug("loaded source config") } if r.Sources != nil { @@ -62,16 +99,36 @@ func (r *RuntimeConfig) applyPendingSources() { } for id, pendingSource := range r.pending.Sources { - if pendingSource == nil { - r.logger.Infow("Source has been removed", - zap.Int64("id", r.Sources[id].ID), - zap.String("name", r.Sources[id].Name), - zap.String("type", r.Sources[id].Type)) + logger := r.logger.With(zap.Int64("id", id)) + currentSource := r.Sources[id] + // Compare the pending source with an optional existing 
source; instruct the Event Source Client, if necessary. + if pendingSource == nil && currentSource != nil { + logger.Info("Source has been removed") + + currentSource.stop() delete(r.Sources, id) + continue + } else if pendingSource != nil && currentSource != nil { + if currentSource.fieldEquals(pendingSource) { + continue + } + + logger.Info("Source has been updated") + currentSource.stop() + } else if pendingSource != nil && currentSource == nil { + logger.Info("Source has been added") } else { - r.Sources[id] = pendingSource + // Neither an active nor a pending source? + logger.Error("Cannot applying pending configuration: neither an active nor a pending source") + continue + } + + if pendingSource.Type == SourceTypeIcinga2 { + r.EventStreamLaunchFunc(pendingSource) } + + r.Sources[id] = pendingSource } r.pending.Sources = nil diff --git a/internal/daemon/config.go b/internal/daemon/config.go index 9d499f83..bd4c4983 100644 --- a/internal/daemon/config.go +++ b/internal/daemon/config.go @@ -8,21 +8,11 @@ import ( "os" ) -type Icinga2ApiConfig struct { - NotificationsEventSourceId int64 `yaml:"notifications-event-source-id"` - Host string `yaml:"host"` - AuthUser string `yaml:"auth-user"` - AuthPass string `yaml:"auth-pass"` - IcingaCaFile string `yaml:"icinga-ca-file"` - InsecureTls bool `yaml:"insecure-tls"` -} - type ConfigFile struct { Listen string `yaml:"listen" default:"localhost:5680"` DebugPassword string `yaml:"debug-password"` ChannelPluginDir string `yaml:"channel-plugin-dir" default:"/usr/libexec/icinga-notifications/channel"` Icingaweb2URL string `yaml:"icingaweb2-url"` - Icinga2Apis []Icinga2ApiConfig `yaml:"icinga2-apis"` Database icingadbConfig.Database `yaml:"database"` Logging icingadbConfig.Logging `yaml:"logging"` } diff --git a/internal/eventstream/client.go b/internal/eventstream/client.go index b44eb674..d8e49e0e 100644 --- a/internal/eventstream/client.go +++ b/internal/eventstream/client.go @@ -2,21 +2,11 @@ package eventstream import ( 
"context" - "crypto/tls" - "crypto/x509" - "errors" - "fmt" - "github.com/icinga/icinga-notifications/internal/config" - "github.com/icinga/icinga-notifications/internal/daemon" "github.com/icinga/icinga-notifications/internal/event" - "github.com/icinga/icinga-notifications/internal/incident" - "github.com/icinga/icingadb/pkg/icingadb" - "github.com/icinga/icingadb/pkg/logging" "go.uber.org/zap" "golang.org/x/sync/errgroup" "net/http" "net/url" - "os" "time" ) @@ -39,21 +29,23 @@ type eventMsg struct { // the Client executes a worker within its own goroutine, which dispatches event.Event to the CallbackFn and enforces // order during catching up after (re-)connections. type Client struct { - // ApiHost et al. configure where and how the Icinga 2 API can be reached. - ApiHost string + // ApiBaseURL et al. configure where and how the Icinga 2 API can be reached. + ApiBaseURL string ApiBasicAuthUser string ApiBasicAuthPass string ApiHttpTransport http.Transport - // IcingaNotificationsEventSourceId to be reflected in generated event.Events. - IcingaNotificationsEventSourceId int64 + // EventSourceId to be reflected in generated event.Events. + EventSourceId int64 // IcingaWebRoot points to the Icinga Web 2 endpoint for generated URLs. IcingaWebRoot string // CallbackFn receives generated event.Event objects. CallbackFn func(*event.Event) - // Ctx for all web requests as well as internal wait loops. - Ctx context.Context + // Ctx for all web requests as well as internal wait loops. The CtxCancel can be used to stop this Client. + // Both fields are being populated with a new context from the NewClientFromConfig function. + Ctx context.Context + CtxCancel context.CancelFunc // Logger to log to. Logger *zap.SugaredLogger @@ -63,87 +55,6 @@ type Client struct { catchupPhaseRequest chan struct{} } -// NewClientsFromConfig returns all Clients defined in the conf.ConfigFile. -// -// Those are prepared and just needed to be started by calling their Client.Process method. 
-func NewClientsFromConfig( - ctx context.Context, - logs *logging.Logging, - db *icingadb.DB, - runtimeConfig *config.RuntimeConfig, - conf *daemon.ConfigFile, -) ([]*Client, error) { - clients := make([]*Client, 0, len(conf.Icinga2Apis)) - - for _, icinga2Api := range conf.Icinga2Apis { - logger := logs.GetChildLogger("eventstream").With(zap.Int64("source-id", icinga2Api.NotificationsEventSourceId)) - client := &Client{ - ApiHost: icinga2Api.Host, - ApiBasicAuthUser: icinga2Api.AuthUser, - ApiBasicAuthPass: icinga2Api.AuthPass, - ApiHttpTransport: http.Transport{ - // Hardened TLS config adjusted to Icinga 2's configuration: - // - https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-apilistener - // - https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#security - // - https://ssl-config.mozilla.org/#server=go&config=intermediate - TLSClientConfig: &tls.Config{ - MinVersion: tls.VersionTLS12, - CipherSuites: []uint16{ - tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, - tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, - tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, - tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, - tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, - tls.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305, - }, - }, - }, - - IcingaNotificationsEventSourceId: icinga2Api.NotificationsEventSourceId, - IcingaWebRoot: conf.Icingaweb2URL, - - CallbackFn: func(ev *event.Event) { - l := logger.With(zap.Stringer("event", ev)) - - err := incident.ProcessEvent(ctx, db, logs, runtimeConfig, ev) - switch { - case errors.Is(err, incident.ErrSuperfluousStateChange): - l.Debugw("Stopped processing event with superfluous state change", zap.Error(err)) - case err != nil: - l.Errorw("Cannot process event", zap.Error(err)) - default: - l.Debug("Successfully processed event over callback") - } - }, - Ctx: ctx, - Logger: logger, - } - - if icinga2Api.IcingaCaFile != "" { - caData, err := os.ReadFile(icinga2Api.IcingaCaFile) - if err != nil { - return nil, fmt.Errorf("cannot 
read CA file %q for Event Stream ID %d, %w", - icinga2Api.IcingaCaFile, icinga2Api.NotificationsEventSourceId, err) - } - - certPool := x509.NewCertPool() - if !certPool.AppendCertsFromPEM(caData) { - return nil, fmt.Errorf("cannot add custom CA file to CA pool for Event Stream ID %d, %w", - icinga2Api.NotificationsEventSourceId, err) - } - - client.ApiHttpTransport.TLSClientConfig.RootCAs = certPool - } - - if icinga2Api.InsecureTls { - client.ApiHttpTransport.TLSClientConfig.InsecureSkipVerify = true - } - - clients = append(clients, client) - } - return clients, nil -} - // buildCommonEvent creates an event.Event based on Host and (optional) Service attributes to be specified later. // // The new Event's Time will be the current timestamp. @@ -206,7 +117,7 @@ func (client *Client) buildCommonEvent(ctx context.Context, host, service string return &event.Event{ Time: time.Now(), - SourceId: client.IcingaNotificationsEventSourceId, + SourceId: client.EventSourceId, Name: eventName, URL: eventUrl.String(), Tags: eventTags, diff --git a/internal/eventstream/client_api.go b/internal/eventstream/client_api.go index e76cdbb8..e632f8b1 100644 --- a/internal/eventstream/client_api.go +++ b/internal/eventstream/client_api.go @@ -55,7 +55,7 @@ func (client *Client) queryObjectsApi( body io.Reader, headers map[string]string, ) (io.ReadCloser, error) { - apiUrl, err := url.JoinPath(client.ApiHost, urlPaths...) + apiUrl, err := url.JoinPath(client.ApiBaseURL, urlPaths...) if err != nil { return nil, err } @@ -281,7 +281,7 @@ func (e *connectEventStreamReadCloser) Close() error { // // An error will only be returned if reconnecting - retrying the (almost) same thing - will not help. 
func (client *Client) connectEventStream(esTypes []string) (io.ReadCloser, error) { - apiUrl, err := url.JoinPath(client.ApiHost, "/v1/events") + apiUrl, err := url.JoinPath(client.ApiBaseURL, "/v1/events") if err != nil { return nil, err } diff --git a/internal/eventstream/launcher.go b/internal/eventstream/launcher.go new file mode 100644 index 00000000..a00baab3 --- /dev/null +++ b/internal/eventstream/launcher.go @@ -0,0 +1,138 @@ +package eventstream + +// This file contains the Launcher type to, well, launch new Event Stream Clients through a callback function. + +import ( + "context" + "crypto/tls" + "crypto/x509" + "errors" + "github.com/icinga/icinga-notifications/internal/config" + "github.com/icinga/icinga-notifications/internal/daemon" + "github.com/icinga/icinga-notifications/internal/event" + "github.com/icinga/icinga-notifications/internal/incident" + "github.com/icinga/icingadb/pkg/icingadb" + "github.com/icinga/icingadb/pkg/logging" + "go.uber.org/zap" + "net/http" + "sync" +) + +// Launcher allows starting a new Event Stream API Client through a callback from within the config package. +// +// This architecture became kind of necessary to work around circular imports due to the RuntimeConfig's omnipresence. +type Launcher struct { + Ctx context.Context + Logs *logging.Logging + Db *icingadb.DB + RuntimeConfig *config.RuntimeConfig + + mutex sync.Mutex + isReady bool + waitingSources []*config.Source +} + +// Launch either directly launches an Event Stream Client for this Source or enqueues it until the Launcher is Ready. +func (launcher *Launcher) Launch(src *config.Source) { + launcher.mutex.Lock() + defer launcher.mutex.Unlock() + + if !launcher.isReady { + launcher.Logs.GetChildLogger("eventstream"). + With(zap.Int64("source-id", src.ID)). 
+ Debug("Postponing Event Stream Client Launch as Launcher is not ready yet") + launcher.waitingSources = append(launcher.waitingSources, src) + return + } + + launcher.launch(src) +} + +// Ready marks the Launcher as ready and launches all enqueued, postponed Sources. +func (launcher *Launcher) Ready() { + launcher.mutex.Lock() + defer launcher.mutex.Unlock() + + launcher.isReady = true + for _, src := range launcher.waitingSources { + launcher.Logs.GetChildLogger("eventstream"). + With(zap.Int64("source-id", src.ID)). + Debug("Launching postponed Event Stream Client") + launcher.launch(src) + } + launcher.waitingSources = nil +} + +// launch a new Event Stream API Client based on the Icinga2Source configuration. +func (launcher *Launcher) launch(src *config.Source) { + logger := launcher.Logs.GetChildLogger("eventstream").With(zap.Int64("source-id", src.ID)) + + if src.Type != config.SourceTypeIcinga2 || + !src.Icinga2BaseURL.Valid || + !src.Icinga2AuthUser.Valid || + !src.Icinga2AuthPass.Valid { + logger.Error("Source is either not of type icinga2 or not fully populated") + return + } + + subCtx, subCtxCancel := context.WithCancel(launcher.Ctx) + client := &Client{ + ApiBaseURL: src.Icinga2BaseURL.String, + ApiBasicAuthUser: src.Icinga2AuthUser.String, + ApiBasicAuthPass: src.Icinga2AuthPass.String, + ApiHttpTransport: http.Transport{ + // Hardened TLS config adjusted to Icinga 2's configuration: + // - https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-apilistener + // - https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#security + // - https://ssl-config.mozilla.org/#server=go&config=intermediate + TLSClientConfig: &tls.Config{ + MinVersion: tls.VersionTLS12, + CipherSuites: []uint16{ + tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, + tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, + tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, + tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, + 
tls.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305, + }, + }, + }, + + EventSourceId: src.ID, + IcingaWebRoot: daemon.Config().Icingaweb2URL, + + CallbackFn: func(ev *event.Event) { + l := logger.With(zap.Stringer("event", ev)) + + err := incident.ProcessEvent(subCtx, launcher.Db, launcher.Logs, launcher.RuntimeConfig, ev) + switch { + case errors.Is(err, incident.ErrSuperfluousStateChange): + l.Debugw("Stopped processing event with superfluous state change", zap.Error(err)) + case err != nil: + l.Errorw("Cannot process event", zap.Error(err)) + default: + l.Debug("Successfully processed event over callback") + } + }, + Ctx: subCtx, + CtxCancel: subCtxCancel, + Logger: logger, + } + + if src.Icinga2CAPem.Valid { + certPool := x509.NewCertPool() + if !certPool.AppendCertsFromPEM([]byte(src.Icinga2CAPem.String)) { + logger.Error("Cannot add custom CA file to CA pool") + return + } + + client.ApiHttpTransport.TLSClientConfig.RootCAs = certPool + } + + if src.Icinga2InsecureTLS.Valid && src.Icinga2InsecureTLS.Bool { + client.ApiHttpTransport.TLSClientConfig.InsecureSkipVerify = true + } + + go client.Process() + src.Icinga2SourceCancel = subCtxCancel +} diff --git a/schema/pgsql/schema.sql b/schema/pgsql/schema.sql index a5abddf0..5bd4f7d5 100644 --- a/schema/pgsql/schema.sql +++ b/schema/pgsql/schema.sql @@ -124,21 +124,30 @@ CREATE TABLE schedule_member ( CREATE TABLE source ( id bigserial, + -- The type "icinga2" is special and requires (at least some of) the icinga2_ prefixed columns. type text NOT NULL, name text NOT NULL, -- will likely need a distinguishing value for multiple sources of the same type in the future, like for example -- the Icinga DB environment ID for Icinga 2 sources - -- listener_password_hash is required to limit API access for incoming connections to the Listener. The username is - -- "source-${id}", allowing an early verification before having to parse the POSTed event. - -- - -- This behavior might change in the future to become "type"-dependable. 
- listener_password_hash text NOT NULL, + -- The column listener_password_hash is type-dependent. + -- If type is not "icinga2", listener_password_hash is required to limit API access for incoming connections + -- to the Listener. The username will be "source-${id}", allowing early verification. + listener_password_hash text, + + -- Following columns are for the "icinga2" type. + -- At least icinga2_base_url, icinga2_auth_user, and icinga2_auth_pass are required - see CHECK below. + icinga2_base_url text, + icinga2_auth_user text, + icinga2_auth_pass text, + icinga2_ca_pem text, + icinga2_insecure_tls boolenum NOT NULL DEFAULT 'n', -- The hash is a PHP password_hash with PASSWORD_DEFAULT algorithm, defaulting to bcrypt. This check roughly ensures -- that listener_password_hash can only be populated with bcrypt hashes. -- https://icinga.com/docs/icinga-web/latest/doc/20-Advanced-Topics/#manual-user-creation-for-database-authentication-backend - CHECK (listener_password_hash LIKE '$2y$%'), + CHECK (listener_password_hash IS NULL OR listener_password_hash LIKE '$2y$%'), + CHECK (type != 'icinga2' OR (icinga2_base_url IS NOT NULL AND icinga2_auth_user IS NOT NULL AND icinga2_auth_pass IS NOT NULL)), CONSTRAINT pk_source PRIMARY KEY (id) ); diff --git a/schema/pgsql/upgrades/022.sql b/schema/pgsql/upgrades/022.sql new file mode 100644 index 00000000..3b26a161 --- /dev/null +++ b/schema/pgsql/upgrades/022.sql @@ -0,0 +1,22 @@ +ALTER TABLE source + ALTER COLUMN listener_password_hash DROP NOT NULL, + + ADD COLUMN icinga2_base_url text, + ADD COLUMN icinga2_auth_user text, + ADD COLUMN icinga2_auth_pass text, + ADD COLUMN icinga2_ca_pem text, + ADD COLUMN icinga2_insecure_tls boolenum NOT NULL DEFAULT 'n', + + DROP CONSTRAINT source_listener_password_hash_check; + +-- NOTE: Change those defaults as they most likely don't work with your installation! 
+UPDATE source + SET icinga2_base_url = 'https://localhost:5665/', + icinga2_auth_user = 'root', + icinga2_auth_pass = 'icinga', + icinga2_insecure_tls = 'y' + WHERE type = 'icinga2'; + +ALTER TABLE source + ADD CHECK (listener_password_hash IS NULL OR listener_password_hash LIKE '$2y$%'), + ADD CHECK (type != 'icinga2' OR (icinga2_base_url IS NOT NULL AND icinga2_auth_user IS NOT NULL AND icinga2_auth_pass IS NOT NULL)); From 902cdd7872e4ce1e7a0727e279308f75b5070fff Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 5 Jan 2024 10:28:17 +0100 Subject: [PATCH 55/65] icinga2: rename eventstream package to icinga2 --- cmd/icinga-notifications-daemon/main.go | 10 +++++----- internal/config/runtime.go | 2 +- internal/{eventstream => icinga2}/api_responses.go | 2 +- .../{eventstream => icinga2}/api_responses_test.go | 2 +- internal/{eventstream => icinga2}/client.go | 2 +- internal/{eventstream => icinga2}/client_api.go | 2 +- internal/{eventstream => icinga2}/launcher.go | 14 +++++++------- internal/{eventstream => icinga2}/util.go | 2 +- internal/{eventstream => icinga2}/util_test.go | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) rename internal/{eventstream => icinga2}/api_responses.go (99%) rename internal/{eventstream => icinga2}/api_responses_test.go (99%) rename internal/{eventstream => icinga2}/client.go (99%) rename internal/{eventstream => icinga2}/client_api.go (99%) rename internal/{eventstream => icinga2}/launcher.go (88%) rename internal/{eventstream => icinga2}/util.go (97%) rename internal/{eventstream => icinga2}/util_test.go (96%) diff --git a/cmd/icinga-notifications-daemon/main.go b/cmd/icinga-notifications-daemon/main.go index d8cbacd8..c5688933 100644 --- a/cmd/icinga-notifications-daemon/main.go +++ b/cmd/icinga-notifications-daemon/main.go @@ -8,7 +8,7 @@ import ( "github.com/icinga/icinga-notifications/internal/channel" "github.com/icinga/icinga-notifications/internal/config" 
"github.com/icinga/icinga-notifications/internal/daemon" - "github.com/icinga/icinga-notifications/internal/eventstream" + "github.com/icinga/icinga-notifications/internal/icinga2" "github.com/icinga/icinga-notifications/internal/incident" "github.com/icinga/icinga-notifications/internal/listener" "github.com/icinga/icingadb/pkg/logging" @@ -86,19 +86,19 @@ func main() { ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) defer cancel() - esLauncher := &eventstream.Launcher{ + icinga2Launcher := &icinga2.Launcher{ Ctx: ctx, Logs: logs, Db: db, RuntimeConfig: nil, // Will be set below as it is interconnected.. } - runtimeConfig := config.NewRuntimeConfig(esLauncher.Launch, logs, db) + runtimeConfig := config.NewRuntimeConfig(icinga2Launcher.Launch, logs, db) if err := runtimeConfig.UpdateFromDatabase(ctx); err != nil { logger.Fatalw("failed to load config from database", zap.Error(err)) } - esLauncher.RuntimeConfig = runtimeConfig + icinga2Launcher.RuntimeConfig = runtimeConfig go runtimeConfig.PeriodicUpdates(ctx, 1*time.Second) @@ -108,7 +108,7 @@ func main() { } // Wait to load open incidents from the database before either starting Event Stream Clients or starting the Listener. - esLauncher.Ready() + icinga2Launcher.Ready() if err := listener.NewListener(db, runtimeConfig, logs).Run(ctx); err != nil { logger.Errorw("Listener has finished with an error", zap.Error(err)) } else { diff --git a/internal/config/runtime.go b/internal/config/runtime.go index ec2b9a35..acd74403 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -26,7 +26,7 @@ type RuntimeConfig struct { ConfigSet // EventStreamLaunchFunc is a callback to launch an Event Stream API Client. - // This became necessary due to circular imports, either with the incident or eventstream package. + // This became necessary due to circular imports, either with the incident or icinga2 package. 
EventStreamLaunchFunc func(source *Source) // pending contains changes to config objects that are to be applied to the embedded live config. diff --git a/internal/eventstream/api_responses.go b/internal/icinga2/api_responses.go similarity index 99% rename from internal/eventstream/api_responses.go rename to internal/icinga2/api_responses.go index 380fb894..1be57f7d 100644 --- a/internal/eventstream/api_responses.go +++ b/internal/icinga2/api_responses.go @@ -1,4 +1,4 @@ -package eventstream +package icinga2 import ( "encoding/json" diff --git a/internal/eventstream/api_responses_test.go b/internal/icinga2/api_responses_test.go similarity index 99% rename from internal/eventstream/api_responses_test.go rename to internal/icinga2/api_responses_test.go index 6a3832fb..45e762e2 100644 --- a/internal/eventstream/api_responses_test.go +++ b/internal/icinga2/api_responses_test.go @@ -1,4 +1,4 @@ -package eventstream +package icinga2 import ( "encoding/json" diff --git a/internal/eventstream/client.go b/internal/icinga2/client.go similarity index 99% rename from internal/eventstream/client.go rename to internal/icinga2/client.go index d8e49e0e..851e142a 100644 --- a/internal/eventstream/client.go +++ b/internal/icinga2/client.go @@ -1,4 +1,4 @@ -package eventstream +package icinga2 import ( "context" diff --git a/internal/eventstream/client_api.go b/internal/icinga2/client_api.go similarity index 99% rename from internal/eventstream/client_api.go rename to internal/icinga2/client_api.go index e632f8b1..3c4c987e 100644 --- a/internal/eventstream/client_api.go +++ b/internal/icinga2/client_api.go @@ -1,4 +1,4 @@ -package eventstream +package icinga2 import ( "bufio" diff --git a/internal/eventstream/launcher.go b/internal/icinga2/launcher.go similarity index 88% rename from internal/eventstream/launcher.go rename to internal/icinga2/launcher.go index a00baab3..007f036d 100644 --- a/internal/eventstream/launcher.go +++ b/internal/icinga2/launcher.go @@ -1,4 +1,4 @@ -package 
eventstream +package icinga2 // This file contains the Launcher type to, well, launch new Event Stream Clients through a callback function. @@ -18,7 +18,7 @@ import ( "sync" ) -// Launcher allows starting a new Event Stream API Client through a callback from within the config package. +// Launcher allows starting a new Icinga 2 Event Stream API Client through a callback from within the config package. // // This architecture became kind of necessary to work around circular imports due to the RuntimeConfig's omnipresence. type Launcher struct { @@ -32,13 +32,13 @@ type Launcher struct { waitingSources []*config.Source } -// Launch either directly launches an Event Stream Client for this Source or enqueues it until the Launcher is Ready. +// Launch either directly launches an Icinga 2 Event Stream Client for this Source or enqueues it until the Launcher is Ready. func (launcher *Launcher) Launch(src *config.Source) { launcher.mutex.Lock() defer launcher.mutex.Unlock() if !launcher.isReady { - launcher.Logs.GetChildLogger("eventstream"). + launcher.Logs.GetChildLogger("icinga2"). With(zap.Int64("source-id", src.ID)). Debug("Postponing Event Stream Client Launch as Launcher is not ready yet") launcher.waitingSources = append(launcher.waitingSources, src) @@ -55,7 +55,7 @@ func (launcher *Launcher) Ready() { launcher.isReady = true for _, src := range launcher.waitingSources { - launcher.Logs.GetChildLogger("eventstream"). + launcher.Logs.GetChildLogger("icinga2"). With(zap.Int64("source-id", src.ID)). Debug("Launching postponed Event Stream Client") launcher.launch(src) @@ -63,9 +63,9 @@ func (launcher *Launcher) Ready() { launcher.waitingSources = nil } -// launch a new Event Stream API Client based on the Icinga2Source configuration. +// launch a new Icinga 2 Event Stream API Client based on the config.Source configuration. 
func (launcher *Launcher) launch(src *config.Source) { - logger := launcher.Logs.GetChildLogger("eventstream").With(zap.Int64("source-id", src.ID)) + logger := launcher.Logs.GetChildLogger("icinga2").With(zap.Int64("source-id", src.ID)) if src.Type != config.SourceTypeIcinga2 || !src.Icinga2BaseURL.Valid || diff --git a/internal/eventstream/util.go b/internal/icinga2/util.go similarity index 97% rename from internal/eventstream/util.go rename to internal/icinga2/util.go index 06a9082a..3cd772ab 100644 --- a/internal/eventstream/util.go +++ b/internal/icinga2/util.go @@ -1,4 +1,4 @@ -package eventstream +package icinga2 import ( "net/url" diff --git a/internal/eventstream/util_test.go b/internal/icinga2/util_test.go similarity index 96% rename from internal/eventstream/util_test.go rename to internal/icinga2/util_test.go index 2f531106..d7e4c73e 100644 --- a/internal/eventstream/util_test.go +++ b/internal/icinga2/util_test.go @@ -1,4 +1,4 @@ -package eventstream +package icinga2 import ( "github.com/stretchr/testify/assert" From 425a6a0a109c68a33d34018b37442b40c0d2375d Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 5 Jan 2024 10:47:43 +0100 Subject: [PATCH 56/65] icinga2: rework Icinga2Time to UnixFloat This refactoring converges this representation of the Icinga 2 API Unix timestamp to those of Icinga DB. Eventually, those common or similar code will be extracted into the icinga-go-library. --- internal/icinga2/api_responses.go | 90 +++++++++++------------ internal/icinga2/api_responses_test.go | 99 +++++++++++++------------- internal/icinga2/client_api.go | 19 ++--- 3 files changed, 106 insertions(+), 102 deletions(-) diff --git a/internal/icinga2/api_responses.go b/internal/icinga2/api_responses.go index 1be57f7d..afcfdaab 100644 --- a/internal/icinga2/api_responses.go +++ b/internal/icinga2/api_responses.go @@ -7,19 +7,21 @@ import ( "time" ) -// Icinga2Time is a custom time.Time type for JSON unmarshalling from Icinga 2's unix timestamp type. 
-type Icinga2Time struct { - time.Time +// UnixFloat is a custom time.Time type for millisecond Unix timestamp, as used in Icinga 2's API. +type UnixFloat time.Time + +// Time returns the time.Time of UnixFloat. +func (t *UnixFloat) Time() time.Time { + return time.Time(*t) } -func (iciTime *Icinga2Time) UnmarshalJSON(data []byte) error { +func (t *UnixFloat) UnmarshalJSON(data []byte) error { unixTs, err := strconv.ParseFloat(string(data), 64) if err != nil { return err } - unixMicro := int64(unixTs * 1_000_000) - iciTime.Time = time.UnixMicro(unixMicro) + *t = UnixFloat(time.UnixMicro(int64(unixTs * 1_000_000))) return nil } @@ -31,23 +33,23 @@ func (iciTime *Icinga2Time) UnmarshalJSON(data []byte) error { // // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-comment type Comment struct { - Host string `json:"host_name"` - Service string `json:"service_name"` - Author string `json:"author"` - Text string `json:"text"` - EntryTime Icinga2Time `json:"entry_time"` - EntryType int `json:"entry_type"` + Host string `json:"host_name"` + Service string `json:"service_name"` + Author string `json:"author"` + Text string `json:"text"` + EntryTime UnixFloat `json:"entry_time"` + EntryType int `json:"entry_type"` } // CheckResult represents the Icinga 2 API CheckResult object. // // https://icinga.com/docs/icinga-2/latest/doc/08-advanced-topics/#advanced-value-types-checkresult type CheckResult struct { - ExitStatus int `json:"exit_status"` - Output string `json:"output"` - State int `json:"state"` - ExecutionStart Icinga2Time `json:"execution_start"` - ExecutionEnd Icinga2Time `json:"execution_end"` + ExitStatus int `json:"exit_status"` + Output string `json:"output"` + State int `json:"state"` + ExecutionStart UnixFloat `json:"execution_start"` + ExecutionEnd UnixFloat `json:"execution_end"` } // Downtime represents the Icinga 2 API Downtime object. 
@@ -82,10 +84,10 @@ type HostServiceRuntimeAttributes struct { Groups []string `json:"groups"` State int `json:"state"` LastCheckResult CheckResult `json:"last_check_result"` - LastStateChange Icinga2Time `json:"last_state_change"` + LastStateChange UnixFloat `json:"last_state_change"` DowntimeDepth int `json:"downtime_depth"` Acknowledgement int `json:"acknowledgement"` - AcknowledgementLastChange Icinga2Time `json:"acknowledgement_last_change"` + AcknowledgementLastChange UnixFloat `json:"acknowledgement_last_change"` } // ObjectQueriesResult represents the Icinga 2 API Object Queries Result wrapper object. @@ -120,7 +122,7 @@ const ( // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-statechange type StateChange struct { - Timestamp Icinga2Time `json:"timestamp"` + Timestamp UnixFloat `json:"timestamp"` Host string `json:"host"` Service string `json:"service"` State int `json:"state"` @@ -139,13 +141,13 @@ type StateChange struct { // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementset type AcknowledgementSet struct { - Timestamp Icinga2Time `json:"timestamp"` - Host string `json:"host"` - Service string `json:"service"` - State int `json:"state"` - StateType int `json:"state_type"` - Author string `json:"author"` - Comment string `json:"comment"` + Timestamp UnixFloat `json:"timestamp"` + Host string `json:"host"` + Service string `json:"service"` + State int `json:"state"` + StateType int `json:"state_type"` + Author string `json:"author"` + Comment string `json:"comment"` } // AcknowledgementCleared represents the Icinga 2 API Event Stream AcknowledgementCleared response for acknowledgements cleared on hosts/services. 
@@ -157,59 +159,59 @@ type AcknowledgementSet struct { // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementcleared type AcknowledgementCleared struct { - Timestamp Icinga2Time `json:"timestamp"` - Host string `json:"host"` - Service string `json:"service"` - State int `json:"state"` - StateType int `json:"state_type"` + Timestamp UnixFloat `json:"timestamp"` + Host string `json:"host"` + Service string `json:"service"` + State int `json:"state"` + StateType int `json:"state_type"` } // CommentAdded represents the Icinga 2 API Event Stream CommentAdded response for added host/service comments. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-commentadded type CommentAdded struct { - Timestamp Icinga2Time `json:"timestamp"` - Comment Comment `json:"comment"` + Timestamp UnixFloat `json:"timestamp"` + Comment Comment `json:"comment"` } // CommentRemoved represents the Icinga 2 API Event Stream CommentRemoved response for removed host/service comments. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-commentremoved type CommentRemoved struct { - Timestamp Icinga2Time `json:"timestamp"` - Comment Comment `json:"comment"` + Timestamp UnixFloat `json:"timestamp"` + Comment Comment `json:"comment"` } // DowntimeAdded represents the Icinga 2 API Event Stream DowntimeAdded response for added downtimes on host/services. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-downtimeadded type DowntimeAdded struct { - Timestamp Icinga2Time `json:"timestamp"` - Downtime Downtime `json:"downtime"` + Timestamp UnixFloat `json:"timestamp"` + Downtime Downtime `json:"downtime"` } // DowntimeRemoved represents the Icinga 2 API Event Stream DowntimeRemoved response for removed downtimes on host/services. 
// // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-commentremoved type DowntimeRemoved struct { - Timestamp Icinga2Time `json:"timestamp"` - Downtime Downtime `json:"downtime"` + Timestamp UnixFloat `json:"timestamp"` + Downtime Downtime `json:"downtime"` } // DowntimeStarted represents the Icinga 2 API Event Stream DowntimeStarted response for started downtimes on host/services. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-downtimestarted type DowntimeStarted struct { - Timestamp Icinga2Time `json:"timestamp"` - Downtime Downtime `json:"downtime"` + Timestamp UnixFloat `json:"timestamp"` + Downtime Downtime `json:"downtime"` } // DowntimeTriggered represents the Icinga 2 API Event Stream DowntimeTriggered response for triggered downtimes on host/services. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-downtimetriggered type DowntimeTriggered struct { - Timestamp Icinga2Time `json:"timestamp"` - Downtime Downtime `json:"downtime"` + Timestamp UnixFloat `json:"timestamp"` + Downtime Downtime `json:"downtime"` } // UnmarshalEventStreamResponse unmarshal a JSON response line from the Icinga 2 API Event Stream. 
diff --git a/internal/icinga2/api_responses_test.go b/internal/icinga2/api_responses_test.go index 45e762e2..25a5f9fa 100644 --- a/internal/icinga2/api_responses_test.go +++ b/internal/icinga2/api_responses_test.go @@ -2,17 +2,18 @@ package icinga2 import ( "encoding/json" - "github.com/stretchr/testify/assert" "testing" "time" + + "github.com/stretchr/testify/assert" ) -func TestIcinga2Time_UnmarshalJSON(t *testing.T) { +func TestUnixFloat_UnmarshalJSON(t *testing.T) { tests := []struct { name string jsonData string isError bool - expected Icinga2Time + expected UnixFloat }{ { name: "json-empty", @@ -32,30 +33,30 @@ func TestIcinga2Time_UnmarshalJSON(t *testing.T) { { name: "epoch-time", jsonData: "0.0", - expected: Icinga2Time{time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC)}, + expected: UnixFloat(time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC)), }, { name: "example-time", jsonData: "1697207144.746333", - expected: Icinga2Time{time.Date(2023, time.October, 13, 14, 25, 44, 746333000, time.UTC)}, + expected: UnixFloat(time.Date(2023, time.October, 13, 14, 25, 44, 746333000, time.UTC)), }, { name: "example-time-location", jsonData: "1697207144.746333", - expected: Icinga2Time{time.Date(2023, time.October, 13, 16, 25, 44, 746333000, - time.FixedZone("Europe/Berlin summer", 2*60*60))}, + expected: UnixFloat(time.Date(2023, time.October, 13, 16, 25, 44, 746333000, + time.FixedZone("Europe/Berlin summer", 2*60*60))), }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - var ici2time Icinga2Time + var ici2time UnixFloat err := json.Unmarshal([]byte(test.jsonData), &ici2time) assert.Equal(t, test.isError, err != nil, "unexpected error state; %v", err) if err != nil { return } - assert.WithinDuration(t, test.expected.Time, ici2time.Time, time.Duration(0)) + assert.WithinDuration(t, test.expected.Time(), ici2time.Time(), time.Duration(0)) }) } } @@ -95,7 +96,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Host: "dummy-0", 
Author: "icingaadmin", Text: "foo bar", - EntryTime: Icinga2Time{time.UnixMicro(1697454753536457)}, + EntryTime: UnixFloat(time.UnixMicro(1697454753536457)), EntryType: 1, }, }, @@ -114,7 +115,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Author: "icingaadmin", Text: "adfadsfasdfasdf", EntryType: 1, - EntryTime: Icinga2Time{time.UnixMicro(1697197701307516)}, + EntryTime: UnixFloat(time.UnixMicro(1697197701307516)), }, }, }, @@ -165,13 +166,13 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { ExitStatus: 0, Output: "If you think last Tuesday was a drag, wait till you see what happens tomorrow!", State: 0, - ExecutionStart: Icinga2Time{time.UnixMicro(1697459643863147)}, - ExecutionEnd: Icinga2Time{time.UnixMicro(1697459643868893)}, + ExecutionStart: UnixFloat(time.UnixMicro(1697459643863147)), + ExecutionEnd: UnixFloat(time.UnixMicro(1697459643868893)), }, - LastStateChange: Icinga2Time{time.UnixMicro(1697099900637215)}, + LastStateChange: UnixFloat(time.UnixMicro(1697099900637215)), DowntimeDepth: 0, Acknowledgement: 0, - AcknowledgementLastChange: Icinga2Time{time.UnixMicro(0)}, + AcknowledgementLastChange: UnixFloat(time.UnixMicro(0)), }, }, }, @@ -192,13 +193,13 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { ExitStatus: 2, Output: "connect to address 127.0.0.1 and port 22: Connection refused", State: 2, - ExecutionStart: Icinga2Time{time.UnixMicro(1697460711130247)}, - ExecutionEnd: Icinga2Time{time.UnixMicro(1697460711134875)}, + ExecutionStart: UnixFloat(time.UnixMicro(1697460711130247)), + ExecutionEnd: UnixFloat(time.UnixMicro(1697460711134875)), }, - LastStateChange: Icinga2Time{time.UnixMicro(1697099896120829)}, + LastStateChange: UnixFloat(time.UnixMicro(1697099896120829)), DowntimeDepth: 0, Acknowledgement: 1, - AcknowledgementLastChange: Icinga2Time{time.UnixMicro(1697460655878141)}, + AcknowledgementLastChange: UnixFloat(time.UnixMicro(1697460655878141)), }, }, }, @@ -219,13 +220,13 @@ func 
TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { ExitStatus: 0, Output: "Icinga 2 has been running for 26 seconds. Version: v2.14.0-35-g31b1294ac", State: 0, - ExecutionStart: Icinga2Time{time.UnixMicro(1698673636068106)}, - ExecutionEnd: Icinga2Time{time.UnixMicro(1698673636071483)}, + ExecutionStart: UnixFloat(time.UnixMicro(1698673636068106)), + ExecutionEnd: UnixFloat(time.UnixMicro(1698673636071483)), }, - LastStateChange: Icinga2Time{time.UnixMicro(1697704135756310)}, + LastStateChange: UnixFloat(time.UnixMicro(1697704135756310)), DowntimeDepth: 0, Acknowledgement: 0, - AcknowledgementLastChange: Icinga2Time{time.UnixMicro(0)}, + AcknowledgementLastChange: UnixFloat(time.UnixMicro(0)), }, }, }, @@ -265,7 +266,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "statechange-host-valid", jsonData: `{"acknowledgement":false,"check_result":{"active":true,"check_source":"docker-master","command":["/bin/bash","-c","/usr/games/fortune; exit $0","2"],"execution_end":1697188278.202986,"execution_start":1697188278.194409,"exit_status":2,"output":"If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway","performance_data":[],"previous_hard_state":99,"schedule_end":1697188278.203036,"schedule_start":1697188278.1938322,"scheduling_source":"docker-master","state":2,"ttl":0,"type":"CheckResult","vars_after":{"attempt":2,"reachable":true,"state":2,"state_type":0},"vars_before":{"attempt":1,"reachable":true,"state":2,"state_type":0}},"downtime_depth":0,"host":"dummy-158","state":1,"state_type":0,"timestamp":1697188278.203504,"type":"StateChange"}`, expected: &StateChange{ - Timestamp: Icinga2Time{time.UnixMicro(1697188278203504)}, + Timestamp: UnixFloat(time.UnixMicro(1697188278203504)), Host: "dummy-158", State: 1, StateType: 0, @@ -273,8 +274,8 @@ func TestApiResponseUnmarshal(t *testing.T) { ExitStatus: 2, Output: "If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway", State: 2, - 
ExecutionStart: Icinga2Time{time.UnixMicro(1697188278194409)}, - ExecutionEnd: Icinga2Time{time.UnixMicro(1697188278202986)}, + ExecutionStart: UnixFloat(time.UnixMicro(1697188278194409)), + ExecutionEnd: UnixFloat(time.UnixMicro(1697188278202986)), }, DowntimeDepth: 0, Acknowledgement: false, @@ -284,7 +285,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "statechange-service-valid", jsonData: `{"acknowledgement":false,"check_result":{"active":true,"check_source":"docker-master","command":["/bin/bash","-c","/usr/games/fortune; exit $0","2"],"execution_end":1697184778.611465,"execution_start":1697184778.600973,"exit_status":2,"output":"You're growing out of some of your problems, but there are others that\nyou're growing into.","performance_data":[],"previous_hard_state":0,"schedule_end":1697184778.611557,"schedule_start":1697184778.6,"scheduling_source":"docker-master","state":2,"ttl":0,"type":"CheckResult","vars_after":{"attempt":2,"reachable":false,"state":2,"state_type":0},"vars_before":{"attempt":1,"reachable":false,"state":2,"state_type":0}},"downtime_depth":0,"host":"dummy-280","service":"random fortune","state":2,"state_type":0,"timestamp":1697184778.612108,"type":"StateChange"}`, expected: &StateChange{ - Timestamp: Icinga2Time{time.UnixMicro(1697184778612108)}, + Timestamp: UnixFloat(time.UnixMicro(1697184778612108)), Host: "dummy-280", Service: "random fortune", State: 2, @@ -293,8 +294,8 @@ func TestApiResponseUnmarshal(t *testing.T) { ExitStatus: 2, Output: "You're growing out of some of your problems, but there are others that\nyou're growing into.", State: 2, - ExecutionStart: Icinga2Time{time.UnixMicro(1697184778600973)}, - ExecutionEnd: Icinga2Time{time.UnixMicro(1697184778611465)}, + ExecutionStart: UnixFloat(time.UnixMicro(1697184778600973)), + ExecutionEnd: UnixFloat(time.UnixMicro(1697184778611465)), }, DowntimeDepth: 0, Acknowledgement: false, @@ -304,7 +305,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: 
"acknowledgementset-host", jsonData: `{"acknowledgement_type":1,"author":"icingaadmin","comment":"working on it","expiry":0,"host":"dummy-805","notify":true,"persistent":false,"state":1,"state_type":1,"timestamp":1697201074.579106,"type":"AcknowledgementSet"}`, expected: &AcknowledgementSet{ - Timestamp: Icinga2Time{time.UnixMicro(1697201074579106)}, + Timestamp: UnixFloat(time.UnixMicro(1697201074579106)), Host: "dummy-805", State: 1, StateType: 1, @@ -316,7 +317,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "acknowledgementset-service", jsonData: `{"acknowledgement_type":1,"author":"icingaadmin","comment":"will be fixed soon","expiry":0,"host":"docker-master","notify":true,"persistent":false,"service":"ssh","state":2,"state_type":1,"timestamp":1697201107.64792,"type":"AcknowledgementSet"}`, expected: &AcknowledgementSet{ - Timestamp: Icinga2Time{time.UnixMicro(1697201107647920)}, + Timestamp: UnixFloat(time.UnixMicro(1697201107647920)), Host: "docker-master", Service: "ssh", State: 2, @@ -329,7 +330,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "acknowledgementcleared-host", jsonData: `{"acknowledgement_type":0,"host":"dummy-805","state":1,"state_type":1,"timestamp":1697201082.440148,"type":"AcknowledgementCleared"}`, expected: &AcknowledgementCleared{ - Timestamp: Icinga2Time{time.UnixMicro(1697201082440148)}, + Timestamp: UnixFloat(time.UnixMicro(1697201082440148)), Host: "dummy-805", State: 1, StateType: 1, @@ -339,7 +340,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "acknowledgementcleared-service", jsonData: `{"acknowledgement_type":0,"host":"docker-master","service":"ssh","state":2,"state_type":1,"timestamp":1697201110.220349,"type":"AcknowledgementCleared"}`, expected: &AcknowledgementCleared{ - Timestamp: Icinga2Time{time.UnixMicro(1697201110220349)}, + Timestamp: UnixFloat(time.UnixMicro(1697201110220349)), Host: "docker-master", Service: "ssh", State: 2, @@ -350,13 +351,13 @@ func TestApiResponseUnmarshal(t *testing.T) { 
name: "commentadded-host", jsonData: `{"comment":{"__name":"dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3","author":"icingaadmin","entry_time":1697191791.097852,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":1,"name":"f653e951-2210-432d-bca6-e3719ea74ca3","package":"_api","persistent":false,"service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3.conf"},"sticky":false,"templates":["f653e951-2210-432d-bca6-e3719ea74ca3"],"text":"oh noes","type":"Comment","version":1697191791.097867,"zone":"master"},"timestamp":1697191791.099201,"type":"CommentAdded"}`, expected: &CommentAdded{ - Timestamp: Icinga2Time{time.UnixMicro(1697191791099201)}, + Timestamp: UnixFloat(time.UnixMicro(1697191791099201)), Comment: Comment{ Host: "dummy-912", Author: "icingaadmin", Text: "oh noes", EntryType: 1, - EntryTime: Icinga2Time{time.UnixMicro(1697191791097852)}, + EntryTime: UnixFloat(time.UnixMicro(1697191791097852)), }, }, }, @@ -364,14 +365,14 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "commentadded-service", jsonData: `{"comment":{"__name":"dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","author":"icingaadmin","entry_time":1697197990.035889,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":8,"name":"8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","package":"_api","persistent":false,"service_name":"ping4","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0.conf"},"sticky":false,"templates":["8c00fb6a-5948-4249-a9d5-d1b6eb8945d0"],"text":"if in doubt, check ticket 
#23","type":"Comment","version":1697197990.035905,"zone":"master"},"timestamp":1697197990.037244,"type":"CommentAdded"}`, expected: &CommentAdded{ - Timestamp: Icinga2Time{time.UnixMicro(1697197990037244)}, + Timestamp: UnixFloat(time.UnixMicro(1697197990037244)), Comment: Comment{ Host: "dummy-912", Service: "ping4", Author: "icingaadmin", Text: "if in doubt, check ticket #23", EntryType: 1, - EntryTime: Icinga2Time{time.UnixMicro(1697197990035889)}, + EntryTime: UnixFloat(time.UnixMicro(1697197990035889)), }, }, }, @@ -379,13 +380,13 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "commentremoved-host", jsonData: `{"comment":{"__name":"dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3","author":"icingaadmin","entry_time":1697191791.097852,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":1,"name":"f653e951-2210-432d-bca6-e3719ea74ca3","package":"_api","persistent":false,"service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!f653e951-2210-432d-bca6-e3719ea74ca3.conf"},"sticky":false,"templates":["f653e951-2210-432d-bca6-e3719ea74ca3"],"text":"oh noes","type":"Comment","version":1697191791.097867,"zone":"master"},"timestamp":1697191807.910093,"type":"CommentRemoved"}`, expected: &CommentRemoved{ - Timestamp: Icinga2Time{time.UnixMicro(1697191807910093)}, + Timestamp: UnixFloat(time.UnixMicro(1697191807910093)), Comment: Comment{ Host: "dummy-912", Author: "icingaadmin", Text: "oh noes", EntryType: 1, - EntryTime: Icinga2Time{time.UnixMicro(1697191791097852)}, + EntryTime: UnixFloat(time.UnixMicro(1697191791097852)), }, }, }, @@ -393,14 +394,14 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "commentremoved-service", jsonData: 
`{"comment":{"__name":"dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","author":"icingaadmin","entry_time":1697197990.035889,"entry_type":1,"expire_time":0,"host_name":"dummy-912","legacy_id":8,"name":"8c00fb6a-5948-4249-a9d5-d1b6eb8945d0","package":"_api","persistent":false,"service_name":"ping4","source_location":{"first_column":0,"first_line":1,"last_column":68,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/comments/dummy-912!ping4!8c00fb6a-5948-4249-a9d5-d1b6eb8945d0.conf"},"sticky":false,"templates":["8c00fb6a-5948-4249-a9d5-d1b6eb8945d0"],"text":"if in doubt, check ticket #23","type":"Comment","version":1697197990.035905,"zone":"master"},"timestamp":1697197996.584392,"type":"CommentRemoved"}`, expected: &CommentRemoved{ - Timestamp: Icinga2Time{time.UnixMicro(1697197996584392)}, + Timestamp: UnixFloat(time.UnixMicro(1697197996584392)), Comment: Comment{ Host: "dummy-912", Service: "ping4", Author: "icingaadmin", Text: "if in doubt, check ticket #23", EntryType: 1, - EntryTime: Icinga2Time{time.UnixMicro(1697197990035889)}, + EntryTime: UnixFloat(time.UnixMicro(1697197990035889)), }, }, }, @@ -408,7 +409,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimeadded-host", jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511293,"type":"DowntimeAdded"}`, expected: &DowntimeAdded{ - Timestamp: Icinga2Time{time.UnixMicro(1697207050511293)}, + Timestamp: UnixFloat(time.UnixMicro(1697207050511293)), Downtime: Downtime{ Host: "dummy-157", Author: "icingaadmin", @@ -420,7 +421,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimeadded-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217425,"type":"DowntimeAdded"}`, expected: &DowntimeAdded{ - Timestamp: Icinga2Time{time.UnixMicro(1697207141217425)}, + Timestamp: UnixFloat(time.UnixMicro(1697207141217425)), Downtime: Downtime{ Host: "docker-master", Service: "http", @@ -433,7 +434,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimestarted-host", jsonData: `{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511378,"type":"DowntimeStarted"}`, 
expected: &DowntimeStarted{ - Timestamp: Icinga2Time{time.UnixMicro(1697207050511378)}, + Timestamp: UnixFloat(time.UnixMicro(1697207050511378)), Downtime: Downtime{ Host: "dummy-157", Author: "icingaadmin", @@ -445,7 +446,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimestarted-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":0,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217507,"type":"DowntimeStarted"}`, expected: &DowntimeStarted{ - Timestamp: Icinga2Time{time.UnixMicro(1697207141217507)}, + Timestamp: UnixFloat(time.UnixMicro(1697207141217507)), Downtime: Downtime{ Host: "docker-master", Service: "http", @@ -458,7 +459,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimetriggered-host", jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":1697207050.509957,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207050.511608,"type":"DowntimeTriggered"}`, expected: &DowntimeTriggered{ - Timestamp: Icinga2Time{time.UnixMicro(1697207050511608)}, + Timestamp: UnixFloat(time.UnixMicro(1697207050511608)), Downtime: Downtime{ Host: "dummy-157", Author: "icingaadmin", @@ -470,7 +471,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimetriggered-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":0,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":1697207141.216009,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207141.217726,"type":"DowntimeTriggered"}`, expected: &DowntimeTriggered{ - Timestamp: Icinga2Time{time.UnixMicro(1697207141217726)}, + Timestamp: UnixFloat(time.UnixMicro(1697207141217726)), Downtime: Downtime{ Host: "docker-master", Service: "http", @@ -483,7 +484,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimeremoved-host", jsonData: 
`{"downtime":{"__name":"dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","author":"icingaadmin","authoritative_zone":"","comment":"updates","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210639,"entry_time":1697207050.509957,"fixed":true,"host_name":"dummy-157","legacy_id":3,"name":"e5d4d4ac-615a-4995-ab8f-09d9cd9503b1","package":"_api","parent":"","remove_time":1697207096.187718,"scheduled_by":"","service_name":"","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/dummy-157!e5d4d4ac-615a-4995-ab8f-09d9cd9503b1.conf"},"start_time":1697207039,"templates":["e5d4d4ac-615a-4995-ab8f-09d9cd9503b1"],"trigger_time":1697207050.509957,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207050.509971,"zone":"master"},"timestamp":1697207096.187866,"type":"DowntimeRemoved"}`, expected: &DowntimeRemoved{ - Timestamp: Icinga2Time{time.UnixMicro(1697207096187866)}, + Timestamp: UnixFloat(time.UnixMicro(1697207096187866)), Downtime: Downtime{ Host: "dummy-157", Author: "icingaadmin", @@ -495,7 +496,7 @@ func TestApiResponseUnmarshal(t *testing.T) { name: "downtimeremoved-service", jsonData: `{"downtime":{"__name":"docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f","author":"icingaadmin","authoritative_zone":"","comment":"broken until 
Monday","config_owner":"","config_owner_hash":"","duration":0,"end_time":1697210716,"entry_time":1697207141.216009,"fixed":true,"host_name":"docker-master","legacy_id":4,"name":"3dabe7e7-32b2-4112-ba8f-a6567e5be79f","package":"_api","parent":"","remove_time":1697207144.746117,"scheduled_by":"","service_name":"http","source_location":{"first_column":0,"first_line":1,"last_column":69,"last_line":1,"path":"/var/lib/icinga2/api/packages/_api/997346d3-374d-443f-b734-80789fd59b31/conf.d/downtimes/docker-master!http!3dabe7e7-32b2-4112-ba8f-a6567e5be79f.conf"},"start_time":1697207116,"templates":["3dabe7e7-32b2-4112-ba8f-a6567e5be79f"],"trigger_time":1697207141.216009,"triggered_by":"","triggers":[],"type":"Downtime","version":1697207141.216025,"zone":""},"timestamp":1697207144.746333,"type":"DowntimeRemoved"}`, expected: &DowntimeRemoved{ - Timestamp: Icinga2Time{time.UnixMicro(1697207144746333)}, + Timestamp: UnixFloat(time.UnixMicro(1697207144746333)), Downtime: Downtime{ Host: "docker-master", Service: "http", diff --git a/internal/icinga2/client_api.go b/internal/icinga2/client_api.go index 3c4c987e..12ecc72c 100644 --- a/internal/icinga2/client_api.go +++ b/internal/icinga2/client_api.go @@ -3,6 +3,7 @@ package icinga2 import ( "bufio" "bytes" + "cmp" "context" "crypto/rand" "encoding/json" @@ -171,11 +172,11 @@ func (client *Client) fetchAcknowledgementComment(ctx context.Context, host, ser } slices.SortFunc(objQueriesResults, func(a, b ObjectQueriesResult[Comment]) int { - distA := a.Attrs.EntryTime.Time.Sub(ackTime).Abs() - distB := b.Attrs.EntryTime.Time.Sub(ackTime).Abs() - return int(distA - distB) + distA := a.Attrs.EntryTime.Time().Sub(ackTime).Abs() + distB := b.Attrs.EntryTime.Time().Sub(ackTime).Abs() + return cmp.Compare(distA, distB) }) - if objQueriesResults[0].Attrs.EntryTime.Sub(ackTime).Abs() > time.Second { + if objQueriesResults[0].Attrs.EntryTime.Time().Sub(ackTime).Abs() > time.Second { return nil, fmt.Errorf("found no ACK Comment for %q with %v 
close to %v", filterExpr, filterVars, ackTime) } @@ -229,7 +230,7 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev select { case <-ctx.Done(): return ctx.Err() - case eventCh <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time}: + case eventCh <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time()}: stateChangeEvents++ } @@ -241,7 +242,7 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev ackComment, err := client.fetchAcknowledgementComment( ctx, hostName, serviceName, - objQueriesResult.Attrs.AcknowledgementLastChange.Time) + objQueriesResult.Attrs.AcknowledgementLastChange.Time()) if err != nil { return fmt.Errorf("fetching acknowledgement comment for %v failed, %w", ev, err) } @@ -256,7 +257,7 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev select { case <-ctx.Done(): return ctx.Err() - case eventCh <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time}: + case eventCh <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time()}: acknowledgementEvents++ } } @@ -408,11 +409,11 @@ func (client *Client) listenEventStream() error { switch respT := resp.(type) { case *StateChange: ev, err = client.buildHostServiceEvent(client.Ctx, respT.CheckResult, respT.State, respT.Host, respT.Service) - evTime = respT.Timestamp.Time + evTime = respT.Timestamp.Time() case *AcknowledgementSet: ev, err = client.buildAcknowledgementEvent(client.Ctx, respT.Host, respT.Service, respT.Author, respT.Comment) - evTime = respT.Timestamp.Time + evTime = respT.Timestamp.Time() // case *AcknowledgementCleared: // case *CommentAdded: From 2f534b4a4b85a4bdf2e782f5d4627c8422ba3e4b Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 8 Jan 2024 11:02:18 +0100 Subject: [PATCH 57/65] icinga2: consts for numeric Icinga 2 API results Some values are returned as constant integer values, e.g., 0 for an OK state service. Those known integers were now replaced by consts. 
--- internal/icinga2/api_responses.go | 49 +++++++++++++--- internal/icinga2/api_responses_test.go | 77 +++++++++++++------------- internal/icinga2/client.go | 10 ++-- 3 files changed, 85 insertions(+), 51 deletions(-) diff --git a/internal/icinga2/api_responses.go b/internal/icinga2/api_responses.go index afcfdaab..b10c7d4e 100644 --- a/internal/icinga2/api_responses.go +++ b/internal/icinga2/api_responses.go @@ -25,11 +25,40 @@ func (t *UnixFloat) UnmarshalJSON(data []byte) error { return nil } +// The following const values are representing constant integer values, e.g., 0 for an OK state service. +const ( + // ACKNOWLEDGEMENT_* consts are describing an acknowledgement, e.g., from HostServiceRuntimeAttributes. + ACKNOWLEDGEMENT_NONE = 0 + ACKNOWLEDGEMENT_NORMAL = 1 + ACKNOWLEDGEMENT_STICKY = 2 + + // ENTRY_TYPE_* consts are describing an entry_type, e.g., from Comment. + ENTRY_TYPE_USER = 1 + ENTRY_TYPE_DOWNTIME = 2 + ENTRY_TYPE_FLAPPING = 3 + ENTRY_TYPE_ACKNOWLEDGEMENT = 4 + + // STATE_HOST_* consts are describing a host state, e.g., from CheckResult. + STATE_HOST_UP = 0 + STATE_HOST_DOWN = 1 + + // STATE_SERVICE_* consts are describing a service state, e.g., from CheckResult. + STATE_SERVICE_OK = 0 + STATE_SERVICE_WARNING = 1 + STATE_SERVICE_CRITICAL = 2 + STATE_SERVICE_UNKNOWN = 3 + + // STATE_TYPE_* consts are describing a state type, e.g., from HostServiceRuntimeAttributes. + STATE_TYPE_SOFT = 0 + STATE_TYPE_HARD = 1 +) + // Comment represents the Icinga 2 API Comment object. // // NOTE: // - An empty Service field indicates a host comment. -// - The optional EntryType should be User = 1, Downtime = 2, Flapping = 3, Acknowledgement = 4. 
+// - The optional EntryType should be User = ENTRY_TYPE_USER, Downtime = ENTRY_TYPE_DOWNTIME, +// Flapping = ENTRY_TYPE_FLAPPING, Acknowledgement = ENTRY_TYPE_ACKNOWLEDGEMENT (ENTRY_TYPE_* consts) // // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-comment type Comment struct { @@ -73,8 +102,9 @@ type Downtime struct { // NOTE: // - Name is either the Host or the Service name. // - Host is empty for Host objects; Host contains the Service's Host object name for Services. -// - State might be 0 = UP, 1 = DOWN for hosts and 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN for services. -// - Acknowledgement type is 0 = NONE, 1 = NORMAL, 2 = STICKY. +// - State might be STATE_HOST_{UP,DOWN} for hosts or STATE_SERVICE_{OK,WARNING,CRITICAL,UNKNOWN} for services. +// - StateType might be STATE_TYPE_SOFT or STATE_TYPE_HARD. +// - Acknowledgement type might be ACKNOWLEDGEMENT_{NONE,NORMAL,STICKY}. // // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#host // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#service @@ -83,6 +113,7 @@ type HostServiceRuntimeAttributes struct { Host string `json:"host_name,omitempty"` Groups []string `json:"groups"` State int `json:"state"` + StateType int `json:"state_type"` LastCheckResult CheckResult `json:"last_check_result"` LastStateChange UnixFloat `json:"last_state_change"` DowntimeDepth int `json:"downtime_depth"` @@ -117,8 +148,8 @@ const ( // // NOTE: // - An empty Service field indicates a host state change. -// - State might be 0 = UP, 1 = DOWN for hosts and 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN for services. -// - StateType might be 0 = SOFT, 1 = HARD. +// - State might be STATE_HOST_{UP,DOWN} for hosts or STATE_SERVICE_{OK,WARNING,CRITICAL,UNKNOWN} for services. +// - StateType might be STATE_TYPE_SOFT or STATE_TYPE_HARD. 
// // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-statechange type StateChange struct { @@ -136,8 +167,8 @@ type StateChange struct { // // NOTE: // - An empty Service field indicates a host acknowledgement. -// - State might be 0 = UP, 1 = DOWN for hosts and 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN for services. -// - StateType might be 0 = SOFT, 1 = HARD. +// - State might be STATE_HOST_{UP,DOWN} for hosts or STATE_SERVICE_{OK,WARNING,CRITICAL,UNKNOWN} for services. +// - StateType might be STATE_TYPE_SOFT or STATE_TYPE_HARD. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementset type AcknowledgementSet struct { @@ -154,8 +185,8 @@ type AcknowledgementSet struct { // // NOTE: // - An empty Service field indicates a host acknowledgement. -// - State might be 0 = UP, 1 = DOWN for hosts and 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN for services. -// - StateType might be 0 = SOFT, 1 = HARD. +// - State might be STATE_HOST_{UP,DOWN} for hosts or STATE_SERVICE_{OK,WARNING,CRITICAL,UNKNOWN} for services. +// - StateType might be STATE_TYPE_SOFT or STATE_TYPE_HARD. 
// // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementcleared type AcknowledgementCleared struct { diff --git a/internal/icinga2/api_responses_test.go b/internal/icinga2/api_responses_test.go index 25a5f9fa..f2b09667 100644 --- a/internal/icinga2/api_responses_test.go +++ b/internal/icinga2/api_responses_test.go @@ -97,7 +97,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Author: "icingaadmin", Text: "foo bar", EntryTime: UnixFloat(time.UnixMicro(1697454753536457)), - EntryType: 1, + EntryType: ENTRY_TYPE_USER, }, }, }, @@ -114,7 +114,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Service: "ping6", Author: "icingaadmin", Text: "adfadsfasdfasdf", - EntryType: 1, + EntryType: ENTRY_TYPE_USER, EntryTime: UnixFloat(time.UnixMicro(1697197701307516)), }, }, @@ -159,19 +159,20 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Name: "dummy-244", Type: "Host", Attrs: HostServiceRuntimeAttributes{ - Name: "dummy-244", - Groups: []string{"app-network", "department-dev", "env-qa", "location-rome"}, - State: 0, + Name: "dummy-244", + Groups: []string{"app-network", "department-dev", "env-qa", "location-rome"}, + State: STATE_HOST_UP, + StateType: STATE_TYPE_HARD, LastCheckResult: CheckResult{ ExitStatus: 0, Output: "If you think last Tuesday was a drag, wait till you see what happens tomorrow!", - State: 0, + State: STATE_HOST_UP, ExecutionStart: UnixFloat(time.UnixMicro(1697459643863147)), ExecutionEnd: UnixFloat(time.UnixMicro(1697459643868893)), }, LastStateChange: UnixFloat(time.UnixMicro(1697099900637215)), DowntimeDepth: 0, - Acknowledgement: 0, + Acknowledgement: ACKNOWLEDGEMENT_NONE, AcknowledgementLastChange: UnixFloat(time.UnixMicro(0)), }, }, @@ -185,20 +186,21 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Name: "docker-master!ssh", Type: "Service", Attrs: HostServiceRuntimeAttributes{ - Name: "ssh", - Host: "docker-master", - Groups: []string{}, - State: 2, + 
Name: "ssh", + Host: "docker-master", + Groups: []string{}, + State: STATE_SERVICE_CRITICAL, + StateType: STATE_TYPE_HARD, LastCheckResult: CheckResult{ ExitStatus: 2, Output: "connect to address 127.0.0.1 and port 22: Connection refused", - State: 2, + State: STATE_SERVICE_CRITICAL, ExecutionStart: UnixFloat(time.UnixMicro(1697460711130247)), ExecutionEnd: UnixFloat(time.UnixMicro(1697460711134875)), }, LastStateChange: UnixFloat(time.UnixMicro(1697099896120829)), DowntimeDepth: 0, - Acknowledgement: 1, + Acknowledgement: ACKNOWLEDGEMENT_NORMAL, AcknowledgementLastChange: UnixFloat(time.UnixMicro(1697460655878141)), }, }, @@ -212,20 +214,21 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Name: "docker-master!icinga", Type: "Service", Attrs: HostServiceRuntimeAttributes{ - Name: "icinga", - Host: "docker-master", - Groups: []string{}, - State: 0, + Name: "icinga", + Host: "docker-master", + Groups: []string{}, + State: STATE_SERVICE_OK, + StateType: STATE_TYPE_HARD, LastCheckResult: CheckResult{ ExitStatus: 0, Output: "Icinga 2 has been running for 26 seconds. 
Version: v2.14.0-35-g31b1294ac", - State: 0, + State: STATE_SERVICE_OK, ExecutionStart: UnixFloat(time.UnixMicro(1698673636068106)), ExecutionEnd: UnixFloat(time.UnixMicro(1698673636071483)), }, LastStateChange: UnixFloat(time.UnixMicro(1697704135756310)), DowntimeDepth: 0, - Acknowledgement: 0, + Acknowledgement: ACKNOWLEDGEMENT_NONE, AcknowledgementLastChange: UnixFloat(time.UnixMicro(0)), }, }, @@ -268,12 +271,12 @@ func TestApiResponseUnmarshal(t *testing.T) { expected: &StateChange{ Timestamp: UnixFloat(time.UnixMicro(1697188278203504)), Host: "dummy-158", - State: 1, - StateType: 0, + State: STATE_HOST_DOWN, + StateType: STATE_TYPE_SOFT, CheckResult: CheckResult{ ExitStatus: 2, Output: "If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway", - State: 2, + State: 2, // check returned an invalid state for a host check ExecutionStart: UnixFloat(time.UnixMicro(1697188278194409)), ExecutionEnd: UnixFloat(time.UnixMicro(1697188278202986)), }, @@ -288,12 +291,12 @@ func TestApiResponseUnmarshal(t *testing.T) { Timestamp: UnixFloat(time.UnixMicro(1697184778612108)), Host: "dummy-280", Service: "random fortune", - State: 2, - StateType: 0, + State: STATE_SERVICE_CRITICAL, + StateType: STATE_TYPE_SOFT, CheckResult: CheckResult{ ExitStatus: 2, Output: "You're growing out of some of your problems, but there are others that\nyou're growing into.", - State: 2, + State: STATE_SERVICE_CRITICAL, ExecutionStart: UnixFloat(time.UnixMicro(1697184778600973)), ExecutionEnd: UnixFloat(time.UnixMicro(1697184778611465)), }, @@ -307,8 +310,8 @@ func TestApiResponseUnmarshal(t *testing.T) { expected: &AcknowledgementSet{ Timestamp: UnixFloat(time.UnixMicro(1697201074579106)), Host: "dummy-805", - State: 1, - StateType: 1, + State: STATE_HOST_DOWN, + StateType: STATE_TYPE_HARD, Author: "icingaadmin", Comment: "working on it", }, @@ -320,8 +323,8 @@ func TestApiResponseUnmarshal(t *testing.T) { Timestamp: UnixFloat(time.UnixMicro(1697201107647920)), 
Host: "docker-master", Service: "ssh", - State: 2, - StateType: 1, + State: STATE_SERVICE_CRITICAL, + StateType: STATE_TYPE_HARD, Author: "icingaadmin", Comment: "will be fixed soon", }, @@ -332,8 +335,8 @@ func TestApiResponseUnmarshal(t *testing.T) { expected: &AcknowledgementCleared{ Timestamp: UnixFloat(time.UnixMicro(1697201082440148)), Host: "dummy-805", - State: 1, - StateType: 1, + State: STATE_HOST_DOWN, + StateType: STATE_TYPE_HARD, }, }, { @@ -343,8 +346,8 @@ func TestApiResponseUnmarshal(t *testing.T) { Timestamp: UnixFloat(time.UnixMicro(1697201110220349)), Host: "docker-master", Service: "ssh", - State: 2, - StateType: 1, + State: STATE_SERVICE_CRITICAL, + StateType: STATE_TYPE_HARD, }, }, { @@ -356,7 +359,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Host: "dummy-912", Author: "icingaadmin", Text: "oh noes", - EntryType: 1, + EntryType: ENTRY_TYPE_USER, EntryTime: UnixFloat(time.UnixMicro(1697191791097852)), }, }, @@ -371,7 +374,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Service: "ping4", Author: "icingaadmin", Text: "if in doubt, check ticket #23", - EntryType: 1, + EntryType: ENTRY_TYPE_USER, EntryTime: UnixFloat(time.UnixMicro(1697197990035889)), }, }, @@ -385,7 +388,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Host: "dummy-912", Author: "icingaadmin", Text: "oh noes", - EntryType: 1, + EntryType: ENTRY_TYPE_USER, EntryTime: UnixFloat(time.UnixMicro(1697191791097852)), }, }, @@ -400,7 +403,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Service: "ping4", Author: "icingaadmin", Text: "if in doubt, check ticket #23", - EntryType: 1, + EntryType: ENTRY_TYPE_USER, EntryTime: UnixFloat(time.UnixMicro(1697197990035889)), }, }, diff --git a/internal/icinga2/client.go b/internal/icinga2/client.go index 851e142a..c36a9369 100644 --- a/internal/icinga2/client.go +++ b/internal/icinga2/client.go @@ -132,20 +132,20 @@ func (client *Client) buildHostServiceEvent(ctx context.Context, result CheckRes if service != "" { switch state { - case 
0: // OK + case STATE_SERVICE_OK: eventSeverity = event.SeverityOK - case 1: // WARNING + case STATE_SERVICE_WARNING: eventSeverity = event.SeverityWarning - case 2: // CRITICAL + case STATE_SERVICE_CRITICAL: eventSeverity = event.SeverityCrit default: // UNKNOWN or faulty eventSeverity = event.SeverityErr } } else { switch state { - case 0: // UP + case STATE_HOST_UP: eventSeverity = event.SeverityOK - case 1: // DOWN + case STATE_HOST_DOWN: eventSeverity = event.SeverityCrit default: // faulty eventSeverity = event.SeverityErr From 86cc1136efe175885936fea0a67731a8267b24a7 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 8 Jan 2024 11:27:12 +0100 Subject: [PATCH 58/65] icinga2: only process HARD state changes --- internal/icinga2/client_api.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/internal/icinga2/client_api.go b/internal/icinga2/client_api.go index 12ecc72c..c98c1ae0 100644 --- a/internal/icinga2/client_api.go +++ b/internal/icinga2/client_api.go @@ -219,7 +219,13 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev return fmt.Errorf("querying API delivered a wrong object type %q", objQueriesResult.Type) } - // State change event first + // Only process HARD states + if objQueriesResult.Attrs.StateType == STATE_TYPE_SOFT { + client.Logger.Debugf("Skipping SOFT event, %#v", objQueriesResult.Attrs) + continue + } + + // First: State change event ev, err := client.buildHostServiceEvent( ctx, objQueriesResult.Attrs.LastCheckResult, objQueriesResult.Attrs.State, @@ -234,7 +240,7 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev stateChangeEvents++ } - // Optional acknowledgement event second + // Second: Optional acknowledgement event if objQueriesResult.Attrs.Acknowledgement == 0 { continue } @@ -408,6 +414,12 @@ func (client *Client) listenEventStream() error { ) switch respT := resp.(type) { case *StateChange: + // Only process HARD states + if 
respT.StateType == STATE_TYPE_SOFT { + client.Logger.Debugf("Skipping SOFT State Change, %#v", respT) + continue + } + ev, err = client.buildHostServiceEvent(client.Ctx, respT.CheckResult, respT.State, respT.Host, respT.Service) evTime = respT.Timestamp.Time() From f66f72613ffdda3a3471cf60f4130bf9b134757d Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Mon, 8 Jan 2024 13:11:55 +0100 Subject: [PATCH 59/65] icinga2: rename integer consts to Go-like names --- internal/icinga2/api_responses.go | 61 +++++++++++----------- internal/icinga2/api_responses_test.go | 70 +++++++++++++------------- internal/icinga2/client.go | 10 ++-- internal/icinga2/client_api.go | 4 +- 4 files changed, 73 insertions(+), 72 deletions(-) diff --git a/internal/icinga2/api_responses.go b/internal/icinga2/api_responses.go index b10c7d4e..0a0b2624 100644 --- a/internal/icinga2/api_responses.go +++ b/internal/icinga2/api_responses.go @@ -27,38 +27,37 @@ func (t *UnixFloat) UnmarshalJSON(data []byte) error { // The following const values are representing constant integer values, e.g., 0 for an OK state service. const ( - // ACKNOWLEDGEMENT_* consts are describing an acknowledgement, e.g., from HostServiceRuntimeAttributes. - ACKNOWLEDGEMENT_NONE = 0 - ACKNOWLEDGEMENT_NORMAL = 1 - ACKNOWLEDGEMENT_STICKY = 2 + // Acknowledgement* consts are describing an acknowledgement, e.g., from HostServiceRuntimeAttributes. + AcknowledgementNone = 0 + AcknowledgementNormal = 1 + AcknowledgementSticky = 2 - // ENTRY_TYPE_* consts are describing an entry_type, e.g., from Comment. - ENTRY_TYPE_USER = 1 - ENTRY_TYPE_DOWNTIME = 2 - ENTRY_TYPE_FLAPPING = 3 - ENTRY_TYPE_ACKNOWLEDGEMENT = 4 + // EntryType* consts are describing an entry_type, e.g., from Comment. + EntryTypeUser = 1 + EntryTypeDowntime = 2 + EntryTypeFlapping = 3 + EntryTypeAcknowledgement = 4 - // STATE_HOST_* consts are describing a host state, e.g., from CheckResult. 
- STATE_HOST_UP = 0 - STATE_HOST_DOWN = 1 + // StateHost* consts are describing a host state, e.g., from CheckResult. + StateHostUp = 0 + StateHostDown = 1 - // STATE_SERVICE_* consts are describing a service state, e.g., from CheckResult. - STATE_SERVICE_OK = 0 - STATE_SERVICE_WARNING = 1 - STATE_SERVICE_CRITICAL = 2 - STATE_SERVICE_UNKNOWN = 3 + // StateService* consts are describing a service state, e.g., from CheckResult. + StateServiceOk = 0 + StateServiceWarning = 1 + StateServiceCritical = 2 + StateServiceUnknown = 3 - // STATE_TYPE_* consts are describing a state type, e.g., from HostServiceRuntimeAttributes. - STATE_TYPE_SOFT = 0 - STATE_TYPE_HARD = 1 + // StateType* consts are describing a state type, e.g., from HostServiceRuntimeAttributes. + StateTypeSoft = 0 + StateTypeHard = 1 ) // Comment represents the Icinga 2 API Comment object. // // NOTE: // - An empty Service field indicates a host comment. -// - The optional EntryType should be User = ENTRY_TYPE_USER, Downtime = ENTRY_TYPE_DOWNTIME, -// Flapping = ENTRY_TYPE_FLAPPING, Acknowledgement = ENTRY_TYPE_ACKNOWLEDGEMENT (ENTRY_TYPE_* consts) +// - The optional EntryType should be represented by one of the EntryType* consts. // // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-comment type Comment struct { @@ -102,9 +101,9 @@ type Downtime struct { // NOTE: // - Name is either the Host or the Service name. // - Host is empty for Host objects; Host contains the Service's Host object name for Services. -// - State might be STATE_HOST_{UP,DOWN} for hosts or STATE_SERVICE_{OK,WARNING,CRITICAL,UNKNOWN} for services. -// - StateType might be STATE_TYPE_SOFT or STATE_TYPE_HARD. -// - Acknowledgement type might be ACKNOWLEDGEMENT_{NONE,NORMAL,STICKY}. +// - State might be StateHost{Up,Down} for hosts or StateService{Ok,Warning,Critical,Unknown} for services. +// - StateType might be StateTypeSoft or StateTypeHard. +// - Acknowledgement type might be acknowledgement{None,Normal,Sticky}. 
// // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#host // https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#service @@ -148,8 +147,8 @@ const ( // // NOTE: // - An empty Service field indicates a host state change. -// - State might be STATE_HOST_{UP,DOWN} for hosts or STATE_SERVICE_{OK,WARNING,CRITICAL,UNKNOWN} for services. -// - StateType might be STATE_TYPE_SOFT or STATE_TYPE_HARD. +// - State might be StateHost{Up,Down} for hosts or StateService{Ok,Warning,Critical,Unknown} for services. +// - StateType might be StateTypeSoft or StateTypeHard. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-statechange type StateChange struct { @@ -167,8 +166,8 @@ type StateChange struct { // // NOTE: // - An empty Service field indicates a host acknowledgement. -// - State might be STATE_HOST_{UP,DOWN} for hosts or STATE_SERVICE_{OK,WARNING,CRITICAL,UNKNOWN} for services. -// - StateType might be STATE_TYPE_SOFT or STATE_TYPE_HARD. +// - State might be StateHost{Up,Down} for hosts or StateService{Ok,Warning,Critical,Unknown} for services. +// - StateType might be StateTypeSoft or StateTypeHard. // // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementset type AcknowledgementSet struct { @@ -185,8 +184,8 @@ type AcknowledgementSet struct { // // NOTE: // - An empty Service field indicates a host acknowledgement. -// - State might be STATE_HOST_{UP,DOWN} for hosts or STATE_SERVICE_{OK,WARNING,CRITICAL,UNKNOWN} for services. -// - StateType might be STATE_TYPE_SOFT or STATE_TYPE_HARD. +// - State might be StateHost{Up,Down} for hosts or StateService{Ok,Warning,Critical,Unknown} for services. +// - StateType might be StateTypeSoft or StateTypeHard. 
// // https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementcleared type AcknowledgementCleared struct { diff --git a/internal/icinga2/api_responses_test.go b/internal/icinga2/api_responses_test.go index f2b09667..04a35c7e 100644 --- a/internal/icinga2/api_responses_test.go +++ b/internal/icinga2/api_responses_test.go @@ -97,7 +97,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Author: "icingaadmin", Text: "foo bar", EntryTime: UnixFloat(time.UnixMicro(1697454753536457)), - EntryType: ENTRY_TYPE_USER, + EntryType: EntryTypeUser, }, }, }, @@ -114,7 +114,7 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Service: "ping6", Author: "icingaadmin", Text: "adfadsfasdfasdf", - EntryType: ENTRY_TYPE_USER, + EntryType: EntryTypeUser, EntryTime: UnixFloat(time.UnixMicro(1697197701307516)), }, }, @@ -161,18 +161,18 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Attrs: HostServiceRuntimeAttributes{ Name: "dummy-244", Groups: []string{"app-network", "department-dev", "env-qa", "location-rome"}, - State: STATE_HOST_UP, - StateType: STATE_TYPE_HARD, + State: StateHostUp, + StateType: StateTypeHard, LastCheckResult: CheckResult{ ExitStatus: 0, Output: "If you think last Tuesday was a drag, wait till you see what happens tomorrow!", - State: STATE_HOST_UP, + State: StateHostUp, ExecutionStart: UnixFloat(time.UnixMicro(1697459643863147)), ExecutionEnd: UnixFloat(time.UnixMicro(1697459643868893)), }, LastStateChange: UnixFloat(time.UnixMicro(1697099900637215)), DowntimeDepth: 0, - Acknowledgement: ACKNOWLEDGEMENT_NONE, + Acknowledgement: AcknowledgementNone, AcknowledgementLastChange: UnixFloat(time.UnixMicro(0)), }, }, @@ -189,18 +189,18 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Name: "ssh", Host: "docker-master", Groups: []string{}, - State: STATE_SERVICE_CRITICAL, - StateType: STATE_TYPE_HARD, + State: StateServiceCritical, + StateType: StateTypeHard, LastCheckResult: CheckResult{ 
ExitStatus: 2, Output: "connect to address 127.0.0.1 and port 22: Connection refused", - State: STATE_SERVICE_CRITICAL, + State: StateServiceCritical, ExecutionStart: UnixFloat(time.UnixMicro(1697460711130247)), ExecutionEnd: UnixFloat(time.UnixMicro(1697460711134875)), }, LastStateChange: UnixFloat(time.UnixMicro(1697099896120829)), DowntimeDepth: 0, - Acknowledgement: ACKNOWLEDGEMENT_NORMAL, + Acknowledgement: AcknowledgementNormal, AcknowledgementLastChange: UnixFloat(time.UnixMicro(1697460655878141)), }, }, @@ -217,18 +217,18 @@ func TestObjectQueriesResult_UnmarshalJSON(t *testing.T) { Name: "icinga", Host: "docker-master", Groups: []string{}, - State: STATE_SERVICE_OK, - StateType: STATE_TYPE_HARD, + State: StateServiceOk, + StateType: StateTypeHard, LastCheckResult: CheckResult{ ExitStatus: 0, Output: "Icinga 2 has been running for 26 seconds. Version: v2.14.0-35-g31b1294ac", - State: STATE_SERVICE_OK, + State: StateServiceOk, ExecutionStart: UnixFloat(time.UnixMicro(1698673636068106)), ExecutionEnd: UnixFloat(time.UnixMicro(1698673636071483)), }, LastStateChange: UnixFloat(time.UnixMicro(1697704135756310)), DowntimeDepth: 0, - Acknowledgement: ACKNOWLEDGEMENT_NONE, + Acknowledgement: AcknowledgementNone, AcknowledgementLastChange: UnixFloat(time.UnixMicro(0)), }, }, @@ -271,12 +271,14 @@ func TestApiResponseUnmarshal(t *testing.T) { expected: &StateChange{ Timestamp: UnixFloat(time.UnixMicro(1697188278203504)), Host: "dummy-158", - State: STATE_HOST_DOWN, - StateType: STATE_TYPE_SOFT, + State: StateHostDown, + StateType: StateTypeSoft, CheckResult: CheckResult{ - ExitStatus: 2, - Output: "If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway", - State: 2, // check returned an invalid state for a host check + ExitStatus: 2, + Output: "If two people love each other, there can be no happy end to it.\n\t\t-- Ernest Hemingway", + // The State will be mapped to StateHostDown within Icinga 2, as shown in the outer StateChange + 
// State field. https://github.com/Icinga/icinga2/blob/v2.14.1/lib/icinga/host.cpp#L141-L155 + State: StateServiceCritical, ExecutionStart: UnixFloat(time.UnixMicro(1697188278194409)), ExecutionEnd: UnixFloat(time.UnixMicro(1697188278202986)), }, @@ -291,12 +293,12 @@ func TestApiResponseUnmarshal(t *testing.T) { Timestamp: UnixFloat(time.UnixMicro(1697184778612108)), Host: "dummy-280", Service: "random fortune", - State: STATE_SERVICE_CRITICAL, - StateType: STATE_TYPE_SOFT, + State: StateServiceCritical, + StateType: StateTypeSoft, CheckResult: CheckResult{ ExitStatus: 2, Output: "You're growing out of some of your problems, but there are others that\nyou're growing into.", - State: STATE_SERVICE_CRITICAL, + State: StateServiceCritical, ExecutionStart: UnixFloat(time.UnixMicro(1697184778600973)), ExecutionEnd: UnixFloat(time.UnixMicro(1697184778611465)), }, @@ -310,8 +312,8 @@ func TestApiResponseUnmarshal(t *testing.T) { expected: &AcknowledgementSet{ Timestamp: UnixFloat(time.UnixMicro(1697201074579106)), Host: "dummy-805", - State: STATE_HOST_DOWN, - StateType: STATE_TYPE_HARD, + State: StateHostDown, + StateType: StateTypeHard, Author: "icingaadmin", Comment: "working on it", }, @@ -323,8 +325,8 @@ func TestApiResponseUnmarshal(t *testing.T) { Timestamp: UnixFloat(time.UnixMicro(1697201107647920)), Host: "docker-master", Service: "ssh", - State: STATE_SERVICE_CRITICAL, - StateType: STATE_TYPE_HARD, + State: StateServiceCritical, + StateType: StateTypeHard, Author: "icingaadmin", Comment: "will be fixed soon", }, @@ -335,8 +337,8 @@ func TestApiResponseUnmarshal(t *testing.T) { expected: &AcknowledgementCleared{ Timestamp: UnixFloat(time.UnixMicro(1697201082440148)), Host: "dummy-805", - State: STATE_HOST_DOWN, - StateType: STATE_TYPE_HARD, + State: StateHostDown, + StateType: StateTypeHard, }, }, { @@ -346,8 +348,8 @@ func TestApiResponseUnmarshal(t *testing.T) { Timestamp: UnixFloat(time.UnixMicro(1697201110220349)), Host: "docker-master", Service: "ssh", - 
State: STATE_SERVICE_CRITICAL, - StateType: STATE_TYPE_HARD, + State: StateServiceCritical, + StateType: StateTypeHard, }, }, { @@ -359,7 +361,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Host: "dummy-912", Author: "icingaadmin", Text: "oh noes", - EntryType: ENTRY_TYPE_USER, + EntryType: EntryTypeUser, EntryTime: UnixFloat(time.UnixMicro(1697191791097852)), }, }, @@ -374,7 +376,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Service: "ping4", Author: "icingaadmin", Text: "if in doubt, check ticket #23", - EntryType: ENTRY_TYPE_USER, + EntryType: EntryTypeUser, EntryTime: UnixFloat(time.UnixMicro(1697197990035889)), }, }, @@ -388,7 +390,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Host: "dummy-912", Author: "icingaadmin", Text: "oh noes", - EntryType: ENTRY_TYPE_USER, + EntryType: EntryTypeUser, EntryTime: UnixFloat(time.UnixMicro(1697191791097852)), }, }, @@ -403,7 +405,7 @@ func TestApiResponseUnmarshal(t *testing.T) { Service: "ping4", Author: "icingaadmin", Text: "if in doubt, check ticket #23", - EntryType: ENTRY_TYPE_USER, + EntryType: EntryTypeUser, EntryTime: UnixFloat(time.UnixMicro(1697197990035889)), }, }, diff --git a/internal/icinga2/client.go b/internal/icinga2/client.go index c36a9369..be18dd3a 100644 --- a/internal/icinga2/client.go +++ b/internal/icinga2/client.go @@ -132,20 +132,20 @@ func (client *Client) buildHostServiceEvent(ctx context.Context, result CheckRes if service != "" { switch state { - case STATE_SERVICE_OK: + case StateServiceOk: eventSeverity = event.SeverityOK - case STATE_SERVICE_WARNING: + case StateServiceWarning: eventSeverity = event.SeverityWarning - case STATE_SERVICE_CRITICAL: + case StateServiceCritical: eventSeverity = event.SeverityCrit default: // UNKNOWN or faulty eventSeverity = event.SeverityErr } } else { switch state { - case STATE_HOST_UP: + case StateHostUp: eventSeverity = event.SeverityOK - case STATE_HOST_DOWN: + case StateHostDown: eventSeverity = event.SeverityCrit default: // faulty 
eventSeverity = event.SeverityErr diff --git a/internal/icinga2/client_api.go b/internal/icinga2/client_api.go index c98c1ae0..bf0570e9 100644 --- a/internal/icinga2/client_api.go +++ b/internal/icinga2/client_api.go @@ -220,7 +220,7 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev } // Only process HARD states - if objQueriesResult.Attrs.StateType == STATE_TYPE_SOFT { + if objQueriesResult.Attrs.StateType == StateTypeSoft { client.Logger.Debugf("Skipping SOFT event, %#v", objQueriesResult.Attrs) continue } @@ -415,7 +415,7 @@ func (client *Client) listenEventStream() error { switch respT := resp.(type) { case *StateChange: // Only process HARD states - if respT.StateType == STATE_TYPE_SOFT { + if respT.StateType == StateTypeSoft { client.Logger.Debugf("Skipping SOFT State Change, %#v", respT) continue } From d6c4d36f07a3a27e6bbd62c522d4aef3de6dd2ad Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 11 Jan 2024 11:48:24 +0100 Subject: [PATCH 60/65] icinga2: restart catch-up-phase on error The catch-up-phase logic was extended to also propagate back an error if the state was left unsuccessfully. In this case - and not if another catch-up-phase was requested and the old phase worker got canceled - another attempt will be made. --- internal/icinga2/client.go | 95 ++++++++++++++++++++++++++-------- internal/icinga2/client_api.go | 6 +-- 2 files changed, 75 insertions(+), 26 deletions(-) diff --git a/internal/icinga2/client.go b/internal/icinga2/client.go index be18dd3a..ae88669c 100644 --- a/internal/icinga2/client.go +++ b/internal/icinga2/client.go @@ -2,6 +2,7 @@ package icinga2 import ( "context" + "errors" "github.com/icinga/icinga-notifications/internal/event" "go.uber.org/zap" "golang.org/x/sync/errgroup" @@ -18,6 +19,15 @@ type eventMsg struct { apiTime time.Time } +// catchupEventMsg propagates either an eventMsg or an error back from the catch-up worker. 
+// +// The type must be used as a sum-type like data structure holding either an eventMsg pointer or an error. The error +// should have a higher precedence than the eventMsg. +type catchupEventMsg struct { + *eventMsg + error +} + // Client for the Icinga 2 Event Stream API with support for other Icinga 2 APIs to gather additional information and // perform a catch-up of unknown events either when starting up to or in case of a connection loss. // @@ -181,11 +191,14 @@ func (client *Client) buildAcknowledgementEvent(ctx context.Context, host, servi // startCatchupWorkers launches goroutines for catching up the Icinga 2 API state. // // Each event will be sent to the returned channel. When all launched workers have finished - either because all are -// done or one has failed and the others were interrupted -, the channel will be closed. Those workers honor a context -// derived from the Client.Ctx and would either stop when this context is done or when the context.CancelFunc is called. -func (client *Client) startCatchupWorkers() (chan *eventMsg, context.CancelFunc) { +// done or one has failed and the others were interrupted -, the channel will be closed. In case of a failure, _one_ +// final error will be sent back. +// +// Those workers honor a context derived from the Client.Ctx and would either stop when this context is done or when the +// context.CancelFunc is called. +func (client *Client) startCatchupWorkers() (chan *catchupEventMsg, context.CancelFunc) { startTime := time.Now() - eventMsgCh := make(chan *eventMsg) + catchupEventCh := make(chan *catchupEventMsg) // Unfortunately, the errgroup context is hidden, that's why another context is necessary. 
ctx, cancel := context.WithCancel(client.Ctx) @@ -195,7 +208,7 @@ func (client *Client) startCatchupWorkers() (chan *eventMsg, context.CancelFunc) for _, objType := range objTypes { objType := objType // https://go.dev/doc/faq#closures_and_goroutines group.Go(func() error { - err := client.checkMissedChanges(groupCtx, objType, eventMsgCh) + err := client.checkMissedChanges(groupCtx, objType, catchupEventCh) if err != nil { client.Logger.Errorw("Catch-up-phase event worker failed", zap.String("object type", objType), zap.Error(err)) } @@ -205,17 +218,27 @@ func (client *Client) startCatchupWorkers() (chan *eventMsg, context.CancelFunc) go func() { err := group.Wait() - if err != nil { - client.Logger.Errorw("Catching up the API failed", zap.Error(err), zap.Duration("duration", time.Since(startTime))) - } else { + if err == nil { client.Logger.Infow("Catching up the API has finished", zap.Duration("duration", time.Since(startTime))) + } else if errors.Is(err, context.Canceled) { + // The context is either canceled when the Client got canceled or, more likely, when another catchup-worker + // was requested. In the first case, the already sent messages will be discarded as the worker's main loop + // was left. In the other case, the message buffers will be reset to an empty state. + client.Logger.Warnw("Catching up the API was interrupted", zap.Duration("duration", time.Since(startTime))) + } else { + client.Logger.Errorw("Catching up the API failed", zap.Error(err), zap.Duration("duration", time.Since(startTime))) + + select { + case <-ctx.Done(): + case catchupEventCh <- &catchupEventMsg{error: err}: + } } cancel() - close(eventMsgCh) + close(catchupEventCh) }() - return eventMsgCh, cancel + return catchupEventCh, cancel } // worker is the Client's main background worker, taking care of event.Event dispatching and mode switching. 
@@ -226,10 +249,10 @@ func (client *Client) startCatchupWorkers() (chan *eventMsg, context.CancelFunc) // Communication takes place over the eventDispatcherEventStream and catchupPhaseRequest channels. func (client *Client) worker() { var ( - // catchupEventCh emits events generated during the catch-up-phase from catch-up-workers. It will be closed when - // catching up is done, which indicates the select below to switch phases. When this variable is nil, this - // Client is in the normal operating phase. - catchupEventCh chan *eventMsg + // catchupEventCh either emits events generated during the catch-up-phase from catch-up-workers or one final + // error if something went wrong. It will be closed when catching up is done, which indicates the select below + // to switch phases. When this variable is nil, this Client is in the normal operating phase. + catchupEventCh chan *catchupEventMsg // catchupCancel cancels, if not nil, all running catch-up-workers, e.g., when restarting catching-up. catchupCancel context.CancelFunc @@ -237,8 +260,19 @@ func (client *Client) worker() { catchupBuffer = make([]*event.Event, 0) // catchupCache maps event.Events.Name to API time to skip replaying outdated events. catchupCache = make(map[string]time.Time) + + // catchupErr might hold an error received from catchupEventCh, indicating another catch-up-phase run. + catchupErr error ) + // catchupReset resets all catchup variables to their initial empty state. + catchupReset := func() { + catchupEventCh, catchupCancel = nil, nil + catchupBuffer = make([]*event.Event, 0) + catchupCache = make(map[string]time.Time) + catchupErr = nil + } + // catchupCacheUpdate updates the catchupCache if this eventMsg seems to be the latest of its kind. 
catchupCacheUpdate := func(ev *eventMsg) { ts, ok := catchupCache[ev.event.Name] @@ -258,7 +292,7 @@ func (client *Client) worker() { client.Logger.Warn("Switching to catch-up-phase was requested while already catching up, restarting phase") // Drain the old catch-up-phase producer channel until it is closed as its context will be canceled. - go func(catchupEventCh chan *eventMsg) { + go func(catchupEventCh chan *catchupEventMsg) { for _, ok := <-catchupEventCh; ok; { } }(catchupEventCh) @@ -266,13 +300,20 @@ func (client *Client) worker() { } client.Logger.Info("Worker enters catch-up-phase, start caching up on Event Stream events") + catchupReset() catchupEventCh, catchupCancel = client.startCatchupWorkers() - case ev, ok := <-catchupEventCh: + case catchupMsg, ok := <-catchupEventCh: // Process an incoming event - if ok { - client.CallbackFn(ev.event) - catchupCacheUpdate(ev) + if ok && catchupMsg.error == nil { + client.CallbackFn(catchupMsg.eventMsg.event) + catchupCacheUpdate(catchupMsg.eventMsg) + break + } + + // Store an incoming error as the catchupErr to be processed below + if ok && catchupMsg.error != nil { + catchupErr = catchupMsg.error break } @@ -295,11 +336,19 @@ func (client *Client) worker() { break } - client.Logger.Info("Worker leaves catch-up-phase, returning to normal operation") + if catchupErr != nil { + client.Logger.Warnw("Worker leaves catch-up-phase with an error, another attempt will be made", zap.Error(catchupErr)) + go func() { + select { + case <-client.Ctx.Done(): + case client.catchupPhaseRequest <- struct{}{}: + } + }() + } else { + client.Logger.Info("Worker leaves catch-up-phase, returning to normal operation") + } - catchupEventCh, catchupCancel = nil, nil - catchupBuffer = make([]*event.Event, 0) - catchupCache = make(map[string]time.Time) + catchupReset() case ev := <-client.eventDispatcherEventStream: // During catch-up-phase, buffer Event Stream events diff --git a/internal/icinga2/client_api.go 
b/internal/icinga2/client_api.go index bf0570e9..4e02e3a7 100644 --- a/internal/icinga2/client_api.go +++ b/internal/icinga2/client_api.go @@ -187,7 +187,7 @@ func (client *Client) fetchAcknowledgementComment(ctx context.Context, host, ser // // If the object's acknowledgement field is non-zero, an Acknowledgement Event will be constructed following the Host or // Service object. Each event will be delivered to the channel. -func (client *Client) checkMissedChanges(ctx context.Context, objType string, eventCh chan *eventMsg) error { +func (client *Client) checkMissedChanges(ctx context.Context, objType string, catchupEventCh chan *catchupEventMsg) error { jsonRaw, err := client.queryObjectsApiDirect(ctx, objType, "") if err != nil { return err @@ -236,7 +236,7 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev select { case <-ctx.Done(): return ctx.Err() - case eventCh <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time()}: + case catchupEventCh <- &catchupEventMsg{eventMsg: &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time()}}: stateChangeEvents++ } @@ -263,7 +263,7 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ev select { case <-ctx.Done(): return ctx.Err() - case eventCh <- &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time()}: + case catchupEventCh <- &catchupEventMsg{eventMsg: &eventMsg{ev, objQueriesResult.Attrs.LastStateChange.Time()}}: acknowledgementEvents++ } } From 24a484370dd372d6f657f92a2c554ec0965228bd Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Wed, 17 Jan 2024 11:19:18 +0100 Subject: [PATCH 61/65] icinga2: custom certificate CN icinga2_common_name, or Icinga2CommonName in Go, allows overriding the expected Common Name of the Certificate from the Icinga 2 API. 
For testing, I acquired the CA's PEM by: > openssl s_client \ > -connect docker-master:5665 \ > -showcerts < /dev/null 2> /dev/null \ > | awk '/BEGIN CERTIFICATE/ || p { p = 1; print } /END CERTIFICATE/ { exit }' and populated the source table as follows: > UPDATE source SET > icinga2_ca_pem = $$-----BEGIN CERTIFICATE----- > [ . . . ] > -----END CERTIFICATE-----$$, > icinga2_common_name = 'docker-master', > icinga2_insecure_tls = 'n'; Afterwards, one can verify the check by altering icinga2_common_name either to NULL or an invalid common name. --- internal/config/source.go | 2 ++ internal/icinga2/launcher.go | 4 ++++ schema/pgsql/schema.sql | 4 ++++ schema/pgsql/upgrades/022.sql | 1 + 4 files changed, 11 insertions(+) diff --git a/internal/config/source.go b/internal/config/source.go index 4597fb16..dce0a3f2 100644 --- a/internal/config/source.go +++ b/internal/config/source.go @@ -22,6 +22,7 @@ type Source struct { Icinga2AuthUser types.String `db:"icinga2_auth_user"` Icinga2AuthPass types.String `db:"icinga2_auth_pass"` Icinga2CAPem types.String `db:"icinga2_ca_pem"` + Icinga2CommonName types.String `db:"icinga2_common_name"` Icinga2InsecureTLS types.Bool `db:"icinga2_insecure_tls"` // Icinga2SourceConf for Event Stream API sources, only if Source.Type == SourceTypeIcinga2. 
@@ -41,6 +42,7 @@ func (source *Source) fieldEquals(other *Source) bool { stringEq(source.Icinga2AuthUser, other.Icinga2AuthUser) && stringEq(source.Icinga2AuthPass, other.Icinga2AuthPass) && stringEq(source.Icinga2CAPem, other.Icinga2CAPem) && + stringEq(source.Icinga2CommonName, other.Icinga2CommonName) && boolEq(source.Icinga2InsecureTLS, other.Icinga2InsecureTLS) } diff --git a/internal/icinga2/launcher.go b/internal/icinga2/launcher.go index 007f036d..65718b53 100644 --- a/internal/icinga2/launcher.go +++ b/internal/icinga2/launcher.go @@ -129,6 +129,10 @@ func (launcher *Launcher) launch(src *config.Source) { client.ApiHttpTransport.TLSClientConfig.RootCAs = certPool } + if src.Icinga2CommonName.Valid { + client.ApiHttpTransport.TLSClientConfig.ServerName = src.Icinga2CommonName.String + } + if src.Icinga2InsecureTLS.Valid && src.Icinga2InsecureTLS.Bool { client.ApiHttpTransport.TLSClientConfig.InsecureSkipVerify = true } diff --git a/schema/pgsql/schema.sql b/schema/pgsql/schema.sql index 5bd4f7d5..a6e65045 100644 --- a/schema/pgsql/schema.sql +++ b/schema/pgsql/schema.sql @@ -140,7 +140,11 @@ CREATE TABLE source ( icinga2_base_url text, icinga2_auth_user text, icinga2_auth_pass text, + -- icinga2_ca_pem specifies a custom CA to be used in the PEM format, if not NULL. icinga2_ca_pem text, + -- icinga2_common_name requires Icinga 2's certificate to hold this Common Name if not NULL. This allows using a + -- differing Common Name - maybe an Icinga 2 Endpoint object name - from the FQDN within icinga2_base_url. + icinga2_common_name text, icinga2_insecure_tls boolenum NOT NULL DEFAULT 'n', -- The hash is a PHP password_hash with PASSWORD_DEFAULT algorithm, defaulting to bcrypt. 
This check roughly ensures diff --git a/schema/pgsql/upgrades/022.sql b/schema/pgsql/upgrades/022.sql index 3b26a161..143b1053 100644 --- a/schema/pgsql/upgrades/022.sql +++ b/schema/pgsql/upgrades/022.sql @@ -5,6 +5,7 @@ ALTER TABLE source ADD COLUMN icinga2_auth_user text, ADD COLUMN icinga2_auth_pass text, ADD COLUMN icinga2_ca_pem text, + ADD COLUMN icinga2_common_name text, ADD COLUMN icinga2_insecure_tls boolenum NOT NULL DEFAULT 'n', DROP CONSTRAINT source_listener_password_hash_check; From 00e4a8d3970172de369a87663d4bd5807def5fd6 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Tue, 19 Mar 2024 10:51:02 +0100 Subject: [PATCH 62/65] config: Fix {bool,string}Eq to compare Sources --- internal/config/source.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/config/source.go b/internal/config/source.go index dce0a3f2..fc32e34f 100644 --- a/internal/config/source.go +++ b/internal/config/source.go @@ -31,8 +31,8 @@ type Source struct { // fieldEquals checks if this Source's database fields are equal to those of another Source. func (source *Source) fieldEquals(other *Source) bool { - boolEq := func(a, b types.Bool) bool { return (!a.Valid && !b.Valid) || (a.Bool == b.Bool) } - stringEq := func(a, b types.String) bool { return (!a.Valid && !b.Valid) || (a.String == b.String) } + boolEq := func(a, b types.Bool) bool { return (!a.Valid && !b.Valid) || (a == b) } + stringEq := func(a, b types.String) bool { return (!a.Valid && !b.Valid) || (a == b) } return source.ID == other.ID && source.Type == other.Type && From aaae894953700c452b575ab161118d849d35172a Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Tue, 19 Mar 2024 10:51:50 +0100 Subject: [PATCH 63/65] icinga2: Client fixes - Rate limit catch-up-phase worker start. In case of a network disruption during the catch-up-phase, this will result in an error and infinite retries. Those, however, might result in lots of useless logging, which can be rate limited. 
- Remove the useless and broken catchupEventCh drainage logic. All sends are being protected by context checks. - Abort early on errors received from the catchupEventCh and don't store them for later. --- internal/icinga2/client.go | 57 ++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/internal/icinga2/client.go b/internal/icinga2/client.go index ae88669c..8720c27a 100644 --- a/internal/icinga2/client.go +++ b/internal/icinga2/client.go @@ -6,6 +6,7 @@ import ( "github.com/icinga/icinga-notifications/internal/event" "go.uber.org/zap" "golang.org/x/sync/errgroup" + "math" "net/http" "net/url" "time" @@ -21,8 +22,8 @@ type eventMsg struct { // catchupEventMsg propagates either an eventMsg or an error back from the catch-up worker. // -// The type must be used as a sum-type like data structure holding either an eventMsg pointer or an error. The error -// should have a higher precedence than the eventMsg. +// The type must be used as a sum-type like data structure holding either an error or an eventMsg pointer. The error has +// a higher precedence than the eventMsg. type catchupEventMsg struct { *eventMsg error @@ -196,7 +197,10 @@ func (client *Client) buildAcknowledgementEvent(ctx context.Context, host, servi // // Those workers honor a context derived from the Client.Ctx and would either stop when this context is done or when the // context.CancelFunc is called. -func (client *Client) startCatchupWorkers() (chan *catchupEventMsg, context.CancelFunc) { +// +// The startup time might be delayed through the parameter. This lets the goroutines sleep to rate-limit reconnection +// attempts during network hiccups.
+func (client *Client) startCatchupWorkers(delay time.Duration) (chan *catchupEventMsg, context.CancelFunc) { startTime := time.Now() catchupEventCh := make(chan *catchupEventMsg) @@ -208,6 +212,12 @@ func (client *Client) startCatchupWorkers() (chan *catchupEventMsg, context.Canc for _, objType := range objTypes { objType := objType // https://go.dev/doc/faq#closures_and_goroutines group.Go(func() error { + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(delay): + } + err := client.checkMissedChanges(groupCtx, objType, catchupEventCh) if err != nil { client.Logger.Errorw("Catch-up-phase event worker failed", zap.String("object type", objType), zap.Error(err)) @@ -261,8 +271,9 @@ func (client *Client) worker() { // catchupCache maps event.Events.Name to API time to skip replaying outdated events. catchupCache = make(map[string]time.Time) - // catchupErr might hold an error received from catchupEventCh, indicating another catch-up-phase run. - catchupErr error + // catchupFailCounter indicates how many prior catch-up-phase attempts have failed. It will be used to + // rate limit catch-up-phase restarts. + catchupFailCounter int ) // catchupReset resets all catchup variables to their initial empty state. @@ -270,7 +281,6 @@ func (client *Client) worker() { catchupEventCh, catchupCancel = nil, nil catchupBuffer = make([]*event.Event, 0) catchupCache = make(map[string]time.Time) - catchupErr = nil } // catchupCacheUpdate updates the catchupCache if this eventMsg seems to be the latest of its kind. @@ -290,18 +300,13 @@ func (client *Client) worker() { case <-client.catchupPhaseRequest: if catchupEventCh != nil { client.Logger.Warn("Switching to catch-up-phase was requested while already catching up, restarting phase") - - // Drain the old catch-up-phase producer channel until it is closed as its context will be canceled. 
- go func(catchupEventCh chan *catchupEventMsg) { - for _, ok := <-catchupEventCh; ok; { - } - }(catchupEventCh) catchupCancel() } client.Logger.Info("Worker enters catch-up-phase, start caching up on Event Stream events") catchupReset() - catchupEventCh, catchupCancel = client.startCatchupWorkers() + catchupEventCh, catchupCancel = client.startCatchupWorkers( + min(3*time.Minute, time.Duration(math.Exp2(float64(catchupFailCounter))-1)*time.Second)) case catchupMsg, ok := <-catchupEventCh: // Process an incoming event @@ -311,9 +316,17 @@ func (client *Client) worker() { break } - // Store an incoming error as the catchupErr to be processed below + // Abort and restart the catch-up-phase when receiving an error. if ok && catchupMsg.error != nil { - catchupErr = catchupMsg.error + client.Logger.Warnw("Worker leaves catch-up-phase with an error, another attempt will be made", zap.Error(catchupMsg.error)) + go func() { + select { + case <-client.Ctx.Done(): + case client.catchupPhaseRequest <- struct{}{}: + } + }() + catchupReset() + catchupFailCounter++ break } @@ -336,19 +349,9 @@ func (client *Client) worker() { break } - if catchupErr != nil { - client.Logger.Warnw("Worker leaves catch-up-phase with an error, another attempt will be made", zap.Error(catchupErr)) - go func() { - select { - case <-client.Ctx.Done(): - case client.catchupPhaseRequest <- struct{}{}: - } - }() - } else { - client.Logger.Info("Worker leaves catch-up-phase, returning to normal operation") - } - + client.Logger.Info("Worker leaves catch-up-phase, returning to normal operation") catchupReset() + catchupFailCounter = 0 case ev := <-client.eventDispatcherEventStream: // During catch-up-phase, buffer Event Stream events From 786d2871ed2bf04bf69cd787a30428a0d710fcfc Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 21 Mar 2024 10:22:07 +0100 Subject: [PATCH 64/65] icinga2: Rework catch-up-worker processing --- internal/icinga2/client.go | 81 ++++++++++++++++++++++------------ 
internal/icinga2/client_api.go | 9 ++-- 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/internal/icinga2/client.go b/internal/icinga2/client.go index 8720c27a..304a124c 100644 --- a/internal/icinga2/client.go +++ b/internal/icinga2/client.go @@ -3,10 +3,10 @@ package icinga2 import ( "context" "errors" + "github.com/google/uuid" "github.com/icinga/icinga-notifications/internal/event" "go.uber.org/zap" "golang.org/x/sync/errgroup" - "math" "net/http" "net/url" "time" @@ -200,10 +200,18 @@ func (client *Client) buildAcknowledgementEvent(ctx context.Context, host, servi // // The startup time might be delayed through the parameter. This lets the goroutines sleep to rate-limit reconnection // attempts during network hiccups. +// +// To distinguish different catch-up-phase workers - for example, when one worker was canceled by its context and +// another one was just started -, all log their debug messages with a UUID. func (client *Client) startCatchupWorkers(delay time.Duration) (chan *catchupEventMsg, context.CancelFunc) { + workerId := uuid.New() startTime := time.Now() catchupEventCh := make(chan *catchupEventMsg) + client.Logger.Debugw("Catch-up-phase worker has started", + zap.Stringer("worker", workerId), + zap.Duration("delay", delay)) + // Unfortunately, the errgroup context is hidden, that's why another context is necessary. 
ctx, cancel := context.WithCancel(client.Ctx) group, groupCtx := errgroup.WithContext(ctx) @@ -219,8 +227,11 @@ func (client *Client) startCatchupWorkers(delay time.Duration) (chan *catchupEve } err := client.checkMissedChanges(groupCtx, objType, catchupEventCh) - if err != nil { - client.Logger.Errorw("Catch-up-phase event worker failed", zap.String("object type", objType), zap.Error(err)) + if err != nil && !errors.Is(err, context.Canceled) { + client.Logger.Debugw("Catch-up-phase event worker failed", + zap.Stringer("worker", workerId), + zap.String("object type", objType), + zap.Error(err)) } return err }) @@ -229,14 +240,21 @@ func (client *Client) startCatchupWorkers(delay time.Duration) (chan *catchupEve go func() { err := group.Wait() if err == nil { - client.Logger.Infow("Catching up the API has finished", zap.Duration("duration", time.Since(startTime))) + client.Logger.Debugw("Catching up the API has finished", + zap.Stringer("worker", workerId), + zap.Duration("duration", time.Since(startTime))) } else if errors.Is(err, context.Canceled) { - // The context is either canceled when the Client got canceled or, more likely, when another catchup-worker + // The context is either canceled when the Client got canceled or, more likely, when another catch-up-worker // was requested. In the first case, the already sent messages will be discarded as the worker's main loop // was left. In the other case, the message buffers will be reset to an empty state. 
- client.Logger.Warnw("Catching up the API was interrupted", zap.Duration("duration", time.Since(startTime))) + client.Logger.Debugw("Catching up the API was interrupted", + zap.Stringer("worker", workerId), + zap.Duration("duration", time.Since(startTime))) } else { - client.Logger.Errorw("Catching up the API failed", zap.Error(err), zap.Duration("duration", time.Since(startTime))) + client.Logger.Debugw("Catching up the API failed", + zap.Stringer("worker", workerId), + zap.Error(err), + zap.Duration("duration", time.Since(startTime))) select { case <-ctx.Done(): @@ -271,9 +289,8 @@ func (client *Client) worker() { // catchupCache maps event.Events.Name to API time to skip replaying outdated events. catchupCache = make(map[string]time.Time) - // catchupFailCounter indicates how many prior catch-up-phase attempts have failed. It will be used to - // rate limit catch-up-phase restarts. - catchupFailCounter int + // catchupWorkerDelay slows down future catch-up-phase workers if prior attempts have failed. + catchupWorkerDelay time.Duration ) // catchupReset resets all catchup variables to their initial empty state. @@ -291,6 +308,18 @@ func (client *Client) worker() { } } + // catchupWorkerStart starts a catch-up-phase worker and stops already running workers, if necessary. 
+ catchupWorkerStart := func() { + if catchupEventCh != nil { + client.Logger.Debug("Switching to catch-up-phase was requested while still catching up, stopping old worker") + catchupCancel() + } + + client.Logger.Info("Worker enters catch-up-phase, start caching up on Event Stream events") + catchupReset() + catchupEventCh, catchupCancel = client.startCatchupWorkers(catchupWorkerDelay) + } + for { select { case <-client.Ctx.Done(): @@ -298,15 +327,7 @@ func (client *Client) worker() { return case <-client.catchupPhaseRequest: - if catchupEventCh != nil { - client.Logger.Warn("Switching to catch-up-phase was requested while already catching up, restarting phase") - catchupCancel() - } - - client.Logger.Info("Worker enters catch-up-phase, start caching up on Event Stream events") - catchupReset() - catchupEventCh, catchupCancel = client.startCatchupWorkers( - min(3*time.Minute, time.Duration(math.Exp2(float64(catchupFailCounter))-1)*time.Second)) + catchupWorkerStart() case catchupMsg, ok := <-catchupEventCh: // Process an incoming event @@ -318,15 +339,17 @@ func (client *Client) worker() { // Abort and restart the catch-up-phase when receiving an error. 
if ok && catchupMsg.error != nil { - client.Logger.Warnw("Worker leaves catch-up-phase with an error, another attempt will be made", zap.Error(catchupMsg.error)) - go func() { - select { - case <-client.Ctx.Done(): - case client.catchupPhaseRequest <- struct{}{}: - } - }() - catchupReset() - catchupFailCounter++ + if catchupWorkerDelay == 0 { + catchupWorkerDelay = time.Second + } else { + catchupWorkerDelay = min(3*time.Minute, 2*catchupWorkerDelay) + } + + client.Logger.Warnw("Catch-up-phase was interrupted by an error, another attempt will be made", + zap.Error(catchupMsg.error), + zap.Duration("delay", catchupWorkerDelay)) + + catchupWorkerStart() break } @@ -351,7 +374,7 @@ func (client *Client) worker() { client.Logger.Info("Worker leaves catch-up-phase, returning to normal operation") catchupReset() - catchupFailCounter = 0 + catchupWorkerDelay = 0 case ev := <-client.eventDispatcherEventStream: // During catch-up-phase, buffer Event Stream events diff --git a/internal/icinga2/client_api.go b/internal/icinga2/client_api.go index 4e02e3a7..275445e5 100644 --- a/internal/icinga2/client_api.go +++ b/internal/icinga2/client_api.go @@ -11,7 +11,6 @@ import ( "github.com/icinga/icinga-notifications/internal/event" "go.uber.org/zap" "io" - "math" "net/http" "net/url" "slices" @@ -293,7 +292,7 @@ func (client *Client) connectEventStream(esTypes []string) (io.ReadCloser, error return nil, err } - for i := 0; ; i++ { + for retryDelay := time.Second; ; retryDelay = min(3*time.Minute, 2*retryDelay) { // Always ensure an unique queue name to mitigate possible naming conflicts. 
queueNameRndBuff := make([]byte, 16) _, _ = rand.Read(queueNameRndBuff) @@ -330,7 +329,9 @@ func (client *Client) connectEventStream(esTypes []string) (io.ReadCloser, error httpClient := &http.Client{Transport: &client.ApiHttpTransport} res, err := httpClient.Do(req) if err != nil { - client.Logger.Warnw("Establishing an Event Stream API connection failed, will be retried", zap.Error(err)) + client.Logger.Warnw("Establishing an Event Stream API connection failed, will be retried", + zap.Error(err), + zap.Duration("delay", retryDelay)) return } @@ -361,7 +362,7 @@ func (client *Client) connectEventStream(esTypes []string) (io.ReadCloser, error // Rate limit API reconnections: slow down for successive failed attempts but limit to three minutes. // 1s, 2s, 4s, 8s, 16s, 32s, 1m4s, 2m8s, 3m, 3m, 3m, ... select { - case <-time.After(min(3*time.Minute, time.Duration(math.Exp2(float64(i)))*time.Second)): + case <-time.After(retryDelay): case <-client.Ctx.Done(): return nil, client.Ctx.Err() } From 4673b2ccb66ddf27a51cbbabca22a3d0b197e924 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Thu, 21 Mar 2024 11:21:10 +0100 Subject: [PATCH 65/65] icinga2: Custom http.Transport to set User-Agent --- internal/icinga2/client.go | 2 +- internal/icinga2/client_api.go | 16 +++++++++-- internal/icinga2/launcher.go | 52 ++++++++++++++++++---------------- 3 files changed, 43 insertions(+), 27 deletions(-) diff --git a/internal/icinga2/client.go b/internal/icinga2/client.go index 304a124c..62dc0d3a 100644 --- a/internal/icinga2/client.go +++ b/internal/icinga2/client.go @@ -44,7 +44,7 @@ type Client struct { ApiBaseURL string ApiBasicAuthUser string ApiBasicAuthPass string - ApiHttpTransport http.Transport + ApiHttpTransport http.RoundTripper // EventSourceId to be reflected in generated event.Events. 
EventSourceId int64 diff --git a/internal/icinga2/client_api.go b/internal/icinga2/client_api.go index 275445e5..33fc67e2 100644 --- a/internal/icinga2/client_api.go +++ b/internal/icinga2/client_api.go @@ -19,6 +19,18 @@ import ( // This file contains Icinga 2 API related methods. +// transport wraps http.Transport and overrides http.RoundTripper to set a custom User-Agent for all requests. +type transport struct { + http.Transport + userAgent string +} + +// RoundTrip implements http.RoundTripper to set a custom User-Agent header. +func (trans *transport) RoundTrip(req *http.Request) (*http.Response, error) { + req.Header.Set("User-Agent", trans.userAgent) + return trans.Transport.RoundTrip(req) +} + // extractObjectQueriesResult parses a typed ObjectQueriesResult array out of a JSON io.ReaderCloser. // // The generic type T is currently limited to all later needed types, even when the API might also return other known or @@ -71,7 +83,7 @@ func (client *Client) queryObjectsApi( // The underlying network connection is reused by using client.ApiHttpTransport. 
httpClient := &http.Client{ - Transport: &client.ApiHttpTransport, + Transport: client.ApiHttpTransport, Timeout: 3 * time.Second, } res, err := httpClient.Do(req) @@ -326,7 +338,7 @@ func (client *Client) connectEventStream(esTypes []string) (io.ReadCloser, error defer close(resCh) client.Logger.Debug("Try to establish an Event Stream API connection") - httpClient := &http.Client{Transport: &client.ApiHttpTransport} + httpClient := &http.Client{Transport: client.ApiHttpTransport} res, err := httpClient.Do(req) if err != nil { client.Logger.Warnw("Establishing an Event Stream API connection failed, will be retried", diff --git a/internal/icinga2/launcher.go b/internal/icinga2/launcher.go index 65718b53..346ae8b5 100644 --- a/internal/icinga2/launcher.go +++ b/internal/icinga2/launcher.go @@ -7,6 +7,7 @@ import ( "crypto/tls" "crypto/x509" "errors" + "github.com/icinga/icinga-notifications/internal" "github.com/icinga/icinga-notifications/internal/config" "github.com/icinga/icinga-notifications/internal/daemon" "github.com/icinga/icinga-notifications/internal/event" @@ -75,12 +76,8 @@ func (launcher *Launcher) launch(src *config.Source) { return } - subCtx, subCtxCancel := context.WithCancel(launcher.Ctx) - client := &Client{ - ApiBaseURL: src.Icinga2BaseURL.String, - ApiBasicAuthUser: src.Icinga2AuthUser.String, - ApiBasicAuthPass: src.Icinga2AuthPass.String, - ApiHttpTransport: http.Transport{ + trans := &transport{ + Transport: http.Transport{ // Hardened TLS config adjusted to Icinga 2's configuration: // - https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-apilistener // - https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#security @@ -97,6 +94,31 @@ func (launcher *Launcher) launch(src *config.Source) { }, }, }, + userAgent: "icinga-notifications/" + internal.Version.Version, + } + + if src.Icinga2CAPem.Valid { + certPool := x509.NewCertPool() + if !certPool.AppendCertsFromPEM([]byte(src.Icinga2CAPem.String)) { + 
logger.Error("Cannot add custom CA file to CA pool") + return + } + + trans.TLSClientConfig.RootCAs = certPool + } + if src.Icinga2CommonName.Valid { + trans.TLSClientConfig.ServerName = src.Icinga2CommonName.String + } + if src.Icinga2InsecureTLS.Valid && src.Icinga2InsecureTLS.Bool { + trans.TLSClientConfig.InsecureSkipVerify = true + } + + subCtx, subCtxCancel := context.WithCancel(launcher.Ctx) + client := &Client{ + ApiBaseURL: src.Icinga2BaseURL.String, + ApiBasicAuthUser: src.Icinga2AuthUser.String, + ApiBasicAuthPass: src.Icinga2AuthPass.String, + ApiHttpTransport: trans, EventSourceId: src.ID, IcingaWebRoot: daemon.Config().Icingaweb2URL, @@ -119,24 +141,6 @@ func (launcher *Launcher) launch(src *config.Source) { Logger: logger, } - if src.Icinga2CAPem.Valid { - certPool := x509.NewCertPool() - if !certPool.AppendCertsFromPEM([]byte(src.Icinga2CAPem.String)) { - logger.Error("Cannot add custom CA file to CA pool") - return - } - - client.ApiHttpTransport.TLSClientConfig.RootCAs = certPool - } - - if src.Icinga2CommonName.Valid { - client.ApiHttpTransport.TLSClientConfig.ServerName = src.Icinga2CommonName.String - } - - if src.Icinga2InsecureTLS.Valid && src.Icinga2InsecureTLS.Bool { - client.ApiHttpTransport.TLSClientConfig.InsecureSkipVerify = true - } - go client.Process() src.Icinga2SourceCancel = subCtxCancel }