Skip to content

Commit fe86d20

Browse files
Add a health check endpoint (#36)
* Update profiler and configuration to serve on a non-default mux
* Add a healthcheck handler, configuration, and HTTP endpoint
* Update example config and README with healthcheck configuration
1 parent 9493736 commit fe86d20

File tree

8 files changed

+201
-17
lines changed

8 files changed

+201
-17
lines changed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ line options.
111111
| DATASET_FILTER | --dataset-filter | BigQuery label to filter datasets for metric collection |
112112
| GCP_PROJECT_ID | --gcp-project-id | (Required) The Google Cloud project containing the BigQuery tables to retrieve metrics from |
113113
| GOOGLE_APPLICATION_CREDENTIALS | | File containing the service account credentials used to authenticate to Google Cloud |
114+
| HEALTHCHECK_ENABLED | --healthcheck.enabled | Whether to enable the health check endpoint at /health. Defaults to *false* |
115+
| HEALTHCHECK_PORT | --healthcheck.port | The port to run the health check server on. Defaults to *8080* |
114116
| LOG_LEVEL | | The logging level (e.g. trace, debug, info, warn, error). Defaults to *info* |
115117
| METRIC_INTERVAL | --metric-interval | The interval between metric collection rounds. Must contain a unit and valid units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Defaults to *30s* |
116118
| METRIC_PREFIX | --metric-prefix | The prefix for the metric names exported to Datadog. Defaults to *custom.gcp.bigquery* |

cmd/bqmetricsd/main.go

+26-4
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@ import (
55
"fmt"
66
"github.com/ovotech/bigquery-metrics-extractor/pkg/config"
77
"github.com/ovotech/bigquery-metrics-extractor/pkg/daemon"
8+
"github.com/ovotech/bigquery-metrics-extractor/pkg/health"
89
"github.com/rs/zerolog"
910
"github.com/rs/zerolog/log"
1011
"net/http"
11-
_ "net/http/pprof"
12+
"net/http/pprof"
1213
"os"
1314
"os/signal"
1415
)
@@ -22,12 +23,33 @@ func main() {
2223
log.Fatal().Err(err).Msg("Failed to parse config")
2324
}
2425

25-
if cfg.Profiling {
26-
addr := "localhost:6060"
26+
if cfg.Profiler.Enabled {
27+
addr := fmt.Sprintf("localhost:%d", cfg.Profiler.Port)
2728
log.Info().Msgf("Running profiler on %s", addr)
2829

30+
mux := http.NewServeMux()
31+
mux.HandleFunc("/debug/pprof/", pprof.Index)
32+
mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
33+
mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
34+
mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
35+
mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
36+
37+
go func() {
38+
log.Err(http.ListenAndServe(addr, mux)).Msg("Shutting down profiler")
39+
}()
40+
}
41+
42+
if cfg.HealthCheck.Enabled {
43+
addr := fmt.Sprintf("localhost:%d", cfg.HealthCheck.Port)
44+
log.Info().Msgf("Running healthcheck server on %s", addr)
45+
46+
healthsrv := health.ServiceStatus{Status: health.Ok}
47+
48+
mux := http.NewServeMux()
49+
mux.HandleFunc("/health", healthsrv.Handler)
50+
2951
go func() {
30-
log.Err(http.ListenAndServe(addr, nil)).Msg("Shutting down profiler")
52+
log.Err(http.ListenAndServe(addr, mux)).Msg("Shutting down healthcheck server")
3153
}()
3254
}
3355

example-config.yaml

+8
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,11 @@
5757
# SELECT APPROX_COUNT_DISTINCT(`my-column-1`) AS `my-column-1`,
5858
# APPROX_COUNT_DISTINCT(`my-column-2`) AS `my-column-2`
5959
# FROM `my-project.my-dataset.my-table`
60+
61+
###
62+
# Configuration for the healthcheck endpoint, used to determine whether the
63+
# service is healthy or not
64+
#
65+
# healthcheck:
66+
# enabled: true
67+
# port: 8080

pkg/config/config.go

+31-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ type Config struct {
4444
MetricTags []string `viper:"metric-tags"`
4545
MetricInterval time.Duration `viper:"metric-interval"`
4646
CustomMetrics []CustomMetric `viper:"custom-metrics"`
47-
Profiling bool `viper:"enable-profiler"`
47+
Profiler Profiler `viper:"profiler"`
48+
HealthCheck HealthCheck `viper:"healthcheck"`
4849
}
4950

5051
// CustomMetric holds details about a metric generated from an SQL query
@@ -55,6 +56,18 @@ type CustomMetric struct {
5556
SQL string `viper:"sql"`
5657
}
5758

59+
// Profiler groups the settings for the optional profiler HTTP server.
//
// Field order is significant: callers construct this type with positional
// literals such as Profiler{false, 6060}.
type Profiler struct {
	// Enabled switches the profiler server on or off.
	Enabled bool `viper:"enabled"`
	// Port is the TCP port the profiler server listens on.
	Port int `viper:"port"`
}
64+
65+
// HealthCheck groups the settings for the optional health check HTTP server.
//
// Field order is significant: callers construct this type with positional
// literals such as HealthCheck{true, 8080}.
type HealthCheck struct {
	// Enabled switches the health check server on or off.
	Enabled bool `viper:"enabled"`
	// Port is the TCP port the health check server listens on.
	Port int `viper:"port"`
}
70+
5871
// NewConfig creates a config struct using the package viper for configuration
5972
// construction. Configuration can either be passed in a config file, as flags
6073
// when running the application, or as environment variables. Priority is as
@@ -150,6 +163,18 @@ func ValidateConfig(c *Config) error {
150163
}
151164
}
152165

166+
if c.HealthCheck.Enabled {
167+
if c.HealthCheck.Port <= 0 || c.HealthCheck.Port > 65535 {
168+
return ErrInvalidPort
169+
}
170+
}
171+
172+
if c.Profiler.Enabled {
173+
if c.Profiler.Port <= 0 || c.Profiler.Port > 65535 {
174+
return ErrInvalidPort
175+
}
176+
}
177+
153178
return nil
154179
}
155180

@@ -176,7 +201,10 @@ func configFlags(name string) *pflag.FlagSet {
176201
flags.String("metric-prefix", DefaultMetricPrefix, fmt.Sprintf("The prefix for the metrics names exported to Datadog (Default %s)", DefaultMetricPrefix))
177202
flags.Duration("metric-interval", defInterval, fmt.Sprintf("The interval between metrics submissions (Default %s)", DefaultMetricInterval))
178203
flags.StringSlice("metric-tags", []string{}, "Comma-delimited list of tags to attach to metrics")
179-
flags.Bool("enable-profiler", false, "Enables the profiler")
204+
flags.Bool("profiler.enabled", false, "Enables the profiler")
205+
flags.Int("profiler.port", 6060, "The port on which to run the profiler server")
206+
flags.Bool("healthcheck.enabled", false, "Enables the health check endpoint")
207+
flags.Int("healthcheck.port", 8080, "The port on which to run the server providing the health check endpoint")
180208

181209
_ = flags.Parse(os.Args[1:])
182210

@@ -255,6 +283,7 @@ func handleEnvBindings(vpr *viper.Viper, fs *pflag.FlagSet) {
255283

256284
fs.VisitAll(func(f *pflag.Flag) {
257285
env := strings.ReplaceAll(f.Name, "-", "_")
286+
env = strings.ReplaceAll(env, ".", "_")
258287
_ = vpr.BindEnv(f.Name, strings.ToUpper(env))
259288
})
260289
}

pkg/config/config_test.go

+35-11
Original file line numberDiff line numberDiff line change
@@ -41,47 +41,52 @@ func TestNewConfig(t *testing.T) {
4141
want *Config
4242
wantErr bool
4343
}{
44-
{"all via env", setup([]string{"DATADOG_API_KEY=abc123", "DATASET_FILTER=bqmetrics:enabled", "GCP_PROJECT_ID=my-project-id", "METRIC_PREFIX=custom.gcp.bigquery.stats", "METRIC_TAGS=env:prod", "METRIC_INTERVAL=2m"}, nil, ""), args{"bqmetricstest"}, &Config{
44+
{"all via env", setup([]string{"DATADOG_API_KEY=abc123", "DATASET_FILTER=bqmetrics:enabled", "GCP_PROJECT_ID=my-project-id", "METRIC_PREFIX=custom.gcp.bigquery.stats", "METRIC_TAGS=env:prod", "METRIC_INTERVAL=2m", "HEALTHCHECK_ENABLED=true"}, nil, ""), args{"bqmetricstest"}, &Config{
4545
DatadogAPIKey: "abc123",
4646
DatasetFilter: "bqmetrics:enabled",
4747
GcpProject: "my-project-id",
4848
MetricPrefix: "custom.gcp.bigquery.stats",
4949
MetricTags: []string{"env:prod"},
5050
MetricInterval: 2 * time.Minute,
51-
Profiling: false,
51+
Profiler: Profiler{false, 6060},
52+
HealthCheck: HealthCheck{true, 8080},
5253
}, false},
53-
{"all via cmd", setup(nil, []string{"--datadog-api-key-file=/tmp/dd.key", "--dataset-filter=bqmetrics:enabled", "--gcp-project-id=my-project-id", "--metric-prefix=custom.gcp.bigquery.stats", "--metric-tags=env:prod", "--metric-interval=2m", "--enable-profiler"}, "abc123"), args{"bqmetricstest"}, &Config{
54+
{"all via cmd", setup(nil, []string{"--datadog-api-key-file=/tmp/dd.key", "--dataset-filter=bqmetrics:enabled", "--gcp-project-id=my-project-id", "--metric-prefix=custom.gcp.bigquery.stats", "--metric-tags=env:prod", "--metric-interval=2m", "--profiler.enabled"}, "abc123"), args{"bqmetricstest"}, &Config{
5455
DatadogAPIKey: "abc123",
5556
DatasetFilter: "bqmetrics:enabled",
5657
GcpProject: "my-project-id",
5758
MetricPrefix: "custom.gcp.bigquery.stats",
5859
MetricTags: []string{"env:prod"},
5960
MetricInterval: 2 * time.Minute,
60-
Profiling: true,
61+
Profiler: Profiler{true, 6060},
62+
HealthCheck: HealthCheck{false, 8080},
6163
}, false},
6264
{"mixture of sources", setup([]string{"DATADOG_API_KEY=abc123", "GCP_PROJECT_ID=my-project-id"}, []string{"--metric-prefix=custom.gcp.bigquery.stats", "--metric-tags=env:prod", "--metric-interval=2m"}, ""), args{"bqmetricstest"}, &Config{
6365
DatadogAPIKey: "abc123",
6466
GcpProject: "my-project-id",
6567
MetricPrefix: "custom.gcp.bigquery.stats",
6668
MetricTags: []string{"env:prod"},
6769
MetricInterval: 2 * time.Minute,
68-
Profiling: false,
70+
Profiler: Profiler{false, 6060},
71+
HealthCheck: HealthCheck{false, 8080},
6972
}, false},
7073
{"minimum required config", setup([]string{"DATADOG_API_KEY=abc123", "GCP_PROJECT_ID=my-project-id"}, nil, ""), args{"bqmetricstest"}, &Config{
7174
DatadogAPIKey: "abc123",
7275
GcpProject: "my-project-id",
7376
MetricPrefix: DefaultMetricPrefix,
7477
MetricTags: nil,
7578
MetricInterval: 30 * time.Second,
76-
Profiling: false,
79+
Profiler: Profiler{false, 6060},
80+
HealthCheck: HealthCheck{false, 8080},
7781
}, false},
7882
{"default credentials", setup([]string{"DATADOG_API_KEY=abc123", "GOOGLE_APPLICATION_CREDENTIALS=/tmp/dd.key"}, nil, "{\"type\": \"service_account\", \"project_id\": \"my-project-id\"}"), args{"bqmetricstest"}, &Config{
7983
DatadogAPIKey: "abc123",
8084
GcpProject: "my-project-id",
8185
MetricPrefix: DefaultMetricPrefix,
8286
MetricTags: nil,
8387
MetricInterval: 30 * time.Second,
84-
Profiling: false,
88+
Profiler: Profiler{false, 6060},
89+
HealthCheck: HealthCheck{false, 8080},
8590
}, false},
8691
{"unreadable key file", setup([]string{"DATADOG_API_KEY_FILE=/tmp/not-found.key", "GCP_PROJECT_ID=my-project-id"}, nil, "abc123"), args{"bqmetricstest"}, nil, true},
8792
{"missing key", setup([]string{"GCP_PROJECT_ID=my-project-id"}, nil, ""), args{"bqmetricstest"}, nil, true},
@@ -113,7 +118,7 @@ func TestNewConfig_configFile(t *testing.T) {
113118
_ = os.Remove(n)
114119
}()
115120

116-
data := []byte("{\"datadog-api-key\": \"abc123\", \"dataset-filter\": \"bqmetrics:enabled\", \"gcp-project-id\": \"my-project-id\", \"metric-prefix\": \"custom.gcp.bigquery.stats\", \"metric-tags\": \"env:prod,team:my-team\", \"metric-interval\": \"2m\"}")
121+
data := []byte("{\"datadog-api-key\": \"abc123\", \"dataset-filter\": \"bqmetrics:enabled\", \"gcp-project-id\": \"my-project-id\", \"metric-prefix\": \"custom.gcp.bigquery.stats\", \"metric-tags\": \"env:prod,team:my-team\", \"metric-interval\": \"2m\", \"healthcheck\": {\"enabled\": true, \"port\": 8081}}")
117122
if _, err = f.Write(data); err != nil {
118123
t.Fatalf("error when writing test config file: %s", err)
119124
}
@@ -126,7 +131,8 @@ func TestNewConfig_configFile(t *testing.T) {
126131
MetricPrefix: "custom.gcp.bigquery.stats",
127132
MetricTags: []string{"env:prod", "team:my-team"},
128133
MetricInterval: 2 * time.Minute,
129-
Profiling: false,
134+
Profiler: Profiler{false, 6060},
135+
HealthCheck: HealthCheck{true, 8081},
130136
}
131137

132138
got, err := NewConfig("bqmetricstest")
@@ -186,13 +192,14 @@ func TestNewConfig_configFileWithCustomQueries(t *testing.T) {
186192
MetricPrefix: "custom.gcp.bigquery.stats",
187193
MetricTags: []string{"env:prod", "team:my-team"},
188194
MetricInterval: 2 * time.Minute,
189-
Profiling: false,
195+
Profiler: Profiler{false, 6060},
190196
CustomMetrics: []CustomMetric{{
191197
MetricName: "my_metric",
192198
MetricTags: []string{"table_id:table"},
193199
MetricInterval: 2 * time.Minute,
194200
SQL: "SELECT COUNT(DISTINCT *) FROM `table`",
195201
}},
202+
HealthCheck: HealthCheck{false, 8080},
196203
}
197204

198205
got, err := NewConfig("bqmetricstest")
@@ -262,7 +269,7 @@ func TestValidateConfig(t *testing.T) {
262269
MetricPrefix: "custom.gcp.bigquery.stats",
263270
MetricTags: []string{"env:prod"},
264271
MetricInterval: time.Duration(30000),
265-
Profiling: false,
272+
Profiler: Profiler{false, 6060},
266273
}}, false},
267274
{"custom metrics okay", args{&Config{
268275
DatadogAPIKey: "abc123",
@@ -276,6 +283,7 @@ func TestValidateConfig(t *testing.T) {
276283
MetricInterval: time.Duration(36000000),
277284
SQL: "SELECT COUNT(DISTINCT `my-column`) FROM `my-dataset.my-table`",
278285
}},
286+
HealthCheck: HealthCheck{false, 8080},
279287
}}, false},
280288
{"custom metrics missing name", args{&Config{
281289
DatadogAPIKey: "abc123",
@@ -299,6 +307,22 @@ func TestValidateConfig(t *testing.T) {
299307
MetricInterval: time.Duration(36000000),
300308
}},
301309
}}, true},
310+
{"health check disabled", args{&Config{
311+
DatadogAPIKey: "abc123",
312+
GcpProject: "my-project-id",
313+
MetricPrefix: "custom.gcp.bigquery.stats",
314+
MetricTags: []string{"env:prod"},
315+
MetricInterval: time.Duration(30000),
316+
HealthCheck: HealthCheck{false, 8080},
317+
}}, false},
318+
{"health check port invalid", args{&Config{
319+
DatadogAPIKey: "abc123",
320+
GcpProject: "my-project-id",
321+
MetricPrefix: "custom.gcp.bigquery.stats",
322+
MetricTags: []string{"env:prod"},
323+
MetricInterval: time.Duration(30000),
324+
HealthCheck: HealthCheck{true, -8080},
325+
}}, true},
302326
}
303327
for _, tt := range tests {
304328
t.Run(tt.name, func(t *testing.T) {

pkg/config/errors.go

+3
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,7 @@ var (
2020

2121
// ErrMissingCustomMetricSQL is the error returned when a CustomMetric is missing SQL
2222
ErrMissingCustomMetricSQL = errors.New("no custom metric sql query configured")
23+
24+
// ErrInvalidPort is the error returned when an invalid port is specified
25+
ErrInvalidPort = errors.New("invalid port specified")
2326
)

pkg/health/http.go

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package health
2+
3+
import (
4+
"encoding/json"
5+
"github.com/rs/zerolog/log"
6+
"net/http"
7+
)
8+
9+
// Status describes the health of the service as a short string; it is the
// value serialized into the health endpoint's JSON response.
type Status string

const (
	// Ok reports that the service is operating nominally.
	Ok Status = "OK"

	// Error reports that something is going wrong with the service.
	Error Status = "Error"
)
19+
20+
// ServiceStatus holds information about the health of the bqmetricsd service
21+
type ServiceStatus struct {
22+
Status Status `json:"status"`
23+
}
24+
25+
// Handler will handle HTTP requests to the health endpoint
26+
func (hs ServiceStatus) Handler(w http.ResponseWriter, _ *http.Request) {
27+
data, err := json.Marshal(hs)
28+
if err != nil {
29+
http.Error(w, err.Error(), http.StatusInternalServerError)
30+
return
31+
}
32+
33+
w.Header().Set("Content-Type", "application/json")
34+
switch hs.Status {
35+
case Ok:
36+
w.WriteHeader(http.StatusOK)
37+
default:
38+
w.WriteHeader(http.StatusInternalServerError)
39+
}
40+
41+
if _, err = w.Write(data); err != nil {
42+
log.Err(err).Msg("error when writing health http response")
43+
}
44+
}

pkg/health/http_test.go

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package health
2+
3+
import (
4+
"net/http"
5+
"net/http/httptest"
6+
"testing"
7+
)
8+
9+
func TestServiceStatus_Handler(t *testing.T) {
10+
type fields struct {
11+
Status Status
12+
}
13+
type want struct {
14+
status int
15+
body string
16+
}
17+
tests := []struct {
18+
name string
19+
fields fields
20+
want want
21+
}{
22+
{"health ok", fields{Status: Ok}, want{200, "{\"status\":\"OK\"}"}},
23+
{"health fail", fields{Status: Error}, want{500, "{\"status\":\"Error\"}"}},
24+
}
25+
for _, tt := range tests {
26+
t.Run(tt.name, func(t *testing.T) {
27+
hs := ServiceStatus{
28+
Status: tt.fields.Status,
29+
}
30+
31+
req, err := http.NewRequest("GET", "/health", nil)
32+
if err != nil {
33+
t.Fatal(err)
34+
}
35+
36+
rr := httptest.NewRecorder()
37+
handler := http.HandlerFunc(hs.Handler)
38+
39+
handler.ServeHTTP(rr, req)
40+
41+
if rr.Code != tt.want.status {
42+
t.Errorf("handler returned wrong status code: got %v want %v",
43+
rr.Code, tt.want.status)
44+
}
45+
46+
if rr.Body.String() != tt.want.body {
47+
t.Errorf("handler returned unexpected body: got %v want %v",
48+
rr.Body.String(), tt.want.body)
49+
}
50+
})
51+
}
52+
}

0 commit comments

Comments
 (0)