Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for OpenSearch searchable snapshot metrics #860

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ elasticsearch_exporter
*-stamp
.tarballs
/vendor
.idea
124 changes: 123 additions & 1 deletion collector/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func getRoles(node NodeStatsNodeResponse) map[string]bool {
"data_content": false,
"ml": false,
"remote_cluster_client": false,
"search": false,
"transform": false,
"ingest": false,
"client": true,
Expand Down Expand Up @@ -92,7 +93,7 @@ func createRoleMetric(role string) *nodeMetric {
}

var (
defaultNodeLabels = []string{"cluster", "host", "name", "es_master_node", "es_data_node", "es_ingest_node", "es_client_node"}
defaultNodeLabels = []string{"cluster", "host", "name", "es_master_node", "es_data_node", "es_ingest_node", "es_client_node", "es_search_node"}
defaultRoleLabels = []string{"cluster", "host", "name"}
defaultThreadPoolLabels = append(defaultNodeLabels, "type")
defaultBreakerLabels = append(defaultNodeLabels, "breaker")
Expand All @@ -110,6 +111,7 @@ var (
fmt.Sprintf("%t", roles["data"]),
fmt.Sprintf("%t", roles["ingest"]),
fmt.Sprintf("%t", roles["client"]),
fmt.Sprintf("%t", roles["search"]),
}
}
defaultThreadPoolLabelValues = func(cluster string, node NodeStatsNodeResponse, pool string) []string {
Expand Down Expand Up @@ -171,6 +173,13 @@ type filesystemIODeviceMetric struct {
Labels func(cluster string, node NodeStatsNodeResponse, device string) []string
}

// fileCacheMetric describes one Prometheus metric built from a node's
// searchable-snapshot file cache statistics ("file_cache" in node stats).
// It mirrors the other per-section metric structs in this file
// (e.g. filesystemIODeviceMetric): a value type, a descriptor, an
// extractor for the numeric value, and a label-value builder.
type fileCacheMetric struct {
Type prometheus.ValueType
Desc *prometheus.Desc
// Value extracts the metric's numeric value from the decoded
// file_cache stats of a single node.
Value func(fileCacheStats NodeStatsFileCacheResponse) float64
// Labels produces the label values (here the defaultNodeLabelValues
// set) for a given cluster name and node response.
Labels func(cluster string, node NodeStatsNodeResponse) []string
}

// Nodes information struct
type Nodes struct {
logger log.Logger
Expand All @@ -188,6 +197,7 @@ type Nodes struct {
threadPoolMetrics []*threadPoolMetric
filesystemDataMetrics []*filesystemDataMetric
filesystemIODeviceMetrics []*filesystemIODeviceMetric
fileCacheMetrics []*fileCacheMetric
}

// NewNodes defines Nodes Prometheus metrics
Expand Down Expand Up @@ -1781,6 +1791,104 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no
Labels: defaultFilesystemIODeviceLabelValues,
},
},
fileCacheMetrics: []*fileCacheMetric{
{
Type: prometheus.GaugeValue,
Desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "filecache", "active_in_bytes"),
"file_cache active memory in bytes",
defaultNodeLabels, nil,
),
Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 {
return float64(fileCacheStats.ActiveInBytes)
},
Labels: defaultNodeLabelValues,
},
{
Type: prometheus.GaugeValue,
Desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "filecache", "total_in_bytes"),
"file_cache total memory in bytes",
defaultNodeLabels, nil,
),
Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 {
return float64(fileCacheStats.TotalInBytes)
},
Labels: defaultNodeLabelValues,
},
{
Type: prometheus.GaugeValue,
Desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "filecache", "used_in_bytes"),
"file_cache used memory in bytes",
defaultNodeLabels, nil,
),
Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 {
return float64(fileCacheStats.UsedInBytes)
},
Labels: defaultNodeLabelValues,
},
{
Type: prometheus.GaugeValue,
Desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "filecache", "evictions_in_bytes"),
"file_cache evicted memory in bytes",
defaultNodeLabels, nil,
),
Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 {
return float64(fileCacheStats.EvictionsInBytes)
},
Labels: defaultNodeLabelValues,
},
{
Type: prometheus.GaugeValue,
Desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "filecache", "active_percent"),
"file_cache active memory as percent",
defaultNodeLabels, nil,
),
Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 {
return float64(fileCacheStats.ActivePercent)
},
Labels: defaultNodeLabelValues,
},
{
Type: prometheus.GaugeValue,
Desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "filecache", "used_percent"),
"file_cache used memory as percent",
defaultNodeLabels, nil,
),
Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 {
return float64(fileCacheStats.UsedPercent)
},
Labels: defaultNodeLabelValues,
},
{
Type: prometheus.GaugeValue,
Desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "filecache", "hit_count"),
"file_cache hit count",
defaultNodeLabels, nil,
),
Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 {
return float64(fileCacheStats.HitCount)
},
Labels: defaultNodeLabelValues,
},
{
Type: prometheus.GaugeValue,
Desc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "filecache", "miss_count"),
"file_cache miss count",
defaultNodeLabels, nil,
),
Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 {
return float64(fileCacheStats.MissCount)
},
Labels: defaultNodeLabelValues,
},
},
}
}

Expand All @@ -1801,6 +1909,9 @@ func (c *Nodes) Describe(ch chan<- *prometheus.Desc) {
for _, metric := range c.filesystemIODeviceMetrics {
ch <- metric.Desc
}
for _, metric := range c.fileCacheMetrics {
ch <- metric.Desc
}
ch <- c.up.Desc()
ch <- c.totalScrapes.Desc()
ch <- c.jsonParseFailures.Desc()
Expand Down Expand Up @@ -1955,5 +2066,16 @@ func (c *Nodes) Collect(ch chan<- prometheus.Metric) {
}
}

// File cache Stats
for _, metric := range c.fileCacheMetrics {
ch <- prometheus.MustNewConstMetric(
metric.Desc,
metric.Type,
metric.Value(node.FileCache),
metric.Labels(nodeStatsResp.ClusterName, node)...,
)
}

}

}
13 changes: 13 additions & 0 deletions collector/nodes_response.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type NodeStatsNodeResponse struct {
HTTP map[string]interface{} `json:"http"`
Transport NodeStatsTransportResponse `json:"transport"`
Process NodeStatsProcessResponse `json:"process"`
FileCache NodeStatsFileCacheResponse `json:"file_cache"`
}

// NodeStatsBreakersResponse is a representation of a statistics about the field data circuit breaker
Expand Down Expand Up @@ -384,3 +385,15 @@ type ClusterHealthResponse struct {
TimedOut bool `json:"timed_out"`
UnassignedShards int64 `json:"unassigned_shards"`
}

// NodeStatsFileCacheResponse is a representation of the OpenSearch
// searchable-snapshot file cache section ("file_cache") of the node
// stats API response.
//
// NOTE(review): the *Percent fields are decoded as int64, which assumes
// OpenSearch reports whole-number percentages; if the API ever returns
// fractional percentages the decode would fail — confirm against the
// OpenSearch nodes-stats documentation.
type NodeStatsFileCacheResponse struct {
ActiveInBytes int64 `json:"active_in_bytes"` // bytes of cache currently active (in use)
TotalInBytes int64 `json:"total_in_bytes"` // total configured cache capacity in bytes
UsedInBytes int64 `json:"used_in_bytes"` // bytes of cache currently occupied
EvictionsInBytes int64 `json:"evictions_in_bytes"` // cumulative bytes evicted from the cache
ActivePercent int64 `json:"active_percent"` // active portion as a percentage of used
UsedPercent int64 `json:"used_percent"` // used portion as a percentage of total
HitCount int64 `json:"hit_count"` // cumulative cache hits
MissCount int64 `json:"miss_count"` // cumulative cache misses
}