diff --git a/README.md b/README.md index e02c2b18..be7555be 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,9 @@ Further Information | elasticsearch_indices_translog_size_in_bytes | counter | 1 | Total translog size in bytes | elasticsearch_indices_warmer_time_seconds_total | counter | 1 | Total warmer time in seconds | elasticsearch_indices_warmer_total | counter | 1 | Total warmer count +| elasticsearch_indices_recovery_current_as_source | gauge | 1 | Number of ongoing recoveries for which a shard serves as a source +| elasticsearch_indices_recovery_current_as_target | gauge | 1 | Number of ongoing recoveries for which a shard serves as a target +| elasticsearch_indices_recovery_throttle_time_seconds_total | counter | 1 | Time in seconds recovery operations were delayed due to throttling | elasticsearch_jvm_gc_collection_seconds_count | counter | 2 | Count of JVM GC runs | elasticsearch_jvm_gc_collection_seconds_sum | counter | 2 | GC run time in seconds | elasticsearch_jvm_memory_committed_bytes | gauge | 2 | JVM memory currently committed by area @@ -174,6 +177,9 @@ Further Information | elasticsearch_os_load1 | gauge | 1 | Shortterm load average | elasticsearch_os_load5 | gauge | 1 | Midterm load average | elasticsearch_os_load15 | gauge | 1 | Longterm load average +| elasticsearch_os_swap_in_bytes_used | gauge | 1 | Amount of used swap space in bytes +| elasticsearch_os_swap_in_bytes_free | gauge | 1 | Amount of free swap space in bytes +| elasticsearch_os_swap_in_bytes_total | gauge | 1 | Total amount of swap space in bytes | elasticsearch_process_cpu_percent | gauge | 1 | Percent CPU used by process | elasticsearch_process_cpu_time_seconds_sum | counter | 3 | Process CPU time in seconds | elasticsearch_process_mem_resident_size_bytes | gauge | 1 | Resident memory in use by process in bytes @@ -195,10 +201,13 @@ Further Information | elasticsearch_thread_pool_queue_count | gauge | 14 | Thread Pool operations queued | 
elasticsearch_thread_pool_rejected_count | counter | 14 | Thread Pool operations rejected | elasticsearch_thread_pool_threads_count | gauge | 14 | Thread Pool current threads count +| elasticsearch_transport_tcp_connections_opened_total | counter | 1 | Number of connections opened for cluster communication | elasticsearch_transport_rx_packets_total | counter | 1 | Count of packets received | elasticsearch_transport_rx_size_bytes_total | counter | 1 | Total number of bytes received | elasticsearch_transport_tx_packets_total | counter | 1 | Count of packets sent | elasticsearch_transport_tx_size_bytes_total | counter | 1 | Total number of bytes sent +| elasticsearch_http_connections_opened_current | gauge | 1 | Current number of opened connections +| elasticsearch_http_connections_opened_total | counter | 1 | Total number of opened connections | elasticsearch_clusterinfo_last_retrieval_success_ts | gauge | 1 | Timestamp of the last successful cluster info retrieval | elasticsearch_clusterinfo_up | gauge | 1 | Up metric for the cluster info collector | elasticsearch_clusterinfo_version_info | gauge | 6 | Constant metric with ES version information as labels diff --git a/collector/indices.go b/collector/indices.go index 8ea2c73b..8d170e4c 100644 --- a/collector/indices.go +++ b/collector/indices.go @@ -465,6 +465,42 @@ func NewIndices(logger log.Logger, client *http.Client, url *url.URL, shards boo }, Labels: indexLabels, }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indices", "recovery_current_as_source"), + "Number of ongoing recoveries for which a shard serves as a source", + indexLabels.keys(), nil, + ), + Value: func(indexStats IndexStatsIndexResponse) float64 { + return float64(indexStats.Total.Recovery.CurrentAsSource) + }, + Labels: indexLabels, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indices", "recovery_current_as_target"), + "Number of 
ongoing recoveries for which a shard serves as a target", + indexLabels.keys(), nil, + ), + Value: func(indexStats IndexStatsIndexResponse) float64 { + return float64(indexStats.Total.Recovery.CurrentAsTarget) + }, + Labels: indexLabels, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "indices", "recovery_throttle_time_seconds_total"), + "Time in seconds recovery operations were delayed due to throttling", + indexLabels.keys(), nil, + ), + Value: func(indexStats IndexStatsIndexResponse) float64 { + return float64(indexStats.Total.Recovery.ThrottleTimeInMillis) / 1000 + }, + Labels: indexLabels, + }, { Type: prometheus.CounterValue, Desc: prometheus.NewDesc( diff --git a/collector/nodes.go b/collector/nodes.go index 43ffa0e8..dd4f4296 100644 --- a/collector/nodes.go +++ b/collector/nodes.go @@ -42,7 +42,7 @@ func getRoles(node NodeStatsNodeResponse) map[string]bool { } } } - if len(node.HTTP) == 0 { + if node.HTTP == nil { roles["client"] = false } return roles @@ -287,6 +287,42 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no }, Labels: defaultNodeLabelValues, }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "os", "swap_in_bytes_used"), + "Amount of used swap space in bytes", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + return float64(node.OS.Swap.Used) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "os", "swap_in_bytes_free"), + "Amount of free swap space in bytes", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + return float64(node.OS.Swap.Free) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "os", "swap_in_bytes_total"), + "Total amount of swap space in 
bytes", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + return float64(node.OS.Swap.Total) + }, + Labels: defaultNodeLabelValues, + }, { Type: prometheus.GaugeValue, Desc: prometheus.NewDesc( @@ -1462,6 +1498,18 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no return append(defaultNodeLabelValues(cluster, node), "user") }, }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "transport", "tcp_connections_opened_total"), + "Number of connections opened for cluster communication", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + return float64(node.Transport.ServerOpen) + }, + Labels: defaultNodeLabelValues, + }, { Type: prometheus.CounterValue, Desc: prometheus.NewDesc( @@ -1510,6 +1558,38 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no }, Labels: defaultNodeLabelValues, }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "http", "connections_opened_current"), + "Current number of opened connections", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + // node.HTTP is nil when the stats response has no "http" object; report 0 instead of panicking + if node.HTTP == nil { + return 0 + } + return float64(node.HTTP.CurrentOpen) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "http", "connections_opened_total"), + "Total number of opened connections", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + // node.HTTP is nil when the stats response has no "http" object; report 0 instead of panicking + if node.HTTP == nil { + return 0 + } + return float64(node.HTTP.TotalOpen) + }, + Labels: defaultNodeLabelValues, + }, }, gcCollectionMetrics: []*gcCollectionMetric{ { diff --git a/collector/nodes_response.go b/collector/nodes_response.go index 636dc1d3..bd8f4bbc 100644 --- a/collector/nodes_response.go +++ b/collector/nodes_response.go @@ -24,7 +24,7 @@ type NodeStatsNodeResponse struct { ThreadPool map[string]NodeStatsThreadPoolPoolResponse 
`json:"thread_pool"` JVM NodeStatsJVMResponse `json:"jvm"` Breakers map[string]NodeStatsBreakersResponse `json:"breakers"` - HTTP map[string]int `json:"http"` + HTTP *NodeStatsHTTPResponse `json:"http"` Transport NodeStatsTransportResponse `json:"transport"` Process NodeStatsProcessResponse `json:"process"` } @@ -277,8 +277,9 @@ type NodeStatsOSMemResponse struct { // NodeStatsOSSwapResponse defines node stats operating system swap usage structure type NodeStatsOSSwapResponse struct { - Used int64 `json:"used_in_bytes"` - Free int64 `json:"free_in_bytes"` + Used int64 `json:"used_in_bytes"` + Free int64 `json:"free_in_bytes"` + Total int64 `json:"total_in_bytes"` } // NodeStatsOSCPUResponse defines node stats operating system CPU usage structure @@ -325,7 +326,7 @@ type NodeStatsProcessCPUResponse struct { // NodeStatsHTTPResponse defines node stats HTTP connections structure type NodeStatsHTTPResponse struct { CurrentOpen int64 `json:"current_open"` - TotalOpen int64 `json:"total_open"` + TotalOpen int64 `json:"total_opened"` } // NodeStatsFSResponse is a representation of a file system information, data path, free disk space, read/write stats