Skip to content

Commit aac4c59

Browse files
author
Nikolay Mokrinsky (ML)
committed
Index lifecycle management metrics
Signed-off-by: Nikolay Mokrinsky (ML) <[email protected]>
1 parent 3d6277d commit aac4c59

File tree

4 files changed

+252
-0
lines changed

4 files changed

+252
-0
lines changed

collector/ilm.go

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"encoding/json"
18+
"fmt"
19+
"net/http"
20+
"net/url"
21+
"path"
22+
23+
"github.com/go-kit/kit/log"
24+
"github.com/go-kit/kit/log/level"
25+
"github.com/prometheus/client_golang/prometheus"
26+
)
27+
28+
type ilmMetric struct {
29+
Type prometheus.ValueType
30+
Desc *prometheus.Desc
31+
Value func(val int) float64
32+
Labels func(ilmIndex string, ilmPhase string, ilmAction string, ilmStep string) []string
33+
}
34+
35+
// Index Lifecycle Management information object
36+
type Ilm struct {
37+
logger log.Logger
38+
client *http.Client
39+
url *url.URL
40+
41+
up prometheus.Gauge
42+
totalScrapes prometheus.Counter
43+
jsonParseFailures prometheus.Counter
44+
45+
ilmMetric ilmMetric
46+
}
47+
48+
// NewIlm defines Index Lifecycle Management Prometheus metrics
49+
func NewIlm(logger log.Logger, client *http.Client, url *url.URL) *Ilm {
50+
return &Ilm{
51+
logger: logger,
52+
client: client,
53+
url: url,
54+
55+
up: prometheus.NewGauge(prometheus.GaugeOpts{
56+
Name: prometheus.BuildFQName(namespace, "ilm", "up"),
57+
Help: "Was the last scrape of the ElasticSearch ILM endpoint successful.",
58+
}),
59+
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
60+
Name: prometheus.BuildFQName(namespace, "ilm", "total_scrapes"),
61+
Help: "Current total ElasticSearch ILM scrapes.",
62+
}),
63+
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
64+
Name: prometheus.BuildFQName(namespace, "ilm", "json_parse_failures"),
65+
Help: "Number of errors while parsing JSON.",
66+
}),
67+
ilmMetric: ilmMetric{
68+
Type: prometheus.GaugeValue,
69+
Desc: prometheus.NewDesc(
70+
prometheus.BuildFQName(namespace, "ilm", "index_status"),
71+
"Status of ILM policy for index",
72+
[]string{"index", "phase", "action", "step"}, nil),
73+
Value: func(val int) float64 {
74+
return float64(val)
75+
},
76+
Labels: func(ilmIndex string, ilmPhase string, ilmAction string, ilmStep string) []string {
77+
return []string{ilmIndex, ilmPhase, ilmAction, ilmStep}
78+
},
79+
},
80+
}
81+
}
82+
83+
// Describe adds metrics description
84+
func (i *Ilm) Describe(ch chan<- *prometheus.Desc) {
85+
ch <- i.ilmMetric.Desc
86+
ch <- i.up.Desc()
87+
ch <- i.totalScrapes.Desc()
88+
ch <- i.jsonParseFailures.Desc()
89+
}
90+
91+
// Bool2int translates boolean variable to its integer alternative
92+
func (i *Ilm) Bool2int(managed bool) int {
93+
if managed {
94+
return 1
95+
} else {
96+
return 0
97+
}
98+
}
99+
100+
func (i *Ilm) fetchAndDecodeIlm() (IlmResponse, error) {
101+
var ir IlmResponse
102+
103+
u := *i.url
104+
u.Path = path.Join(u.Path, "/_all/_ilm/explain")
105+
106+
res, err := i.client.Get(u.String())
107+
if err != nil {
108+
return ir, fmt.Errorf("failed to get index stats from %s://%s:%s%s: %s",
109+
u.Scheme, u.Hostname(), u.Port(), u.Path, err)
110+
}
111+
112+
defer func() {
113+
err = res.Body.Close()
114+
if err != nil {
115+
_ = level.Warn(i.logger).Log(
116+
"msg", "failed to close http.Client",
117+
"err", err,
118+
)
119+
}
120+
}()
121+
122+
if res.StatusCode != http.StatusOK {
123+
return ir, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
124+
}
125+
126+
if err := json.NewDecoder(res.Body).Decode(&ir); err != nil {
127+
i.jsonParseFailures.Inc()
128+
return ir, err
129+
}
130+
131+
return ir, nil
132+
}
133+
134+
// Collect pulls metric values from Elasticsearch
135+
func (i *Ilm) Collect(ch chan<- prometheus.Metric) {
136+
defer func() {
137+
ch <- i.up
138+
ch <- i.totalScrapes
139+
ch <- i.jsonParseFailures
140+
}()
141+
142+
// indices
143+
ilmResp, err := i.fetchAndDecodeIlm()
144+
if err != nil {
145+
i.up.Set(0)
146+
_ = level.Warn(i.logger).Log(
147+
"msg", "failed to fetch and decode ILM stats",
148+
"err", err,
149+
)
150+
return
151+
}
152+
i.totalScrapes.Inc()
153+
i.up.Set(1)
154+
155+
for indexName, indexIlm := range ilmResp.Indices {
156+
ch <- prometheus.MustNewConstMetric(
157+
i.ilmMetric.Desc,
158+
i.ilmMetric.Type,
159+
i.ilmMetric.Value(i.Bool2int(indexIlm.Managed)),
160+
i.ilmMetric.Labels(indexName, indexIlm.Phase, indexIlm.Action, indexIlm.Step)...,
161+
)
162+
}
163+
}

collector/ilm_response.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
type IlmResponse struct {
17+
Indices map[string]IlmIndexResponse `json:"indices"`
18+
}
19+
20+
// IlmIndexResponse is the subset of a single index's ILM explain entry that
// is decoded; all other fields of the entry are ignored.
type IlmIndexResponse struct {
	// Index is the "index" field of the entry.
	Index string `json:"index"`
	// Managed is the "managed" field of the entry.
	Managed bool `json:"managed"`
	// Phase is the "phase" field of the entry.
	Phase string `json:"phase"`
	// Action is the "action" field of the entry.
	Action string `json:"action"`
	// Step is the "step" field of the entry.
	Step string `json:"step"`
}

collector/ilm_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"fmt"
18+
"github.com/go-kit/kit/log"
19+
"net/http"
20+
"net/http/httptest"
21+
"net/url"
22+
"testing"
23+
)
24+
25+
func TestIlm(t *testing.T) {
26+
ti := map[string]string{
27+
"7.3.2": `{"indices":{"foo_1":{"index":"foo_1","managed":true,"policy":"foo_policy","lifecycle_date_millis":1575630854324,"phase":"hot","phase_time_millis":1575605054674,"action":"complete","action_time_millis":1575630855862,"step":"complete","step_time_millis":1575630855862,"phase_execution":{"policy":"foo_policy","phase_definition":{"min_age":"0ms","actions":{"rollover":{"max_size":"15gb","max_age":"1d"},"set_priority":{"priority":100}}},"version":7,"modified_date_in_millis":1573070716617}}}}`,
28+
}
29+
for ver, out := range ti {
30+
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
31+
fmt.Fprintln(w, out)
32+
}))
33+
defer ts.Close()
34+
35+
u, err := url.Parse(ts.URL)
36+
if err != nil {
37+
t.Fatalf("Failed to parse URL: %s", err)
38+
}
39+
i := NewIlm(log.NewNopLogger(), http.DefaultClient, u)
40+
ilm, err := i.fetchAndDecodeIlm()
41+
if err != nil {
42+
t.Fatalf("Failed to fetch or decode ILM stats: %s", err)
43+
}
44+
t.Logf("[%s] ILM Response: %+v", ver, ilm)
45+
for ilmIndex, ilmStats := range ilm.Indices {
46+
t.Logf(
47+
"Index: %s - Managed: %t - Action: %s - Phase: %s - Step: %s",
48+
ilmIndex,
49+
ilmStats.Managed,
50+
ilmStats.Action,
51+
ilmStats.Phase,
52+
ilmStats.Step,
53+
)
54+
}
55+
}
56+
}

main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ func main() {
7070
esExportSnapshots = kingpin.Flag("es.snapshots",
7171
"Export stats for the cluster snapshots.").
7272
Default("false").Envar("ES_SNAPSHOTS").Bool()
73+
esExportIlm = kingpin.Flag("es.ilm",
74+
"Export stats for Index Lifecycle Management").
75+
Default("false").Envar("ES_ILM").Bool()
7376
esClusterInfoInterval = kingpin.Flag("es.clusterinfo.interval",
7477
"Cluster info update interval for the cluster label").
7578
Default("5m").Envar("ES_CLUSTERINFO_INTERVAL").Duration()
@@ -157,6 +160,10 @@ func main() {
157160
prometheus.MustRegister(collector.NewIndicesMappings(logger, httpClient, esURL))
158161
}
159162

163+
if *esExportIlm {
164+
prometheus.MustRegister(collector.NewIlm(logger, httpClient, esURL))
165+
}
166+
160167
// create a http server
161168
server := &http.Server{}
162169

0 commit comments

Comments
 (0)