Commit ec80d3d

[BREAKING] feat(metrics): fix and update metrics in badger (#1948)
Our current metrics are outdated, and some of them are never invoked. This PR adds new metrics and fixes the existing ones. Note that it removes all of the old metrics (because of the prefix change; the new metrics use the badger_v4_ prefix) and introduces new, improved metrics in their place.
1 parent da1dcac commit ec80d3d
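The new counters are published as standard Go expvar variables; the tests added in metrics_test.go below read them back by name (for example badger_v4_write_bytes_user and badger_v4_write_bytes_compaction). As a minimal sketch, assuming only those metric names and a DB opened with MetricsEnabled (the package layout and function here are illustrative, not part of this PR), a caller could read them like this:

package main

import (
    "expvar"
    "fmt"
)

// Illustrative sketch only: reads two of the new counters by name. The metric
// names come from metrics_test.go in this PR; the package layout is
// hypothetical. The counters only exist once badger is imported and used with
// MetricsEnabled.
func main() {
    if v := expvar.Get("badger_v4_write_bytes_user"); v != nil {
        fmt.Println("user bytes written:", v.(*expvar.Int).Value())
    }
    if m, ok := expvar.Get("badger_v4_write_bytes_compaction").(*expvar.Map); ok {
        // Per-level counters live in an expvar.Map keyed by level name, e.g. "l6".
        if lvl := m.Get("l6"); lvl != nil {
            fmt.Println("compaction bytes written to l6:", lvl.(*expvar.Int).Value())
        }
    }
}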

File tree: 7 files changed, +330 -50 lines

db.go (2 additions, 0 deletions)

@@ -775,6 +775,7 @@ func (db *DB) get(key []byte) (y.ValueStruct, error) {
     }
     // Found the required version of the key, return immediately.
     if vs.Version == version {
+        y.NumGetsWithResultsAdd(db.opt.MetricsEnabled, 1)
         return vs, nil
     }
     if maxVs.Version < vs.Version {
@@ -898,6 +899,7 @@ func (db *DB) sendToWriteCh(entries []*Entry) (*request, error) {
         size += e.estimateSizeAndSetThreshold(db.valueThreshold())
         count++
     }
+    y.NumBytesWrittenUserAdd(db.opt.MetricsEnabled, int64(size))
     if count >= db.opt.maxBatchCount || size >= db.opt.maxBatchSize {
         return nil, ErrTxnTooBig
     }
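The y.*Add helpers called above take the MetricsEnabled flag as their first argument; their definitions live in the y package and are not part of this diff. A hedged sketch of what such a guarded counter could look like, assuming it is backed by an expvar.Int registered under the name that metrics_test.go reads back:

package y

import "expvar"

// Hypothetical sketch: the real helper lives in badger's y package and its
// definition is not part of this diff. The expvar name matches the one
// metrics_test.go reads back (badger_v4_get_with_result_num_user).
var numGetsWithResults = expvar.NewInt("badger_v4_get_with_result_num_user")

// NumGetsWithResultsAdd bumps the counter only when metrics are enabled, so a
// call like y.NumGetsWithResultsAdd(db.opt.MetricsEnabled, 1) is a no-op when
// MetricsEnabled is false.
func NumGetsWithResultsAdd(enabled bool, val int64) {
    if !enabled {
        return
    }
    numGetsWithResults.Add(val)
}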

iterator.go (2 additions, 0 deletions)

@@ -475,6 +475,8 @@ func (txn *Txn) NewIterator(opt IteratorOptions) *Iterator {
         panic(ErrDBClosed)
     }

+    y.NumIteratorsCreatedAdd(txn.db.opt.MetricsEnabled, 1)
+
     // Keep track of the number of active iterators.
     txn.numIterators.Add(1)

levels.go (24 additions, 4 deletions)

@@ -1439,6 +1439,22 @@ func (s *levelsController) runCompactDef(id, l int, cd compactDef) (err error) {
         return err
     }

+    getSizes := func(tables []*table.Table) int64 {
+        size := int64(0)
+        for _, i := range tables {
+            size += i.Size()
+        }
+        return size
+    }
+
+    sizeNewTables := int64(0)
+    sizeOldTables := int64(0)
+    if s.kv.opt.MetricsEnabled {
+        sizeNewTables = getSizes(newTables)
+        sizeOldTables = getSizes(cd.bot) + getSizes(cd.top)
+        y.NumBytesCompactionWrittenAdd(s.kv.opt.MetricsEnabled, nextLevel.strLevel, sizeNewTables)
+    }
+
     // See comment earlier in this function about the ordering of these ops, and the order in which
     // we access levels when reading.
     if err := nextLevel.replaceTables(cd.bot, newTables); err != nil {
@@ -1459,16 +1475,16 @@ func (s *levelsController) runCompactDef(id, l int, cd compactDef) (err error) {
             expensive = " [E]"
         }
         s.kv.opt.Infof("[%d]%s LOG Compact %d->%d (%d, %d -> %d tables with %d splits)."+
-            " [%s] -> [%s], took %v\n",
+            " [%s] -> [%s], took %v\n, deleted %d bytes",
             id, expensive, thisLevel.level, nextLevel.level, len(cd.top), len(cd.bot),
             len(newTables), len(cd.splits), strings.Join(from, " "), strings.Join(to, " "),
-            dur.Round(time.Millisecond))
+            dur.Round(time.Millisecond), sizeOldTables-sizeNewTables)
     }

     if cd.thisLevel.level != 0 && len(newTables) > 2*s.kv.opt.LevelSizeMultiplier {
-        s.kv.opt.Debugf("This Range (numTables: %d)\nLeft:\n%s\nRight:\n%s\n",
+        s.kv.opt.Infof("This Range (numTables: %d)\nLeft:\n%s\nRight:\n%s\n",
             len(cd.top), hex.Dump(cd.thisRange.left), hex.Dump(cd.thisRange.right))
-        s.kv.opt.Debugf("Next Range (numTables: %d)\nLeft:\n%s\nRight:\n%s\n",
+        s.kv.opt.Infof("Next Range (numTables: %d)\nLeft:\n%s\nRight:\n%s\n",
             len(cd.bot), hex.Dump(cd.nextRange.left), hex.Dump(cd.nextRange.right))
     }
     return nil
@@ -1598,13 +1614,17 @@ func (s *levelsController) get(key []byte, maxVs y.ValueStruct, startLevel int)
         if vs.Value == nil && vs.Meta == 0 {
             continue
         }
+        y.NumBytesReadsLSMAdd(s.kv.opt.MetricsEnabled, int64(len(vs.Value)))
         if vs.Version == version {
             return vs, nil
         }
         if maxVs.Version < vs.Version {
             maxVs = vs
         }
     }
+    if len(maxVs.Value) > 0 {
+        y.NumGetsWithResultsAdd(s.kv.opt.MetricsEnabled, 1)
+    }
     return maxVs, nil
 }
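Unlike the flat counters, NumBytesCompactionWrittenAdd also takes the target level's name (nextLevel.strLevel), and metrics_test.go reads badger_v4_write_bytes_compaction back as an expvar.Map keyed by level (for example "l6"). Its definition is not shown in this diff; a hedged sketch of a per-level helper of that shape, assuming an expvar.Map backing variable:

package y

import "expvar"

// Hypothetical sketch: the real helper is defined in badger's y package and is
// not shown in this diff. The metric name and per-level keying (e.g. "l6")
// match what metrics_test.go reads back as an *expvar.Map.
var numBytesCompactionWritten = expvar.NewMap("badger_v4_write_bytes_compaction")

// NumBytesCompactionWrittenAdd records how many bytes a compaction wrote into
// the target level, keyed by the level's name (nextLevel.strLevel above).
func NumBytesCompactionWrittenAdd(enabled bool, level string, val int64) {
    if !enabled {
        return
    }
    numBytesCompactionWritten.Add(level, val)
}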

memtable.go (1 addition, 4 deletions)

@@ -207,6 +207,7 @@ func (mt *memTable) Put(key []byte, value y.ValueStruct) error {
     if ts := y.ParseTs(entry.Key); ts > mt.maxVersion {
         mt.maxVersion = ts
     }
+    y.NumBytesWrittenToL0Add(mt.opt.MetricsEnabled, entry.estimateSizeAndSetThreshold(mt.opt.ValueThreshold))
     return nil
 }

@@ -388,7 +389,6 @@ func (lf *logFile) encryptionEnabled() bool {

 // Acquire lock on mmap/file if you are calling this
 func (lf *logFile) read(p valuePointer) (buf []byte, err error) {
-    var nbr int64
     offset := p.Offset
     // Do not convert size to uint32, because the lf.Data can be of size
     // 4GB, which overflows the uint32 during conversion to make the size 0,
@@ -404,10 +404,7 @@ func (lf *logFile) read(p valuePointer) (buf []byte, err error) {
         err = y.ErrEOF
     } else {
         buf = lf.Data[offset : offset+valsz]
-        nbr = int64(valsz)
     }
-    y.NumReadsAdd(lf.opt.MetricsEnabled, 1)
-    y.NumBytesReadAdd(lf.opt.MetricsEnabled, nbr)
     return buf, err
 }

metrics_test.go (new file, 203 additions, 0 deletions)

/*
 * Copyright 2017 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package badger

import (
    "expvar"
    "math/rand"
    "testing"

    "github.com/stretchr/testify/require"
)

func clearAllMetrics() {
    expvar.Do(func(kv expvar.KeyValue) {
        // Reset the value of each expvar variable based on its type
        switch v := kv.Value.(type) {
        case *expvar.Int:
            v.Set(0)
        case *expvar.Float:
            v.Set(0)
        case *expvar.Map:
            v.Init()
        case *expvar.String:
            v.Set("")
        }
    })
}

func TestWriteMetrics(t *testing.T) {
    opt := getTestOptions("")
    opt.managedTxns = true
    opt.CompactL0OnClose = true
    runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
        clearAllMetrics()
        num := 10
        val := make([]byte, 1<<12)
        key := make([]byte, 40)
        for i := 0; i < num; i++ {
            _, err := rand.Read(key)
            require.NoError(t, err)
            _, err = rand.Read(val)
            require.NoError(t, err)

            writer := db.NewManagedWriteBatch()
            require.NoError(t, writer.SetEntryAt(NewEntry(key, val), 1))
            writer.Flush()
        }

        expectedSize := int64(len(val)) + 48 + 2 // 48 := size of key (40 + 8(ts)), 2 := meta
        write_metric := expvar.Get("badger_v4_write_bytes_user")
        require.Equal(t, expectedSize*int64(num), write_metric.(*expvar.Int).Value())

        put_metric := expvar.Get("badger_v4_put_num_user")
        require.Equal(t, int64(num), put_metric.(*expvar.Int).Value())

        lsm_metric := expvar.Get("badger_v4_write_bytes_l0")
        require.Equal(t, expectedSize*int64(num), lsm_metric.(*expvar.Int).Value())

        compactionMetric := expvar.Get("badger_v4_write_bytes_compaction").(*expvar.Map)
        require.Equal(t, nil, compactionMetric.Get("l6"))

        // Force compaction
        db.Close()

        _, err := OpenManaged(opt)
        require.NoError(t, err)

        compactionMetric = expvar.Get("badger_v4_write_bytes_compaction").(*expvar.Map)
        require.GreaterOrEqual(t, expectedSize*int64(num)+int64(num*200), compactionMetric.Get("l6").(*expvar.Int).Value())
        // Because we have random values, compression is not able to do much, so we incur a cost on total size
    })
}

func TestVlogMetrics(t *testing.T) {
    opt := getTestOptions("")
    opt.managedTxns = true
    opt.CompactL0OnClose = true
    runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
        clearAllMetrics()
        num := 10
        val := make([]byte, 1<<20) // Large Value
        key := make([]byte, 40)
        for i := 0; i < num; i++ {
            _, err := rand.Read(key)
            require.NoError(t, err)
            _, err = rand.Read(val)
            require.NoError(t, err)

            writer := db.NewManagedWriteBatch()
            require.NoError(t, writer.SetEntryAt(NewEntry(key, val), 1))
            writer.Flush()
        }

        expectedSize := int64(len(val)) + 200 // vlog expected size

        totalWrites := expvar.Get("badger_v4_write_num_vlog")
        require.Equal(t, int64(num), totalWrites.(*expvar.Int).Value())

        bytesWritten := expvar.Get("badger_v4_write_bytes_vlog")
        require.GreaterOrEqual(t, expectedSize*int64(num), bytesWritten.(*expvar.Int).Value())

        txn := db.NewTransactionAt(2, false)
        item, err := txn.Get(key)
        require.NoError(t, err)
        require.Equal(t, uint64(1), item.Version())

        err = item.Value(func(val []byte) error {
            totalReads := expvar.Get("badger_v4_read_num_vlog")
            bytesRead := expvar.Get("badger_v4_read_bytes_vlog")
            require.Equal(t, int64(1), totalReads.(*expvar.Int).Value())
            require.GreaterOrEqual(t, expectedSize, bytesRead.(*expvar.Int).Value())
            return nil
        })

        require.NoError(t, err)
    })
}

func TestReadMetrics(t *testing.T) {
    opt := getTestOptions("")
    opt.managedTxns = true
    opt.CompactL0OnClose = true
    runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
        clearAllMetrics()
        num := 10
        val := make([]byte, 1<<15)
        keys := [][]byte{}
        writer := db.NewManagedWriteBatch()
        for i := 0; i < num; i++ {
            keyB := key("byte", 1)
            keys = append(keys, []byte(keyB))

            _, err := rand.Read(val)
            require.NoError(t, err)

            require.NoError(t, writer.SetEntryAt(NewEntry([]byte(keyB), val), 1))
        }
        writer.Flush()

        txn := db.NewTransactionAt(2, false)
        item, err := txn.Get(keys[0])
        require.NoError(t, err)
        require.Equal(t, uint64(1), item.Version())

        totalGets := expvar.Get("badger_v4_get_num_user")
        require.Equal(t, int64(1), totalGets.(*expvar.Int).Value())

        totalMemtableReads := expvar.Get("badger_v4_get_num_memtable")
        require.Equal(t, int64(1), totalMemtableReads.(*expvar.Int).Value())

        totalLSMGets := expvar.Get("badger_v4_get_num_lsm")
        require.Nil(t, totalLSMGets.(*expvar.Map).Get("l6"))

        // Force compaction
        db.Close()

        db, err = OpenManaged(opt)
        require.NoError(t, err)

        txn = db.NewTransactionAt(2, false)
        item, err = txn.Get(keys[0])
        require.NoError(t, err)
        require.Equal(t, uint64(1), item.Version())

        _, err = txn.Get([]byte(key("abdbyte", 1000))) // val should be far enough that bloom filter doesn't hit
        require.Error(t, err)

        totalLSMGets = expvar.Get("badger_v4_get_num_lsm")
        require.Equal(t, int64(0x1), totalLSMGets.(*expvar.Map).Get("l6").(*expvar.Int).Value())

        totalBloom := expvar.Get("badger_v4_hit_num_lsm_bloom_filter")
        require.Equal(t, int64(0x1), totalBloom.(*expvar.Map).Get("l6").(*expvar.Int).Value())
        require.Equal(t, int64(0x1), totalBloom.(*expvar.Map).Get("DoesNotHave_HIT").(*expvar.Int).Value())
        require.Equal(t, int64(0x2), totalBloom.(*expvar.Map).Get("DoesNotHave_ALL").(*expvar.Int).Value())

        bytesLSM := expvar.Get("badger_v4_read_bytes_lsm")
        require.Equal(t, int64(len(val)), bytesLSM.(*expvar.Int).Value())

        getWithResult := expvar.Get("badger_v4_get_with_result_num_user")
        require.Equal(t, int64(2), getWithResult.(*expvar.Int).Value())

        iterOpts := DefaultIteratorOptions
        iter := txn.NewKeyIterator(keys[0], iterOpts)
        iter.Seek(keys[0])

        rangeQueries := expvar.Get("badger_v4_iterator_num_user")
        require.Equal(t, int64(1), rangeQueries.(*expvar.Int).Value())
    })
}

value.go (4 additions, 2 deletions)

@@ -893,8 +893,8 @@ func (vlog *valueLog) write(reqs []*request) error {
         bytesWritten += buf.Len()
         // No need to flush anything, we write to file directly via mmap.
     }
-    y.NumWritesAdd(vlog.opt.MetricsEnabled, int64(written))
-    y.NumBytesWrittenAdd(vlog.opt.MetricsEnabled, int64(bytesWritten))
+    y.NumWritesVlogAdd(vlog.opt.MetricsEnabled, int64(written))
+    y.NumBytesWrittenVlogAdd(vlog.opt.MetricsEnabled, int64(bytesWritten))

     vlog.numEntriesWritten += uint32(written)
     vlog.db.threshold.update(valueSizes)
@@ -994,6 +994,8 @@ func (vlog *valueLog) readValueBytes(vp valuePointer) ([]byte, *logFile, error)
     }

     buf, err := lf.read(vp)
+    y.NumReadsVlogAdd(vlog.db.opt.MetricsEnabled, 1)
+    y.NumBytesReadsVlogAdd(vlog.db.opt.MetricsEnabled, int64(len(buf)))
     return buf, lf, err
 }
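Since all of these counters are plain expvar variables, they can be scraped with no extra wiring by serving expvar's /debug/vars endpoint. A hedged usage sketch; the badger/v4 import path and the WithMetricsEnabled option are assumptions about the public API, and the directory and port are placeholders:

package main

import (
    _ "expvar" // registers the /debug/vars handler on http.DefaultServeMux
    "log"
    "net/http"

    badger "github.com/dgraph-io/badger/v4"
)

func main() {
    // Open Badger with metrics enabled; the badger_v4_* counters then show up
    // as JSON at http://localhost:8080/debug/vars.
    db, err := badger.Open(badger.DefaultOptions("/tmp/badger").WithMetricsEnabled(true))
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    log.Fatal(http.ListenAndServe(":8080", nil))
}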
