Skip to content

Commit dca5b72

Browse files
committed
feat: expose MD raid component devices
Expose which component devices are part of an MD RAID device, as well as the most common per-component flags. This will enable a future node_exporter metric showing which component of a RAID array has failed. Signed-off-by: Robin H. Johnson <[email protected]> Signed-off-by: Robin H. Johnson <[email protected]>
1 parent cd2a69e commit dca5b72

File tree

2 files changed

+112
-41
lines changed

2 files changed

+112
-41
lines changed

mdstat.go

Lines changed: 74 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,28 @@ var (
2727
recoveryLinePctRE = regexp.MustCompile(`= (.+)%`)
2828
recoveryLineFinishRE = regexp.MustCompile(`finish=(.+)min`)
2929
recoveryLineSpeedRE = regexp.MustCompile(`speed=(.+)[A-Z]`)
30-
componentDeviceRE = regexp.MustCompile(`(.*)\[\d+\]`)
30+
componentDeviceRE = regexp.MustCompile(`(.*)\[(\d+)\](\([SF]+\))?`)
31+
personalitiesPrefix = "Personalities : "
3132
)
3233

34+
// MDStatComponent describes a single component device of an MD RAID device,
// as listed on that device's line in /proc/mdstat (e.g. "sdi2[0]"), together
// with the per-component flags that mdstat prints after it.
type MDStatComponent struct {
35+
// Name of the component device (the text before the bracketed index).
36+
Name string
37+
// DescriptorIndex is the number printed in square brackets after the device
// name, e.g. the order in the superblock.
38+
DescriptorIndex int32
39+
// Flags per Linux drivers/md/md.[ch] as of v6.12-rc1.
40+
// Only the subset that is exposed in mdstat is represented here; each flag
// is shown there as a one-letter, parenthesised suffix on the component.
41+
WriteMostly bool // "(W)" suffix
42+
Journal bool // "(J)" suffix
43+
Faulty bool // "Faulty" is what kernel source uses for "(F)"
44+
Spare bool // "(S)" suffix
45+
Replacement bool // "(R)" suffix
46+
// Some additional flags that are NOT exposed in procfs today; they may
47+
// be available via sysfs.
48+
// In_sync, Bitmap_sync, Blocked, WriteErrorSeen, FaultRecorded,
49+
// BlockedBadBlocks, WantReplacement, Candidate, ...
50+
}
51+
3352
// MDStat holds info parsed from /proc/mdstat.
3453
type MDStat struct {
3554
// Name of the device.
@@ -60,8 +79,8 @@ type MDStat struct {
6079
BlocksSyncedFinishTime float64
6180
// current sync speed (in Kilobytes/sec)
6281
BlocksSyncedSpeed float64
63-
// Name of md component devices
64-
Devices []string
82+
// component devices
83+
Devices []MDStatComponent
6584
}
6685

6786
// MDStat parses an mdstat-file (/proc/mdstat) and returns a slice of
@@ -82,38 +101,52 @@ func (fs FS) MDStat() ([]MDStat, error) {
82101
// parseMDStat parses data from mdstat file (/proc/mdstat) and returns a slice of
83102
// structs containing the relevant info.
84103
func parseMDStat(mdStatData []byte) ([]MDStat, error) {
104+
// TODO:
105+
// - parse global hotspares from the "unused devices" line.
85106
mdStats := []MDStat{}
86107
lines := strings.Split(string(mdStatData), "\n")
108+
knownRaidTypes := make(map[string]bool)
87109

88110
for i, line := range lines {
89111
if strings.TrimSpace(line) == "" || line[0] == ' ' ||
90-
strings.HasPrefix(line, "Personalities") ||
91112
strings.HasPrefix(line, "unused") {
92113
continue
93114
}
115+
// Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5] [raid4] [raid10]
116+
if len(knownRaidTypes) == 0 && strings.HasPrefix(line, personalitiesPrefix) {
117+
personalities := strings.Fields(line[len(personalitiesPrefix):])
118+
for _, word := range personalities {
119+
word := word[1 : len(word)-1]
120+
knownRaidTypes[word] = true
121+
}
122+
continue
123+
}
94124

95125
deviceFields := strings.Fields(line)
96126
if len(deviceFields) < 3 {
97127
return nil, fmt.Errorf("%w: Expected 3+ lines, got %q", ErrFileParse, line)
98128
}
99129
mdName := deviceFields[0] // mdx
100-
state := deviceFields[2] // active or inactive
130+
state := deviceFields[2] // active, inactive, broken
101131

102-
mdType := "unknown" // raid1, raid5, etc.
132+
mdType := "unknown" // raid1, raid5, etc.
133+
var deviceStartIndex int
103134
if len(deviceFields) > 3 { // mdType may be in the 3rd or 4th field
104-
if isRaidType(deviceFields[3]) {
135+
if isRaidType(deviceFields[3], knownRaidTypes) {
105136
mdType = deviceFields[3]
106-
} else if len(deviceFields) > 4 && isRaidType(deviceFields[4]) {
137+
deviceStartIndex = 4
138+
} else if len(deviceFields) > 4 && isRaidType(deviceFields[4], knownRaidTypes) {
107139
// if the 3rd field is (...), the 4th field is the mdType
108140
mdType = deviceFields[4]
141+
deviceStartIndex = 5
109142
}
110143
}
111144

112145
if len(lines) <= i+3 {
113146
return nil, fmt.Errorf("%w: Too few lines for md device: %q", ErrFileParse, mdName)
114147
}
115148

116-
// Failed disks have the suffix (F) & Spare disks have the suffix (S).
149+
// Failed (Faulty) disks have the suffix (F) & Spare disks have the suffix (S).
117150
fail := int64(strings.Count(line, "(F)"))
118151
spare := int64(strings.Count(line, "(S)"))
119152
active, total, down, size, err := evalStatusLine(lines[i], lines[i+1])
@@ -160,6 +193,11 @@ func parseMDStat(mdStatData []byte) ([]MDStat, error) {
160193
}
161194
}
162195

196+
devices, err := evalComponentDevices(deviceFields[deviceStartIndex:])
197+
if err != nil {
198+
return nil, fmt.Errorf("error parsing components in md device %q: %w", mdName, err)
199+
}
200+
163201
mdStats = append(mdStats, MDStat{
164202
Name: mdName,
165203
Type: mdType,
@@ -175,7 +213,7 @@ func parseMDStat(mdStatData []byte) ([]MDStat, error) {
175213
BlocksSyncedPct: pct,
176214
BlocksSyncedFinishTime: finish,
177215
BlocksSyncedSpeed: speed,
178-
Devices: evalComponentDevices(deviceFields),
216+
Devices: devices,
179217
})
180218
}
181219

@@ -185,11 +223,13 @@ func parseMDStat(mdStatData []byte) ([]MDStat, error) {
185223
// check if a string's format is like the mdType
186224
// Rule 1: mdType should not be like (...)
187225
// Rule 2: mdType should not be like sda[0]
188-
func isRaidType(mdType string) bool {
189-
return !strings.ContainsAny(mdType, "([")
226+
// isRaidType reports whether mdType names a RAID personality. A candidate is
// accepted only when it contains neither '(' nor '[' (which rules out "(...)"
// annotations and component devices such as sda[0]) and it is present as a
// key in knownRaidTypes. Only key presence matters; the stored bool is not
// consulted.
func isRaidType(mdType string, knownRaidTypes map[string]bool) bool {
	if strings.ContainsAny(mdType, "([") {
		return false
	}
	_, known := knownRaidTypes[mdType]
	return known
}
191230

192231
func evalStatusLine(deviceLine, statusLine string) (active, total, down, size int64, err error) {
232+
// e.g. 523968 blocks super 1.2 [4/4] [UUUU]
193233
statusFields := strings.Fields(statusLine)
194234
if len(statusFields) < 1 {
195235
return 0, 0, 0, 0, fmt.Errorf("%w: Unexpected statusline %q: %w", ErrFileParse, statusLine, err)
@@ -280,17 +320,29 @@ func evalRecoveryLine(recoveryLine string) (blocksSynced int64, blocksToBeSynced
280320
return blocksSynced, blocksToBeSynced, pct, finish, speed, nil
281321
}
282322

283-
func evalComponentDevices(deviceFields []string) []string {
284-
mdComponentDevices := make([]string, 0)
285-
if len(deviceFields) > 3 {
286-
for _, field := range deviceFields[4:] {
287-
match := componentDeviceRE.FindStringSubmatch(field)
288-
if match == nil {
289-
continue
290-
}
291-
mdComponentDevices = append(mdComponentDevices, match[1])
323+
func evalComponentDevices(deviceFields []string) ([]MDStatComponent, error) {
324+
mdComponentDevices := make([]MDStatComponent, 0)
325+
for _, field := range deviceFields {
326+
match := componentDeviceRE.FindStringSubmatch(field)
327+
if match == nil {
328+
continue
292329
}
330+
descriptorIndex, err := strconv.ParseInt(match[2], 10, 32)
331+
if err != nil {
332+
return mdComponentDevices, fmt.Errorf("error parsing int from device %q: %w", match[2], err)
333+
}
334+
mdComponentDevices = append(mdComponentDevices, MDStatComponent{
335+
Name: match[1],
336+
DescriptorIndex: int32(descriptorIndex),
337+
// match may contain one or more of these
338+
// https://github.com/torvalds/linux/blob/7ec462100ef9142344ddbf86f2c3008b97acddbe/drivers/md/md.c#L8376-L8392
339+
Faulty: strings.Contains(match[3], "(F)"),
340+
Spare: strings.Contains(match[3], "(S)"),
341+
Journal: strings.Contains(match[3], "(J)"),
342+
Replacement: strings.Contains(match[3], "(R)"),
343+
WriteMostly: strings.Contains(match[3], "(W)"),
344+
})
293345
}
294346

295-
return mdComponentDevices
347+
return mdComponentDevices, nil
296348
}

mdstat_test.go

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ func TestFS_MDStat(t *testing.T) {
2626
if err != nil {
2727
t.Fatalf("parsing of reference-file failed entirely: %s", err)
2828
}
29+
// TODO: Test cases to capture in future:
30+
// WriteMostly devices
31+
// Journal devices
32+
// Replacement devices
33+
// Global hotspares
2934

3035
refs := map[string]MDStat{
3136
"md127": {
@@ -43,7 +48,7 @@ func TestFS_MDStat(t *testing.T) {
4348
BlocksSyncedPct: 0,
4449
BlocksSyncedFinishTime: 0,
4550
BlocksSyncedSpeed: 0,
46-
Devices: []string{"sdi2", "sdj2"}},
51+
Devices: []MDStatComponent{{Name: "sdi2", DescriptorIndex: 0}, {Name: "sdj2", DescriptorIndex: 1}}},
4752
"md0": {
4853
Name: "md0",
4954
Type: "raid1",
@@ -59,7 +64,7 @@ func TestFS_MDStat(t *testing.T) {
5964
BlocksSyncedPct: 0,
6065
BlocksSyncedFinishTime: 0,
6166
BlocksSyncedSpeed: 0,
62-
Devices: []string{"sdi1", "sdj1"}},
67+
Devices: []MDStatComponent{{Name: "sdi1", DescriptorIndex: 0}, {Name: "sdj1", DescriptorIndex: 1}}},
6368
"md4": {
6469
Name: "md4",
6570
Type: "raid1",
@@ -75,7 +80,7 @@ func TestFS_MDStat(t *testing.T) {
7580
BlocksSyncedPct: 0,
7681
BlocksSyncedFinishTime: 0,
7782
BlocksSyncedSpeed: 0,
78-
Devices: []string{"sda3", "sdb3"}},
83+
Devices: []MDStatComponent{{Name: "sda3", Faulty: true, DescriptorIndex: 0}, {Name: "sdb3", Spare: true, DescriptorIndex: 1}}},
7984
"md6": {
8085
Name: "md6",
8186
Type: "raid1",
@@ -91,7 +96,7 @@ func TestFS_MDStat(t *testing.T) {
9196
BlocksSyncedPct: 8.5,
9297
BlocksSyncedFinishTime: 17,
9398
BlocksSyncedSpeed: 259783,
94-
Devices: []string{"sdb2", "sdc", "sda2"}},
99+
Devices: []MDStatComponent{{Name: "sdb2", DescriptorIndex: 2, Faulty: true}, {Name: "sdc", DescriptorIndex: 1, Spare: true}, {Name: "sda2", DescriptorIndex: 0}}},
95100
"md3": {
96101
Name: "md3",
97102
Type: "raid6",
@@ -107,7 +112,7 @@ func TestFS_MDStat(t *testing.T) {
107112
BlocksSyncedPct: 0,
108113
BlocksSyncedFinishTime: 0,
109114
BlocksSyncedSpeed: 0,
110-
Devices: []string{"sda1", "sdh1", "sdg1", "sdf1", "sde1", "sdd1", "sdc1", "sdb1", "sdd1", "sdd2"}},
115+
Devices: []MDStatComponent{{Name: "sda1", DescriptorIndex: 8}, {Name: "sdh1", DescriptorIndex: 7}, {Name: "sdg1", DescriptorIndex: 6}, {Name: "sdf1", DescriptorIndex: 5}, {Name: "sde1", DescriptorIndex: 11}, {Name: "sdd1", DescriptorIndex: 3}, {Name: "sdc1", DescriptorIndex: 10}, {Name: "sdb1", DescriptorIndex: 9}, {Name: "sdd1", DescriptorIndex: 10, Spare: true}, {Name: "sdd2", DescriptorIndex: 11, Spare: true}}},
111116
"md8": {
112117
Name: "md8",
113118
Type: "raid1",
@@ -123,7 +128,7 @@ func TestFS_MDStat(t *testing.T) {
123128
BlocksSyncedPct: 8.5,
124129
BlocksSyncedFinishTime: 17,
125130
BlocksSyncedSpeed: 259783,
126-
Devices: []string{"sdb1", "sda1", "sdc", "sde"}},
131+
Devices: []MDStatComponent{{Name: "sdb1", DescriptorIndex: 1}, {Name: "sda1", DescriptorIndex: 0}, {Name: "sdc", DescriptorIndex: 2, Spare: true}, {Name: "sde", DescriptorIndex: 3, Spare: true}}},
127132
"md7": {
128133
Name: "md7",
129134
Type: "raid6",
@@ -139,7 +144,7 @@ func TestFS_MDStat(t *testing.T) {
139144
BlocksSyncedPct: 0,
140145
BlocksSyncedFinishTime: 0,
141146
BlocksSyncedSpeed: 0,
142-
Devices: []string{"sdb1", "sde1", "sdd1", "sdc1"}},
147+
Devices: []MDStatComponent{{Name: "sdb1", DescriptorIndex: 0}, {Name: "sde1", DescriptorIndex: 3}, {Name: "sdd1", DescriptorIndex: 2}, {Name: "sdc1", DescriptorIndex: 1, Faulty: true}}},
143148
"md9": {
144149
Name: "md9",
145150
Type: "raid1",
@@ -155,7 +160,7 @@ func TestFS_MDStat(t *testing.T) {
155160
BlocksSyncedPct: 0,
156161
BlocksSyncedFinishTime: 0,
157162
BlocksSyncedSpeed: 0,
158-
Devices: []string{"sdc2", "sdd2", "sdb2", "sda2", "sde", "sdf", "sdg"}},
163+
Devices: []MDStatComponent{{Name: "sdc2", DescriptorIndex: 2}, {Name: "sdd2", DescriptorIndex: 3}, {Name: "sdb2", DescriptorIndex: 1}, {Name: "sda2", DescriptorIndex: 0}, {Name: "sde", DescriptorIndex: 4, Faulty: true}, {Name: "sdf", DescriptorIndex: 5, Faulty: true}, {Name: "sdg", DescriptorIndex: 6, Spare: true}}},
159164
"md10": {
160165
Name: "md10",
161166
Type: "raid0",
@@ -171,7 +176,7 @@ func TestFS_MDStat(t *testing.T) {
171176
BlocksSyncedPct: 0,
172177
BlocksSyncedFinishTime: 0,
173178
BlocksSyncedSpeed: 0,
174-
Devices: []string{"sda1", "sdb1"}},
179+
Devices: []MDStatComponent{{Name: "sda1", DescriptorIndex: 0}, {Name: "sdb1", DescriptorIndex: 1}}},
175180
"md11": {
176181
Name: "md11",
177182
Type: "raid1",
@@ -187,7 +192,7 @@ func TestFS_MDStat(t *testing.T) {
187192
BlocksSyncedPct: 0,
188193
BlocksSyncedFinishTime: 0,
189194
BlocksSyncedSpeed: 0,
190-
Devices: []string{"sdb2", "sdc2", "sdc3", "hda", "ssdc2"}},
195+
Devices: []MDStatComponent{{Name: "sdb2", DescriptorIndex: 0}, {Name: "sdc2", DescriptorIndex: 1}, {Name: "sdc3", DescriptorIndex: 2, Faulty: true}, {Name: "hda", DescriptorIndex: 4, Spare: true}, {Name: "ssdc2", DescriptorIndex: 3, Spare: true}}},
191196
"md12": {
192197
Name: "md12",
193198
Type: "raid0",
@@ -203,7 +208,7 @@ func TestFS_MDStat(t *testing.T) {
203208
BlocksSyncedPct: 0,
204209
BlocksSyncedFinishTime: 0,
205210
BlocksSyncedSpeed: 0,
206-
Devices: []string{"sdc2", "sdd2"}},
211+
Devices: []MDStatComponent{{Name: "sdc2", DescriptorIndex: 0}, {Name: "sdd2", DescriptorIndex: 1}}},
207212
"md120": {
208213
Name: "md120",
209214
Type: "linear",
@@ -219,7 +224,7 @@ func TestFS_MDStat(t *testing.T) {
219224
BlocksSyncedPct: 0,
220225
BlocksSyncedFinishTime: 0,
221226
BlocksSyncedSpeed: 0,
222-
Devices: []string{"sda1", "sdb1"}},
227+
Devices: []MDStatComponent{{Name: "sda1", DescriptorIndex: 1}, {Name: "sdb1", DescriptorIndex: 0}}},
223228
"md126": {
224229
Name: "md126",
225230
Type: "raid0",
@@ -235,7 +240,7 @@ func TestFS_MDStat(t *testing.T) {
235240
BlocksSyncedPct: 0,
236241
BlocksSyncedFinishTime: 0,
237242
BlocksSyncedSpeed: 0,
238-
Devices: []string{"sdb", "sdc"}},
243+
Devices: []MDStatComponent{{Name: "sdb", DescriptorIndex: 1}, {Name: "sdc", DescriptorIndex: 0}}},
239244
"md219": {
240245
Name: "md219",
241246
Type: "unknown",
@@ -251,7 +256,7 @@ func TestFS_MDStat(t *testing.T) {
251256
BlocksSyncedPct: 0,
252257
BlocksSyncedFinishTime: 0,
253258
BlocksSyncedSpeed: 0,
254-
Devices: []string{"sdc", "sda"}},
259+
Devices: []MDStatComponent{{Name: "sdb", DescriptorIndex: 2, Spare: true}, {Name: "sdc", DescriptorIndex: 1, Spare: true}, {Name: "sda", DescriptorIndex: 0, Spare: true}}},
255260
"md00": {
256261
Name: "md00",
257262
Type: "raid0",
@@ -267,7 +272,7 @@ func TestFS_MDStat(t *testing.T) {
267272
BlocksSyncedPct: 0,
268273
BlocksSyncedFinishTime: 0,
269274
BlocksSyncedSpeed: 0,
270-
Devices: []string{"xvdb"}},
275+
Devices: []MDStatComponent{{Name: "xvdb", DescriptorIndex: 0}}},
271276
"md101": {
272277
Name: "md101",
273278
Type: "raid0",
@@ -283,7 +288,7 @@ func TestFS_MDStat(t *testing.T) {
283288
BlocksSyncedPct: 0,
284289
BlocksSyncedFinishTime: 0,
285290
BlocksSyncedSpeed: 0,
286-
Devices: []string{"sdb", "sdd", "sdc"}},
291+
Devices: []MDStatComponent{{Name: "sdb", DescriptorIndex: 2}, {Name: "sdd", DescriptorIndex: 1}, {Name: "sdc", DescriptorIndex: 0}}},
287292
"md201": {
288293
Name: "md201",
289294
Type: "raid1",
@@ -299,7 +304,7 @@ func TestFS_MDStat(t *testing.T) {
299304
BlocksSyncedPct: 5.7,
300305
BlocksSyncedFinishTime: 0.2,
301306
BlocksSyncedSpeed: 114176,
302-
Devices: []string{"sda3", "sdb3"}},
307+
Devices: []MDStatComponent{{Name: "sda3", DescriptorIndex: 0}, {Name: "sdb3", DescriptorIndex: 1}}},
303308
}
304309

305310
if want, have := len(refs), len(mdStats); want != have {
@@ -314,18 +319,32 @@ func TestFS_MDStat(t *testing.T) {
314319
}
315320

316321
func TestInvalidMdstat(t *testing.T) {
317-
invalidMount := [][]byte{[]byte(`
322+
invalidMount := [][]byte{
323+
// Test invalid Personality and format
324+
[]byte(`
318325
Personalities : [invalid]
319326
md3 : invalid
320327
314159265 blocks 64k chunks
321328
322329
unused devices: <none>
323330
`),
331+
// Test extra blank line
324332
[]byte(`
325333
md12 : active raid0 sdc2[0] sdd2[1]
326334
327335
3886394368 blocks super 1.2 512k chunks
328-
`)}
336+
`),
337+
// test for impossible component state
338+
[]byte(`
339+
md127 : active raid1 sdi2[0] sdj2[1](Z)
340+
312319552 blocks [2/2] [UU]
341+
`),
342+
// test for malformed component state
343+
[]byte(`
344+
md127 : active raid1 sdi2[0] sdj2[X]
345+
312319552 blocks [2/2] [UU]
346+
`),
347+
}
329348

330349
for _, invalid := range invalidMount {
331350
_, err := parseMDStat(invalid)

0 commit comments

Comments
 (0)