Skip to content

Commit eff29a3

Browse files
akashb-22andrewc12
authored andcommitted
Add options to zfs redundant_metadata property
Currently, extra copies of metadata are created in addition to the redundancy provided by the pool (mirror/raidz/draid). As a result, twice as much space is used per inode, which decreases the total number of inodes that can be created in the filesystem. Setting redundant_metadata to none creates no additional copies of metadata, which reduces the space consumed by metadata copies and increases the total number of inodes that can be created in the filesystem. Additionally, this can improve file create performance because less metadata needs to be written. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Dipak Ghosh <[email protected]> Signed-off-by: Akash B <[email protected]> Closes openzfs#13680
1 parent 4d02ef3 commit eff29a3

File tree

9 files changed

+151
-15
lines changed

9 files changed

+151
-15
lines changed

include/sys/dmu.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
2828
* Copyright 2013 Saso Kiselkov. All rights reserved.
2929
* Copyright (c) 2017, Intel Corporation.
30+
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
3031
*/
3132

3233
/* Portions Copyright 2010 Robert Milkowski */
@@ -142,6 +143,12 @@ typedef enum dmu_object_byteswap {
142143
#define DMU_OT_IS_DDT(ot) \
143144
((ot) == DMU_OT_DDT_ZAP)
144145

146+
/*
 * True for object types that DMU_OT_IS_METADATA() accepts, excluding
 * DMU_OT_DNODE, DMU_OT_DIRECTORY_CONTENTS and DMU_OT_SA.
 * dmu_write_policy() uses this test to decide whether a block gets an
 * extra copy when redundant_metadata is set to "some".
 * Note: (ot) is evaluated more than once.
 */
#define DMU_OT_IS_CRITICAL(ot) \
147+
(DMU_OT_IS_METADATA(ot) && \
148+
(ot) != DMU_OT_DNODE && \
149+
(ot) != DMU_OT_DIRECTORY_CONTENTS && \
150+
(ot) != DMU_OT_SA)
151+
145152
/* Note: ztest uses DMU_OT_UINT64_OTHER as a proxy for file blocks */
146153
#define DMU_OT_IS_FILE(ot) \
147154
((ot) == DMU_OT_PLAIN_FILE_CONTENTS || (ot) == DMU_OT_UINT64_OTHER)

include/sys/fs/zfs.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
* Copyright (c) 2019 Datto Inc.
3030
* Portions Copyright 2010 Robert Milkowski
3131
* Copyright (c) 2021, Colm Buckley <[email protected]>
32+
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
3233
*/
3334

3435
#ifndef _SYS_FS_ZFS_H
@@ -503,7 +504,9 @@ typedef enum {
503504

504505
/*
 * Index values of the redundant_metadata property ("all", "most", "some",
 * "none" in redundant_metadata_table).  dmu_write_policy() switches on
 * these to decide whether a block gets an extra copy.
 */
typedef enum {
505506
ZFS_REDUNDANT_METADATA_ALL, /* property default; extra copy with no further checks */
506-
ZFS_REDUNDANT_METADATA_MOST
507+
ZFS_REDUNDANT_METADATA_MOST, /* extra copy of most types of metadata */
508+
ZFS_REDUNDANT_METADATA_SOME, /* extra copy only where DMU_OT_IS_CRITICAL() holds */
509+
ZFS_REDUNDANT_METADATA_NONE /* no extra copies of metadata */
507510
} zfs_redundant_metadata_type_t;
508511

509512
typedef enum {

man/man7/zfsprops.7

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@
3636
.\" Copyright 2018 Nexenta Systems, Inc.
3737
.\" Copyright 2019 Joyent, Inc.
3838
.\" Copyright (c) 2019, Kjeld Schouten-Lebbing
39+
.\" Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
3940
.\"
40-
.Dd May 24, 2021
41+
.Dd July 21, 2022
4142
.Dt ZFSPROPS 7
4243
.Os
4344
.
@@ -1454,7 +1455,7 @@ affects only files created afterward; existing files are unaffected.
14541455
.Pp
14551456
This property can also be referred to by its shortened column name,
14561457
.Sy recsize .
1457-
.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most
1458+
.It Sy redundant_metadata Ns = Ns Sy all Ns | Ns Sy most Ns | Ns Sy some Ns | Ns Sy none
14581459
Controls what types of metadata are stored redundantly.
14591460
ZFS stores an extra copy of metadata, so that if a single block is corrupted,
14601461
the amount of user data lost is limited.
@@ -1486,7 +1487,7 @@ When set to
14861487
ZFS stores an extra copy of most types of metadata.
14871488
This can improve performance of random writes, because less metadata must be
14881489
written.
1489-
In practice, at worst about 100 blocks
1490+
In practice, at worst about 1000 blocks
14901491
.Po of
14911492
.Sy recordsize
14921493
bytes each
@@ -1495,6 +1496,17 @@ of user data can be lost if a single on-disk block is corrupt.
14951496
The exact behavior of which metadata blocks are stored redundantly may change in
14961497
future releases.
14971498
.Pp
1499+
When set to
1500+
.Sy some ,
1501+
ZFS stores an extra copy of only critical metadata.
1502+
This can improve file create performance since less metadata needs to be written.
1503+
If a single on-disk block is corrupt, at worst a single user file can be lost.
1504+
.Pp
1505+
When set to
1506+
.Sy none ,
1507+
ZFS does not store any copies of metadata redundantly.
1508+
If a single on-disk block is corrupt, an entire dataset can be lost.
1509+
.Pp
14981510
The default value is
14991511
.Sy all .
15001512
.It Sy refquota Ns = Ns Ar size Ns | Ns Sy none

module/zcommon/zfs_prop.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
* Copyright 2016, Joyent, Inc.
2626
* Copyright (c) 2019, Klara Inc.
2727
* Copyright (c) 2019, Allan Jude
28+
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
2829
*/
2930

3031
/* Portions Copyright 2010 Robert Milkowski */
@@ -369,6 +370,8 @@ zfs_prop_init(void)
369370
static const zprop_index_t redundant_metadata_table[] = {
370371
{ "all", ZFS_REDUNDANT_METADATA_ALL },
371372
{ "most", ZFS_REDUNDANT_METADATA_MOST },
373+
{ "some", ZFS_REDUNDANT_METADATA_SOME },
374+
{ "none", ZFS_REDUNDANT_METADATA_NONE },
372375
{ NULL }
373376
};
374377

@@ -399,7 +402,7 @@ zfs_prop_init(void)
399402
zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata",
400403
ZFS_REDUNDANT_METADATA_ALL,
401404
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
402-
"all | most", "REDUND_MD",
405+
"all | most | some | none", "REDUND_MD",
403406
redundant_metadata_table, sfeatures);
404407
zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
405408
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,

module/zfs/dmu.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
* Copyright (c) 2019 Datto Inc.
2929
* Copyright (c) 2019, Klara Inc.
3030
* Copyright (c) 2019, Allan Jude
31+
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
3132
*/
3233

3334
#include <sys/dmu.h>
@@ -1992,12 +1993,22 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
19921993
ZCHECKSUM_FLAG_EMBEDDED))
19931994
checksum = ZIO_CHECKSUM_FLETCHER_4;
19941995

1995-
if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
1996-
(os->os_redundant_metadata ==
1997-
ZFS_REDUNDANT_METADATA_MOST &&
1998-
(level >= zfs_redundant_metadata_most_ditto_level ||
1999-
DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))))
1996+
switch (os->os_redundant_metadata) {
1997+
case ZFS_REDUNDANT_METADATA_ALL:
20001998
copies++;
1999+
break;
2000+
case ZFS_REDUNDANT_METADATA_MOST:
2001+
if (level >= zfs_redundant_metadata_most_ditto_level ||
2002+
DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))
2003+
copies++;
2004+
break;
2005+
case ZFS_REDUNDANT_METADATA_SOME:
2006+
if (DMU_OT_IS_CRITICAL(type))
2007+
copies++;
2008+
break;
2009+
case ZFS_REDUNDANT_METADATA_NONE:
2010+
break;
2011+
}
20012012
} else if (wp & WP_NOFILL) {
20022013
ASSERT(level == 0);
20032014

module/zfs/dmu_objset.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
* Copyright (c) 2018, loli10K <[email protected]>. All rights reserved.
3333
* Copyright (c) 2019, Klara Inc.
3434
* Copyright (c) 2019, Allan Jude
35+
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
3536
*/
3637

3738
/* Portions Copyright 2010 Robert Milkowski */
@@ -287,7 +288,9 @@ redundant_metadata_changed_cb(void *arg, uint64_t newval)
287288
* Inheritance and range checking should have been done by now.
288289
*/
289290
ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
290-
newval == ZFS_REDUNDANT_METADATA_MOST);
291+
newval == ZFS_REDUNDANT_METADATA_MOST ||
292+
newval == ZFS_REDUNDANT_METADATA_SOME ||
293+
newval == ZFS_REDUNDANT_METADATA_NONE);
291294

292295
os->os_redundant_metadata = newval;
293296
}

module/zfs/dsl_prop.c

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
2424
* Copyright (c) 2013 Martin Matuska. All rights reserved.
2525
* Copyright 2019 Joyent, Inc.
26+
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
2627
*/
2728

2829
#include <sys/zfs_context.h>
@@ -41,6 +42,7 @@
4142

4243
#define ZPROP_INHERIT_SUFFIX "$inherit"
4344
#define ZPROP_RECVD_SUFFIX "$recvd"
45+
#define ZPROP_IUV_SUFFIX "$iuv"
4446

4547
static int
4648
dodefault(zfs_prop_t prop, int intsz, int numints, void *buf)
@@ -69,6 +71,16 @@ dodefault(zfs_prop_t prop, int intsz, int numints, void *buf)
6971
return (0);
7072
}
7173

74+
static int
75+
dsl_prop_known_index(zfs_prop_t prop, uint64_t value)
76+
{
77+
const char *str = NULL;
78+
if (zfs_prop_get_type(prop) == PROP_TYPE_INDEX)
79+
return (!zfs_prop_index_to_string(prop, value, &str));
80+
81+
return (-1);
82+
}
83+
7284
int
7385
dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
7486
int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot)
@@ -81,6 +93,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
8193
boolean_t inheriting = B_FALSE;
8294
char *inheritstr;
8395
char *recvdstr;
96+
char *iuvstr;
8497

8598
ASSERT(dsl_pool_config_held(dd->dd_pool));
8699

@@ -91,6 +104,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
91104
inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop));
92105
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
93106
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
107+
iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
94108

95109
/*
96110
* Note: dd may become NULL, therefore we shouldn't dereference it
@@ -105,6 +119,18 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
105119
inheriting = B_TRUE;
106120
}
107121

122+
/* Check for an iuv value. */
123+
err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj,
124+
iuvstr, intsz, numints, buf);
125+
if (dsl_prop_known_index(zfs_name_to_prop(propname),
126+
*(uint64_t *)buf) != 1)
127+
err = ENOENT;
128+
if (err != ENOENT) {
129+
if (setpoint != NULL && err == 0)
130+
dsl_dir_name(dd, setpoint);
131+
break;
132+
}
133+
108134
/* Check for a local value. */
109135
err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj,
110136
propname, intsz, numints, buf);
@@ -155,6 +181,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
155181

156182
kmem_strfree(inheritstr);
157183
kmem_strfree(recvdstr);
184+
kmem_strfree(iuvstr);
158185

159186
return (err);
160187
}
@@ -647,6 +674,45 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
647674
dsl_dir_rele(dd, FTAG);
648675
}
649676

677+
678+
/*
679+
* For newer values in zfs index type properties, we add a new key
680+
* propname$iuv (iuv = Ignore Unknown Values) to the properties zap object
681+
* to store the new property value and store the default value in the
682+
* existing prop key. So that the propname$iuv key is ignored by the older zfs
683+
* versions and the default property value from the existing prop key is
684+
* used.
685+
*/
686+
static void
687+
dsl_prop_set_iuv(objset_t *mos, uint64_t zapobj, const char *propname,
688+
int intsz, int numints, const void *value, dmu_tx_t *tx)
689+
{
690+
char *iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
691+
boolean_t iuv = B_FALSE;
692+
zfs_prop_t prop = zfs_name_to_prop(propname);
693+
694+
switch (prop) {
695+
case ZFS_PROP_REDUNDANT_METADATA:
696+
if (*(uint64_t *)value == ZFS_REDUNDANT_METADATA_SOME ||
697+
*(uint64_t *)value == ZFS_REDUNDANT_METADATA_NONE)
698+
iuv = B_TRUE;
699+
break;
700+
default:
701+
break;
702+
}
703+
704+
if (iuv) {
705+
VERIFY0(zap_update(mos, zapobj, iuvstr, intsz, numints,
706+
value, tx));
707+
uint64_t val = zfs_prop_default_numeric(prop);
708+
VERIFY0(zap_update(mos, zapobj, propname, intsz, numints,
709+
&val, tx));
710+
} else {
711+
zap_remove(mos, zapobj, iuvstr, tx);
712+
}
713+
kmem_strfree(iuvstr);
714+
}
715+
650716
void
651717
dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
652718
zprop_source_t source, int intsz, int numints, const void *value,
@@ -659,6 +725,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
659725
const char *valstr = NULL;
660726
char *inheritstr;
661727
char *recvdstr;
728+
char *iuvstr;
662729
char *tbuf = NULL;
663730
int err;
664731
uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
@@ -692,6 +759,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
692759

693760
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
694761
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
762+
iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
695763

696764
switch ((int)source) {
697765
case ZPROP_SRC_NONE:
@@ -709,11 +777,14 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
709777
/*
710778
* remove propname$inherit
711779
* set propname -> value
780+
* set propname$iuv -> new property value
712781
*/
713782
err = zap_remove(mos, zapobj, inheritstr, tx);
714783
ASSERT(err == 0 || err == ENOENT);
715784
VERIFY0(zap_update(mos, zapobj, propname,
716785
intsz, numints, value, tx));
786+
(void) dsl_prop_set_iuv(mos, zapobj, propname, intsz,
787+
numints, value, tx);
717788
break;
718789
case ZPROP_SRC_INHERITED:
719790
/*
@@ -723,6 +794,8 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
723794
*/
724795
err = zap_remove(mos, zapobj, propname, tx);
725796
ASSERT(err == 0 || err == ENOENT);
797+
err = zap_remove(mos, zapobj, iuvstr, tx);
798+
ASSERT(err == 0 || err == ENOENT);
726799
if (version >= SPA_VERSION_RECVD_PROPS &&
727800
dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) {
728801
dummy = 0;
@@ -763,6 +836,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
763836

764837
kmem_strfree(inheritstr);
765838
kmem_strfree(recvdstr);
839+
kmem_strfree(iuvstr);
766840

767841
/*
768842
* If we are left with an empty snap zap we can destroy it.
@@ -1012,6 +1086,14 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
10121086

10131087
propname = za.za_name;
10141088
source = setpoint;
1089+
1090+
/* Skip if iuv entries are present. */
1091+
valstr = kmem_asprintf("%s%s", propname,
1092+
ZPROP_IUV_SUFFIX);
1093+
err = zap_contains(mos, propobj, valstr);
1094+
kmem_strfree(valstr);
1095+
if (err == 0)
1096+
continue;
10151097
} else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) {
10161098
/* Skip explicitly inherited entries. */
10171099
continue;
@@ -1044,6 +1126,16 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
10441126

10451127
source = ((flags & DSL_PROP_GET_INHERITING) ?
10461128
setpoint : ZPROP_SOURCE_VAL_RECVD);
1129+
} else if (strcmp(suffix, ZPROP_IUV_SUFFIX) == 0) {
1130+
(void) strlcpy(buf, za.za_name,
1131+
MIN(sizeof (buf), suffix - za.za_name + 1));
1132+
propname = buf;
1133+
source = setpoint;
1134+
prop = zfs_name_to_prop(propname);
1135+
1136+
if (dsl_prop_known_index(prop,
1137+
za.za_first_integer) != 1)
1138+
continue;
10471139
} else {
10481140
/*
10491141
* For backward compatibility, skip suffixes we don't

tests/zfs-tests/include/properties.shlib

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#
1313
# Copyright (c) 2012, 2016, Delphix. All rights reserved.
14+
# Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
1415
#
1516

1617
. $STF_SUITE/include/libtest.shlib
@@ -27,7 +28,7 @@ typeset -a canmount_prop_vals=('on' 'off' 'noauto')
2728
typeset -a copies_prop_vals=('1' '2' '3')
2829
typeset -a logbias_prop_vals=('latency' 'throughput')
2930
typeset -a primarycache_prop_vals=('all' 'none' 'metadata')
30-
typeset -a redundant_metadata_prop_vals=('all' 'most')
31+
typeset -a redundant_metadata_prop_vals=('all' 'most' 'some' 'none')
3132
typeset -a secondarycache_prop_vals=('all' 'none' 'metadata')
3233
typeset -a snapdir_prop_vals=('hidden' 'visible')
3334
typeset -a sync_prop_vals=('standard' 'always' 'disabled')

0 commit comments

Comments
 (0)