Skip to content

Commit e4ee239

Browse files
committed
spa_min_alloc should be GCD, not min
Unlike ashift, spa_min_alloc is not guaranteed to be a power of 2 (for example, in the case of DRAID), so we should not simply select the minimal value among several vdevs: a size rounded up to a multiple of that minimum is unlikely to work for the other vdevs. Instead, using the greatest common divisor produces smaller yet more reasonable results. Signed-off-by: Ameer Hamza <[email protected]>
1 parent ca960ce commit e4ee239

File tree

4 files changed

+45
-6
lines changed

4 files changed

+45
-6
lines changed

include/sys/spa_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ struct spa {
250250
uint64_t spa_min_ashift; /* of vdevs in normal class */
251251
uint64_t spa_max_ashift; /* of vdevs in normal class */
252252
uint64_t spa_min_alloc; /* of vdevs in normal class */
253+
uint64_t spa_gcd_alloc; /* of vdevs in normal class */
253254
uint64_t spa_config_guid; /* config pool guid */
254255
uint64_t spa_load_guid; /* spa_load initialized guid */
255256
uint64_t spa_last_synced_guid; /* last synced guid */

module/zfs/spa_misc.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
772772
spa->spa_min_ashift = INT_MAX;
773773
spa->spa_max_ashift = 0;
774774
spa->spa_min_alloc = INT_MAX;
775+
spa->spa_gcd_alloc = INT_MAX;
775776

776777
/* Reset cached value */
777778
spa->spa_dedup_dspace = ~0ULL;

module/zfs/vdev.c

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1393,6 +1393,20 @@ vdev_remove_parent(vdev_t *cvd)
13931393
vdev_free(mvd);
13941394
}
13951395

1396+
/*
 * Compute the greatest common divisor of a and b with Euclid's algorithm.
 * The result is used to maintain spa_gcd_alloc across top-level vdevs.
 * If one argument is zero, the other argument is returned unchanged.
 */
static uint64_t
vdev_gcd(uint64_t a, uint64_t b)
{
	for (;;) {
		/* Once the remainder reaches zero, a holds the divisor. */
		if (b == 0)
			return (a);

		uint64_t rem = a % b;
		a = b;
		b = rem;
	}
}
1409+
13961410
void
13971411
vdev_metaslab_group_create(vdev_t *vd)
13981412
{
@@ -1445,8 +1459,16 @@ vdev_metaslab_group_create(vdev_t *vd)
14451459
spa->spa_min_ashift = vd->vdev_ashift;
14461460

14471461
uint64_t min_alloc = vdev_get_min_alloc(vd);
1448-
if (min_alloc < spa->spa_min_alloc)
1462+
if (spa->spa_min_alloc == INT_MAX &&
1463+
spa->spa_gcd_alloc == INT_MAX) {
14491464
spa->spa_min_alloc = min_alloc;
1465+
spa->spa_gcd_alloc = min_alloc;
1466+
} else {
1467+
if (min_alloc < spa->spa_min_alloc)
1468+
spa->spa_min_alloc = min_alloc;
1469+
spa->spa_gcd_alloc = vdev_gcd(min_alloc,
1470+
spa->spa_gcd_alloc);
1471+
}
14501472
}
14511473
}
14521474
}
@@ -2207,8 +2229,16 @@ vdev_open(vdev_t *vd)
22072229
if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
22082230
vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
22092231
uint64_t min_alloc = vdev_get_min_alloc(vd);
2210-
if (min_alloc < spa->spa_min_alloc)
2232+
if (spa->spa_min_alloc == INT_MAX &&
2233+
spa->spa_gcd_alloc == INT_MAX) {
22112234
spa->spa_min_alloc = min_alloc;
2235+
spa->spa_gcd_alloc = min_alloc;
2236+
} else {
2237+
if (min_alloc < spa->spa_min_alloc)
2238+
spa->spa_min_alloc = min_alloc;
2239+
spa->spa_gcd_alloc = vdev_gcd(min_alloc,
2240+
spa->spa_gcd_alloc);
2241+
}
22122242
}
22132243

22142244
/*

module/zfs/zio.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ static uint_t zio_slow_io_ms = (30 * MILLISEC);
8989
#define BP_SPANB(indblkshift, level) \
9090
(((uint64_t)1) << ((level) * ((indblkshift) - SPA_BLKPTRSHIFT)))
9191
#define COMPARE_META_LEVEL 0x80000000ul
92+
93+
/*
 * Pick the allocation granularity used to round up a physical size:
 * sizes larger than min_alloc are rounded to gcd_alloc, anything else
 * to min_alloc.
 *
 * Every argument is parenthesized so that expression arguments (e.g.
 * "a & b" or "c ? x : y") are not re-associated by operator precedence.
 * Note that min_alloc may be evaluated twice; do not pass expressions
 * with side effects.
 */
#define	SPA_ALLOC_SIZE(size, min_alloc, gcd_alloc) \
	(((size) > (min_alloc)) ? (gcd_alloc) : (min_alloc))
96+
9297
/*
9398
* The following actions directly effect the spa's sync-to-convergence logic.
9499
* The values below define the sync pass when we start performing the action.
@@ -1802,9 +1807,11 @@ zio_write_compress(zio_t *zio)
18021807
* in that we charge for the padding used to fill out
18031808
* the last sector.
18041809
*/
1805-
ASSERT3U(spa->spa_min_alloc, >=, SPA_MINBLOCKSHIFT);
1810+
uint64_t spa_alloc_size = SPA_ALLOC_SIZE(psize,
1811+
spa->spa_min_alloc, spa->spa_gcd_alloc);
1812+
ASSERT3U(spa_alloc_size, >=, SPA_MINBLOCKSHIFT);
18061813
size_t rounded = (size_t)roundup(psize,
1807-
spa->spa_min_alloc);
1814+
spa_alloc_size);
18081815
if (rounded >= lsize) {
18091816
compress = ZIO_COMPRESS_OFF;
18101817
zio_buf_free(cbuf, lsize);
@@ -1847,8 +1854,8 @@ zio_write_compress(zio_t *zio)
18471854
* take this codepath because it will change the on-disk block
18481855
* and decryption will fail.
18491856
*/
1850-
size_t rounded = MIN((size_t)roundup(psize,
1851-
spa->spa_min_alloc), lsize);
1857+
size_t rounded = MIN((size_t)roundup(psize, SPA_ALLOC_SIZE(
1858+
psize, spa->spa_min_alloc, spa->spa_gcd_alloc)), lsize);
18521859

18531860
if (rounded != psize) {
18541861
abd_t *cdata = abd_alloc_linear(rounded, B_TRUE);

0 commit comments

Comments
 (0)