Skip to content

Commit b22bab2

Browse files
authored
Remove fastwrite mechanism.
Fastwrite was introduced many years ago to improve ZIL writes spread between multiple top-level vdevs by tracking the number of allocated but not yet written blocks and choosing the vdev with the smaller count. It was supposed to reduce the ZIL's knowledge about allocation, but it actually made the ZIL report its allocations to the allocation code even more actively, complicating both the ZIL and the metaslab code. On top of that, it seems the ZIO_FLAG_FASTWRITE setting in dmu_sync() was lost many years ago, which was one of the declared benefits. Plus, the introduction of the embedded log metaslab class solved another problem, with the allocation rotor accounting for both normal and log allocations, since in most cases those are now in different metaslab classes. After all that, I'd prefer to simplify the already too complicated ZIL, ZIO and metaslab code if the benefit of the complexity is not obvious. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: George Wilson <[email protected]> Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc. Closes #15107
1 parent 5bdfff5 commit b22bab2

File tree

8 files changed

+8
-123
lines changed

8 files changed

+8
-123
lines changed

include/sys/metaslab.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ uint64_t metaslab_largest_allocatable(metaslab_t *);
8080
#define METASLAB_ASYNC_ALLOC 0x8
8181
#define METASLAB_DONT_THROTTLE 0x10
8282
#define METASLAB_MUST_RESERVE 0x20
83-
#define METASLAB_FASTWRITE 0x40
8483
#define METASLAB_ZIL 0x80
8584

8685
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
@@ -96,8 +95,6 @@ void metaslab_unalloc_dva(spa_t *, const dva_t *, uint64_t);
9695
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
9796
int metaslab_claim_impl(vdev_t *, uint64_t, uint64_t, uint64_t);
9897
void metaslab_check_free(spa_t *, const blkptr_t *);
99-
void metaslab_fastwrite_mark(spa_t *, const blkptr_t *);
100-
void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *);
10198

10299
void metaslab_stat_init(void);
103100
void metaslab_stat_fini(void);

include/sys/vdev_impl.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,6 @@ struct vdev {
266266
metaslab_group_t *vdev_mg; /* metaslab group */
267267
metaslab_group_t *vdev_log_mg; /* embedded slog metaslab group */
268268
metaslab_t **vdev_ms; /* metaslab array */
269-
uint64_t vdev_pending_fastwrite; /* allocated fastwrites */
270269
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
271270
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
272271
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */

include/sys/zil_impl.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ typedef enum {
9191
typedef struct lwb {
9292
zilog_t *lwb_zilog; /* back pointer to log struct */
9393
blkptr_t lwb_blk; /* on disk address of this log blk */
94-
boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */
9594
boolean_t lwb_slog; /* lwb_blk is on SLOG device */
9695
boolean_t lwb_indirect; /* do not postpone zil_lwb_commit() */
9796
int lwb_nused; /* # used bytes in buffer */

include/sys/zio.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,6 @@ typedef uint64_t zio_flag_t;
222222
#define ZIO_FLAG_NOPWRITE (1ULL << 28)
223223
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
224224
#define ZIO_FLAG_DELEGATED (1ULL << 30)
225-
#define ZIO_FLAG_FASTWRITE (1ULL << 31)
226225

227226
#define ZIO_FLAG_MUSTSUCCEED 0
228227
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)

module/zfs/metaslab.c

Lines changed: 2 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -5101,7 +5101,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
51015101
zio_alloc_list_t *zal, int allocator)
51025102
{
51035103
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
5104-
metaslab_group_t *mg, *fast_mg, *rotor;
5104+
metaslab_group_t *mg, *rotor;
51055105
vdev_t *vd;
51065106
boolean_t try_hard = B_FALSE;
51075107

@@ -5164,15 +5164,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
51645164
} else if (d != 0) {
51655165
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
51665166
mg = vd->vdev_mg->mg_next;
5167-
} else if (flags & METASLAB_FASTWRITE) {
5168-
mg = fast_mg = mca->mca_rotor;
5169-
5170-
do {
5171-
if (fast_mg->mg_vd->vdev_pending_fastwrite <
5172-
mg->mg_vd->vdev_pending_fastwrite)
5173-
mg = fast_mg;
5174-
} while ((fast_mg = fast_mg->mg_next) != mca->mca_rotor);
5175-
51765167
} else {
51775168
ASSERT(mca->mca_rotor != NULL);
51785169
mg = mca->mca_rotor;
@@ -5297,7 +5288,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
52975288
mg->mg_bias = 0;
52985289
}
52995290

5300-
if ((flags & METASLAB_FASTWRITE) ||
5291+
if ((flags & METASLAB_ZIL) ||
53015292
atomic_add_64_nv(&mca->mca_aliquot, asize) >=
53025293
mg->mg_aliquot + mg->mg_bias) {
53035294
mca->mca_rotor = mg->mg_next;
@@ -5310,11 +5301,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
53105301
((flags & METASLAB_GANG_HEADER) ? 1 : 0));
53115302
DVA_SET_ASIZE(&dva[d], asize);
53125303

5313-
if (flags & METASLAB_FASTWRITE) {
5314-
atomic_add_64(&vd->vdev_pending_fastwrite,
5315-
psize);
5316-
}
5317-
53185304
return (0);
53195305
}
53205306
next:
@@ -5950,55 +5936,6 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
59505936
return (error);
59515937
}
59525938

5953-
void
5954-
metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp)
5955-
{
5956-
const dva_t *dva = bp->blk_dva;
5957-
int ndvas = BP_GET_NDVAS(bp);
5958-
uint64_t psize = BP_GET_PSIZE(bp);
5959-
int d;
5960-
vdev_t *vd;
5961-
5962-
ASSERT(!BP_IS_HOLE(bp));
5963-
ASSERT(!BP_IS_EMBEDDED(bp));
5964-
ASSERT(psize > 0);
5965-
5966-
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
5967-
5968-
for (d = 0; d < ndvas; d++) {
5969-
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
5970-
continue;
5971-
atomic_add_64(&vd->vdev_pending_fastwrite, psize);
5972-
}
5973-
5974-
spa_config_exit(spa, SCL_VDEV, FTAG);
5975-
}
5976-
5977-
void
5978-
metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp)
5979-
{
5980-
const dva_t *dva = bp->blk_dva;
5981-
int ndvas = BP_GET_NDVAS(bp);
5982-
uint64_t psize = BP_GET_PSIZE(bp);
5983-
int d;
5984-
vdev_t *vd;
5985-
5986-
ASSERT(!BP_IS_HOLE(bp));
5987-
ASSERT(!BP_IS_EMBEDDED(bp));
5988-
ASSERT(psize > 0);
5989-
5990-
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
5991-
5992-
for (d = 0; d < ndvas; d++) {
5993-
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
5994-
continue;
5995-
ASSERT3U(vd->vdev_pending_fastwrite, >=, psize);
5996-
atomic_sub_64(&vd->vdev_pending_fastwrite, psize);
5997-
}
5998-
5999-
spa_config_exit(spa, SCL_VDEV, FTAG);
6000-
}
6001-
60025939
static void
60035940
metaslab_check_free_impl_cb(uint64_t inner, vdev_t *vd, uint64_t offset,
60045941
uint64_t size, void *arg)

module/zfs/vdev.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,7 +1192,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
11921192

11931193
ASSERT(tvd == tvd->vdev_top);
11941194

1195-
tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite;
11961195
tvd->vdev_ms_array = svd->vdev_ms_array;
11971196
tvd->vdev_ms_shift = svd->vdev_ms_shift;
11981197
tvd->vdev_ms_count = svd->vdev_ms_count;
@@ -1655,7 +1654,6 @@ vdev_metaslab_fini(vdev_t *vd)
16551654
}
16561655
}
16571656
ASSERT0(vd->vdev_ms_count);
1658-
ASSERT3U(vd->vdev_pending_fastwrite, ==, 0);
16591657
}
16601658

16611659
typedef struct vdev_probe_stats {

module/zfs/zil.c

Lines changed: 5 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -761,15 +761,13 @@ zil_lwb_vdev_compare(const void *x1, const void *x2)
761761
}
762762

763763
static lwb_t *
764-
zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg,
765-
boolean_t fastwrite)
764+
zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg)
766765
{
767766
lwb_t *lwb;
768767

769768
lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP);
770769
lwb->lwb_zilog = zilog;
771770
lwb->lwb_blk = *bp;
772-
lwb->lwb_fastwrite = fastwrite;
773771
lwb->lwb_slog = slog;
774772
lwb->lwb_indirect = B_FALSE;
775773
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
@@ -916,7 +914,6 @@ zil_create(zilog_t *zilog)
916914
dmu_tx_t *tx = NULL;
917915
blkptr_t blk;
918916
int error = 0;
919-
boolean_t fastwrite = FALSE;
920917
boolean_t slog = FALSE;
921918
dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
922919

@@ -949,8 +946,6 @@ zil_create(zilog_t *zilog)
949946

950947
error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk,
951948
ZIL_MIN_BLKSZ, &slog);
952-
fastwrite = TRUE;
953-
954949
if (error == 0)
955950
zil_init_log_chain(zilog, &blk);
956951
}
@@ -959,7 +954,7 @@ zil_create(zilog_t *zilog)
959954
* Allocate a log write block (lwb) for the first log block.
960955
*/
961956
if (error == 0)
962-
lwb = zil_alloc_lwb(zilog, &blk, slog, txg, fastwrite);
957+
lwb = zil_alloc_lwb(zilog, &blk, slog, txg);
963958

964959
/*
965960
* If we just allocated the first log block, commit our transaction
@@ -1044,9 +1039,6 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
10441039
ASSERT(zh->zh_claim_txg == 0);
10451040
VERIFY(!keep_first);
10461041
while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) {
1047-
if (lwb->lwb_fastwrite)
1048-
metaslab_fastwrite_unmark(zilog->zl_spa,
1049-
&lwb->lwb_blk);
10501042
if (lwb->lwb_buf != NULL)
10511043
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
10521044
zio_free(zilog->zl_spa, txg, &lwb->lwb_blk);
@@ -1551,7 +1543,6 @@ zil_lwb_write_done(zio_t *zio)
15511543
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED);
15521544
lwb->lwb_state = LWB_STATE_WRITE_DONE;
15531545
lwb->lwb_write_zio = NULL;
1554-
lwb->lwb_fastwrite = FALSE;
15551546
nlwb = list_next(&zilog->zl_lwb_list, lwb);
15561547
mutex_exit(&zilog->zl_lock);
15571548

@@ -1718,20 +1709,12 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
17181709
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
17191710
lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
17201711

1721-
/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */
1722-
mutex_enter(&zilog->zl_lock);
1723-
if (!lwb->lwb_fastwrite) {
1724-
metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
1725-
lwb->lwb_fastwrite = 1;
1726-
}
1727-
17281712
lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0,
17291713
&lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
1730-
zil_lwb_write_done, lwb, prio,
1731-
ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
1714+
zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL, &zb);
17321715

1716+
mutex_enter(&zilog->zl_lock);
17331717
lwb->lwb_state = LWB_STATE_OPENED;
1734-
17351718
zil_lwb_set_zio_dependency(zilog, lwb);
17361719
zilog->zl_last_lwb_opened = lwb;
17371720
mutex_exit(&zilog->zl_lock);
@@ -1864,7 +1847,7 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
18641847
/*
18651848
* Allocate a new log write block (lwb).
18661849
*/
1867-
nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE);
1850+
nlwb = zil_alloc_lwb(zilog, bp, slog, txg);
18681851
}
18691852

18701853
lwb->lwb_state = LWB_STATE_ISSUED;
@@ -3651,18 +3634,6 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
36513634
BP_ZERO(&zh->zh_log);
36523635
}
36533636

3654-
/*
3655-
* Remove fastwrite on any blocks that have been pre-allocated for
3656-
* the next commit. This prevents fastwrite counter pollution by
3657-
* unused, long-lived LWBs.
3658-
*/
3659-
for (; lwb != NULL; lwb = list_next(&zilog->zl_lwb_list, lwb)) {
3660-
if (lwb->lwb_fastwrite && !lwb->lwb_write_zio) {
3661-
metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk);
3662-
lwb->lwb_fastwrite = 0;
3663-
}
3664-
}
3665-
36663637
mutex_exit(&zilog->zl_lock);
36673638
}
36683639

@@ -3895,9 +3866,6 @@ zil_close(zilog_t *zilog)
38953866
ASSERT(list_is_empty(&zilog->zl_lwb_list));
38963867
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);
38973868

3898-
if (lwb->lwb_fastwrite)
3899-
metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk);
3900-
39013869
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
39023870
zil_free_lwb(zilog, lwb);
39033871
}

module/zfs/zio.c

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3024,11 +3024,6 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
30243024
*/
30253025
pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
30263026

3027-
/*
3028-
* We didn't allocate this bp, so make sure it doesn't get unmarked.
3029-
*/
3030-
pio->io_flags &= ~ZIO_FLAG_FASTWRITE;
3031-
30323027
zio_nowait(zio);
30333028

30343029
return (pio);
@@ -3616,7 +3611,6 @@ zio_dva_allocate(zio_t *zio)
36163611
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
36173612
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
36183613

3619-
flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
36203614
if (zio->io_flags & ZIO_FLAG_NODATA)
36213615
flags |= METASLAB_DONT_THROTTLE;
36223616
if (zio->io_flags & ZIO_FLAG_GANG_CHILD)
@@ -3776,7 +3770,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
37763770
* of, so we just hash the objset ID to pick the allocator to get
37773771
* some parallelism.
37783772
*/
3779-
int flags = METASLAB_FASTWRITE | METASLAB_ZIL;
3773+
int flags = METASLAB_ZIL;
37803774
int allocator = (uint_t)cityhash4(0, 0, 0,
37813775
os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
37823776
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
@@ -4931,12 +4925,6 @@ zio_done(zio_t *zio)
49314925
zfs_ereport_free_checksum(zcr);
49324926
}
49334927

4934-
if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp &&
4935-
!BP_IS_HOLE(zio->io_bp) && !BP_IS_EMBEDDED(zio->io_bp) &&
4936-
!(zio->io_flags & ZIO_FLAG_NOPWRITE)) {
4937-
metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp);
4938-
}
4939-
49404928
/*
49414929
* It is the responsibility of the done callback to ensure that this
49424930
* particular zio is no longer discoverable for adoption, and as

0 commit comments

Comments
 (0)