Skip to content

Commit 3927017

Browse files
committed
ZIL: Do not clone blocks from the future
ZIL claim cannot handle block pointers cloned from the future, since they are not yet allocated at that point. This may happen either if the block was just written when it was cloned, or if the pool was frozen or otherwise rewound on import. Handle it from two sides: prevent cloning of blocks whose physical birth time is from a not yet synced or frozen TXG, and abort ZIL claim if we still detect such blocks due to rewind or something else. While there, assert that any cloned blocks we claim are really allocated by calling metaslab_check_free(). Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc.
1 parent 735ba3a commit 3927017

File tree

2 files changed

+43
-10
lines changed

2 files changed

+43
-10
lines changed

module/zfs/dmu.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2274,6 +2274,21 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
22742274
goto out;
22752275
}
22762276

2277+
/*
2278+
* If the block was allocated in transaction group that is not
2279+
* yet synced, we could clone it, but we couldn't write this
2280+
* operation into ZIL, or it may be impossible to replay, since
2281+
* the block may appear not yet allocated at that point.
2282+
*/
2283+
if (BP_PHYSICAL_BIRTH(bp) > spa_freeze_txg(os->os_spa)) {
2284+
error = SET_ERROR(EINVAL);
2285+
goto out;
2286+
}
2287+
if (BP_PHYSICAL_BIRTH(bp) > spa_last_synced_txg(os->os_spa)) {
2288+
error = SET_ERROR(EAGAIN);
2289+
goto out;
2290+
}
2291+
22772292
bps[i] = *bp;
22782293
}
22792294

module/zfs/zil.c

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -617,11 +617,12 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg)
617617
}
618618

619619
static int
620-
zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx)
620+
zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx,
621+
uint64_t first_txg)
621622
{
622623
const lr_clone_range_t *lr = (const lr_clone_range_t *)lrc;
623624
const blkptr_t *bp;
624-
spa_t *spa;
625+
spa_t *spa = zilog->zl_spa;
625626
uint_t ii;
626627

627628
ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr));
@@ -636,19 +637,36 @@ zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx)
636637
* XXX: Do we need to byteswap lr?
637638
*/
638639

639-
spa = zilog->zl_spa;
640-
641640
for (ii = 0; ii < lr->lr_nbps; ii++) {
642641
bp = &lr->lr_bps[ii];
643642

644643
/*
645-
* When data in embedded into BP there is no need to create
646-
* BRT entry as there is no data block. Just copy the BP as
647-
* it contains the data.
644+
* When data is embedded into the BP there is no need to create
645+
* BRT entry as there is no data block. Just copy the BP as it
646+
* contains the data.
647+
*/
648+
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
649+
continue;
650+
651+
/*
652+
* We can not handle block pointers from the future, since they
653+
* are not yet allocated. It should not normally happen, but
654+
* just in case lets be safe and just stop here now instead of
655+
* corrupting the pool.
648656
*/
649-
if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
657+
if (BP_PHYSICAL_BIRTH(bp) >= first_txg)
658+
return (SET_ERROR(ENOENT));
659+
660+
/*
661+
* Assert the block is really allocated before we reference it.
662+
*/
663+
metaslab_check_free(spa, bp);
664+
}
665+
666+
for (ii = 0; ii < lr->lr_nbps; ii++) {
667+
bp = &lr->lr_bps[ii];
668+
if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp))
650669
brt_pending_add(spa, bp, tx);
651-
}
652670
}
653671

654672
return (0);
@@ -663,7 +681,7 @@ zil_claim_log_record(zilog_t *zilog, const lr_t *lrc, void *tx,
663681
case TX_WRITE:
664682
return (zil_claim_write(zilog, lrc, tx, first_txg));
665683
case TX_CLONE_RANGE:
666-
return (zil_claim_clone_range(zilog, lrc, tx));
684+
return (zil_claim_clone_range(zilog, lrc, tx, first_txg));
667685
default:
668686
return (0);
669687
}

0 commit comments

Comments
 (0)