Skip to content

Commit 637f918

Browse files
committed
arc: avoid possible deadlock in arc_read
In l2arc_evict(), the config lock may be acquired in reverse order (e.g., first the config lock (writer), then a hash lock) unlike in arc_read() during scenarios like L2ARC device removal. To avoid deadlocks, if the attempt to acquire the config lock (reader) fails in arc_read(), release the hash lock, wait for the config lock, and retry from the beginning. Reviewed-by: Alexander Motin <[email protected]> Signed-off-by: Ameer Hamza <[email protected]> Closes openzfs#17071
1 parent 7e72312 commit 637f918

File tree

6 files changed

+40
-16
lines changed

6 files changed

+40
-16
lines changed

cmd/zdb/zdb.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9043,7 +9043,7 @@ zdb_read_block(char *thing, spa_t *spa)
90439043
const blkptr_t *b = (const blkptr_t *)(void *)
90449044
((uintptr_t)buf + (uintptr_t)blkptr_offset);
90459045
if (zfs_blkptr_verify(spa, b,
9046-
BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY) == B_FALSE) {
9046+
BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY)) {
90479047
abd_return_buf_copy(pabd, buf, lsize);
90489048
borrowed = B_FALSE;
90499049
buf = lbuf;
@@ -9052,7 +9052,7 @@ zdb_read_block(char *thing, spa_t *spa)
90529052
b = (const blkptr_t *)(void *)
90539053
((uintptr_t)buf + (uintptr_t)blkptr_offset);
90549054
if (lsize == -1 || zfs_blkptr_verify(spa, b,
9055-
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) == B_FALSE) {
9055+
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
90569056
printf("invalid block pointer at this DVA\n");
90579057
goto out;
90589058
}

include/sys/zio.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,7 @@ enum blk_verify_flag {
545545
enum blk_config_flag {
546546
BLK_CONFIG_HELD, // SCL_VDEV held for writer
547547
BLK_CONFIG_NEEDED, // SCL_VDEV should be obtained for reader
548+
BLK_CONFIG_NEEDED_TRY, // Try with SCL_VDEV for reader
548549
BLK_CONFIG_SKIP, // skip checks which require SCL_VDEV
549550
};
550551

@@ -662,7 +663,7 @@ extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
662663
extern int zio_resume(spa_t *spa);
663664
extern void zio_resume_wait(spa_t *spa);
664665

665-
extern boolean_t zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
666+
extern int zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
666667
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify);
667668

668669
/*

module/zfs/arc.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5568,6 +5568,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
55685568
boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF;
55695569
arc_buf_t *buf = NULL;
55705570
int rc = 0;
5571+
boolean_t bp_validation = B_FALSE;
55715572

55725573
ASSERT(!embedded_bp ||
55735574
BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
@@ -5610,7 +5611,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
56105611
* should always be the case since the blkptr is protected by
56115612
* a checksum.
56125613
*/
5613-
if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP,
5614+
if (zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP,
56145615
BLK_VERIFY_LOG)) {
56155616
mutex_exit(hash_lock);
56165617
rc = SET_ERROR(ECKSUM);
@@ -5762,6 +5763,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
57625763
abd_t *hdr_abd;
57635764
int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
57645765
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
5766+
int config_lock;
5767+
int error;
57655768

57665769
if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
57675770
if (hash_lock != NULL)
@@ -5770,16 +5773,31 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
57705773
goto done;
57715774
}
57725775

5776+
if (zio_flags & ZIO_FLAG_CONFIG_WRITER) {
5777+
config_lock = BLK_CONFIG_HELD;
5778+
} else if (hash_lock != NULL) {
5779+
/*
5780+
* Prevent lock order reversal
5781+
*/
5782+
config_lock = BLK_CONFIG_NEEDED_TRY;
5783+
} else {
5784+
config_lock = BLK_CONFIG_NEEDED;
5785+
}
5786+
57735787
/*
57745788
* Verify the block pointer contents are reasonable. This
57755789
* should always be the case since the blkptr is protected by
57765790
* a checksum.
57775791
*/
5778-
if (!zfs_blkptr_verify(spa, bp,
5779-
(zio_flags & ZIO_FLAG_CONFIG_WRITER) ?
5780-
BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
5792+
if (!bp_validation && (error = zfs_blkptr_verify(spa, bp,
5793+
config_lock, BLK_VERIFY_LOG))) {
57815794
if (hash_lock != NULL)
57825795
mutex_exit(hash_lock);
5796+
if (error == EBUSY && !zfs_blkptr_verify(spa, bp,
5797+
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
5798+
bp_validation = B_TRUE;
5799+
goto top;
5800+
}
57835801
rc = SET_ERROR(ECKSUM);
57845802
goto done;
57855803
}

module/zfs/dsl_scan.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2305,7 +2305,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
23052305
DMU_USERUSED_OBJECT, tx);
23062306
}
23072307
arc_buf_destroy(buf, &buf);
2308-
} else if (!zfs_blkptr_verify(spa, bp,
2308+
} else if (zfs_blkptr_verify(spa, bp,
23092309
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
23102310
/*
23112311
* Sanity check the block pointer contents, this is handled

module/zfs/spa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2778,7 +2778,7 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
27782778
* When damaged consider it to be a metadata error since we cannot
27792779
* trust the BP_GET_TYPE and BP_GET_LEVEL values.
27802780
*/
2781-
if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
2781+
if (zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
27822782
atomic_inc_64(&sle->sle_meta_count);
27832783
return (0);
27842784
}

module/zfs/zio.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,7 +1164,7 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
11641164
* it only contains known object types, checksum/compression identifiers,
11651165
* block sizes within the maximum allowed limits, valid DVAs, etc.
11661166
*
1167-
* If everything checks out B_TRUE is returned. The zfs_blkptr_verify
1167+
* If everything checks out 0 is returned. The zfs_blkptr_verify
11681168
* argument controls the behavior when an invalid field is detected.
11691169
*
11701170
* Values for blk_verify_flag:
@@ -1179,7 +1179,7 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
11791179
* BLK_CONFIG_SKIP: skip checks which require SCL_VDEV, for better
11801180
* performance
11811181
*/
1182-
boolean_t
1182+
int
11831183
zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
11841184
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify)
11851185
{
@@ -1211,7 +1211,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
12111211
"blkptr at %px has invalid PSIZE %llu",
12121212
bp, (longlong_t)BPE_GET_PSIZE(bp));
12131213
}
1214-
return (errors == 0);
1214+
return (errors ? ECKSUM : 0);
12151215
}
12161216
if (unlikely(BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS)) {
12171217
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
@@ -1229,7 +1229,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
12291229
* will be done once the zio is executed in vdev_mirror_map_alloc.
12301230
*/
12311231
if (unlikely(!spa->spa_trust_config))
1232-
return (errors == 0);
1232+
return (errors ? ECKSUM : 0);
12331233

12341234
switch (blk_config) {
12351235
case BLK_CONFIG_HELD:
@@ -1238,8 +1238,12 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
12381238
case BLK_CONFIG_NEEDED:
12391239
spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
12401240
break;
1241+
case BLK_CONFIG_NEEDED_TRY:
1242+
if (!spa_config_tryenter(spa, SCL_VDEV, bp, RW_READER))
1243+
return (EBUSY);
1244+
break;
12411245
case BLK_CONFIG_SKIP:
1242-
return (errors == 0);
1246+
return (errors ? ECKSUM : 0);
12431247
default:
12441248
panic("invalid blk_config %u", blk_config);
12451249
}
@@ -1294,10 +1298,11 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
12941298
bp, i, (longlong_t)offset);
12951299
}
12961300
}
1297-
if (blk_config == BLK_CONFIG_NEEDED)
1301+
if (blk_config == BLK_CONFIG_NEEDED || blk_config ==
1302+
BLK_CONFIG_NEEDED_TRY)
12981303
spa_config_exit(spa, SCL_VDEV, bp);
12991304

1300-
return (errors == 0);
1305+
return (errors ? ECKSUM : 0);
13011306
}
13021307

13031308
boolean_t

0 commit comments

Comments
 (0)