Skip to content

Commit 998d2e0

Browse files
committed
arc: avoid possible deadlock in arc_read
In arc_read(), config lock may be acquired in reverse order (e.g., config lock followed by hash_lock) in scenarios like l2arc_evict() during L2ARC device removal. To avoid deadlocks, if the attempt to acquire the config lock fails, release the hash lock first. Then wait for the config lock and retry the hash lock from the beginning. Signed-off-by: Ameer Hamza <[email protected]>
1 parent 68473c4 commit 998d2e0

File tree

6 files changed

+43
-18
lines changed

6 files changed

+43
-18
lines changed

cmd/zdb/zdb.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9043,7 +9043,7 @@ zdb_read_block(char *thing, spa_t *spa)
90439043
const blkptr_t *b = (const blkptr_t *)(void *)
90449044
((uintptr_t)buf + (uintptr_t)blkptr_offset);
90459045
if (zfs_blkptr_verify(spa, b,
9046-
BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY) == B_FALSE) {
9046+
BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY) != 1) {
90479047
abd_return_buf_copy(pabd, buf, lsize);
90489048
borrowed = B_FALSE;
90499049
buf = lbuf;
@@ -9052,7 +9052,7 @@ zdb_read_block(char *thing, spa_t *spa)
90529052
b = (const blkptr_t *)(void *)
90539053
((uintptr_t)buf + (uintptr_t)blkptr_offset);
90549054
if (lsize == -1 || zfs_blkptr_verify(spa, b,
9055-
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) == B_FALSE) {
9055+
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) != 1) {
90569056
printf("invalid block pointer at this DVA\n");
90579057
goto out;
90589058
}

include/sys/zio.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,7 @@ enum blk_verify_flag {
546546
enum blk_config_flag {
547547
BLK_CONFIG_HELD, // SCL_VDEV held for writer
548548
BLK_CONFIG_NEEDED, // SCL_VDEV should be obtained for reader
549+
BLK_CONFIG_NEEDED_TRY, // Try with SCL_VDEV for reader
549550
BLK_CONFIG_SKIP, // skip checks which require SCL_VDEV
550551
};
551552

@@ -663,7 +664,7 @@ extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
663664
extern int zio_resume(spa_t *spa);
664665
extern void zio_resume_wait(spa_t *spa);
665666

666-
extern boolean_t zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
667+
extern int zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
667668
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify);
668669

669670
/*

module/zfs/arc.c

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5683,6 +5683,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
56835683
boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF;
56845684
arc_buf_t *buf = NULL;
56855685
int rc = 0;
5686+
boolean_t bp_validation = B_FALSE;
56865687

56875688
ASSERT(!embedded_bp ||
56885689
BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
@@ -5725,8 +5726,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
57255726
* should always be the case since the blkptr is protected by
57265727
* a checksum.
57275728
*/
5728-
if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP,
5729-
BLK_VERIFY_LOG)) {
5729+
if (zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP,
5730+
BLK_VERIFY_LOG) != 1) {
57305731
mutex_exit(hash_lock);
57315732
rc = SET_ERROR(ECKSUM);
57325733
goto done;
@@ -5877,6 +5878,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
58775878
abd_t *hdr_abd;
58785879
int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
58795880
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
5881+
int config_lock;
5882+
int error;
58805883

58815884
if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
58825885
if (hash_lock != NULL)
@@ -5885,16 +5888,31 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
58855888
goto done;
58865889
}
58875890

5891+
if (zio_flags & ZIO_FLAG_CONFIG_WRITER) {
5892+
config_lock = BLK_CONFIG_HELD;
5893+
} else if (hash_lock != NULL) {
5894+
/*
5895+
* Prevent lock order reversal
5896+
*/
5897+
config_lock = BLK_CONFIG_NEEDED_TRY;
5898+
} else {
5899+
config_lock = BLK_CONFIG_NEEDED;
5900+
}
5901+
58885902
/*
58895903
* Verify the block pointer contents are reasonable. This
58905904
* should always be the case since the blkptr is protected by
58915905
* a checksum.
58925906
*/
5893-
if (!zfs_blkptr_verify(spa, bp,
5894-
(zio_flags & ZIO_FLAG_CONFIG_WRITER) ?
5895-
BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
5907+
if (!bp_validation && ((error = zfs_blkptr_verify(spa, bp,
5908+
config_lock, BLK_VERIFY_LOG)) != 1)) {
58965909
if (hash_lock != NULL)
58975910
mutex_exit(hash_lock);
5911+
if (error == -EBUSY && (zfs_blkptr_verify(spa, bp,
5912+
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) == 1)) {
5913+
bp_validation = B_TRUE;
5914+
goto top;
5915+
}
58985916
rc = SET_ERROR(ECKSUM);
58995917
goto done;
59005918
}

module/zfs/dsl_scan.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2305,8 +2305,8 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
23052305
DMU_USERUSED_OBJECT, tx);
23062306
}
23072307
arc_buf_destroy(buf, &buf);
2308-
} else if (!zfs_blkptr_verify(spa, bp,
2309-
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
2308+
} else if (zfs_blkptr_verify(spa, bp,
2309+
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) != 1) {
23102310
/*
23112311
* Sanity check the block pointer contents, this is handled
23122312
* by arc_read() for the cases above.

module/zfs/spa.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2778,7 +2778,8 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
27782778
* When damaged consider it to be a metadata error since we cannot
27792779
* trust the BP_GET_TYPE and BP_GET_LEVEL values.
27802780
*/
2781-
if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
2781+
if (zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)
2782+
!= 1) {
27822783
atomic_inc_64(&sle->sle_meta_count);
27832784
return (0);
27842785
}

module/zfs/zio.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,7 +1164,7 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
11641164
* it only contains known object types, checksum/compression identifiers,
11651165
* block sizes within the maximum allowed limits, valid DVAs, etc.
11661166
*
1167-
* If everything checks out B_TRUE is returned. The zfs_blkptr_verify
1167+
* If everything checks out 1 is returned. The zfs_blkptr_verify
11681168
* argument controls the behavior when an invalid field is detected.
11691169
*
11701170
* Values for blk_verify_flag:
@@ -1179,7 +1179,7 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
11791179
* BLK_CONFIG_SKIP: skip checks which require SCL_VDEV, for better
11801180
* performance
11811181
*/
1182-
boolean_t
1182+
int
11831183
zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
11841184
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify)
11851185
{
@@ -1211,7 +1211,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
12111211
"blkptr at %px has invalid PSIZE %llu",
12121212
bp, (longlong_t)BPE_GET_PSIZE(bp));
12131213
}
1214-
return (errors == 0);
1214+
return ((errors) ? 0 : 1);
12151215
}
12161216
if (unlikely(BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS)) {
12171217
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
@@ -1229,7 +1229,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
12291229
* will be done once the zio is executed in vdev_mirror_map_alloc.
12301230
*/
12311231
if (unlikely(!spa->spa_trust_config))
1232-
return (errors == 0);
1232+
return ((errors) ? 0 : 1);
12331233

12341234
switch (blk_config) {
12351235
case BLK_CONFIG_HELD:
@@ -1238,8 +1238,12 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
12381238
case BLK_CONFIG_NEEDED:
12391239
spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
12401240
break;
1241+
case BLK_CONFIG_NEEDED_TRY:
1242+
if (!spa_config_tryenter(spa, SCL_VDEV, bp, RW_READER))
1243+
return (-EBUSY);
1244+
break;
12411245
case BLK_CONFIG_SKIP:
1242-
return (errors == 0);
1246+
return ((errors) ? 0 : 1);
12431247
default:
12441248
panic("invalid blk_config %u", blk_config);
12451249
}
@@ -1294,10 +1298,11 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
12941298
bp, i, (longlong_t)offset);
12951299
}
12961300
}
1297-
if (blk_config == BLK_CONFIG_NEEDED)
1301+
if (blk_config == BLK_CONFIG_NEEDED || blk_config ==
1302+
BLK_CONFIG_NEEDED_TRY)
12981303
spa_config_exit(spa, SCL_VDEV, bp);
12991304

1300-
return (errors == 0);
1305+
return ((errors) ? 0 : 1);
13011306
}
13021307

13031308
boolean_t

0 commit comments

Comments
 (0)