Skip to content

Commit 2ef38b1

Browse files
committed
deadlock between spa_errlog_lock and dp_config_rwlock
There is a lock order inversion deadlock between `spa_errlog_lock` and `dp_config_rwlock`: A thread in `spa_delete_dataset_errlog()` is running from a sync task. It is holding the `dp_config_rwlock` for writer (see `dsl_sync_task_sync()`), and waiting for the `spa_errlog_lock`. A thread in `dsl_pool_config_enter()` is holding the `spa_errlog_lock` (see `spa_get_errlog_size()`) and waiting for the `dp_config_rwlock` (as reader). Note that this was introduced by openzfs#12812. This commit address this by defining the lock ordering to be dp_config_rwlock first, then spa_errlog_lock / spa_errlist_lock. spa_get_errlog() and spa_get_errlog_size() can acquire the locks in this order, and then process_error_block() and get_head_and_birth_txg() can verify that the dp_config_rwlock is already held. Tested by having some errors in the pool (via `zinject -t data /path/to/file`), one thread running `zpool iostat 0.001`, and another thread runs `zfs destroy` (in a loop, although it hits the first time). This reproduces the problem easily without the fix, and works with the fix. Signed-off-by: Matthew Ahrens <[email protected]>
1 parent 5f73bbb commit 2ef38b1

File tree

1 file changed

+23
-11
lines changed

1 file changed

+23
-11
lines changed

module/zfs/spa_errlog.c

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,8 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
160160
dsl_dataset_t *ds;
161161
objset_t *os;
162162

163-
dsl_pool_config_enter(dp, FTAG);
164163
int error = dsl_dataset_hold_obj(dp, ds_obj, FTAG, &ds);
165164
if (error != 0) {
166-
dsl_pool_config_exit(dp, FTAG);
167165
return (error);
168166
}
169167
ASSERT(head_dataset_id);
@@ -172,7 +170,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
172170
error = dmu_objset_from_ds(ds, &os);
173171
if (error != 0) {
174172
dsl_dataset_rele(ds, FTAG);
175-
dsl_pool_config_exit(dp, FTAG);
176173
return (error);
177174
}
178175

@@ -189,7 +186,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
189186
ZFS_KEYSTATUS_UNAVAILABLE) {
190187
zep->zb_birth = 0;
191188
dsl_dataset_rele(ds, FTAG);
192-
dsl_pool_config_exit(dp, FTAG);
193189
return (0);
194190
}
195191

@@ -199,7 +195,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
199195
error = dnode_hold(os, zep->zb_object, FTAG, &dn);
200196
if (error != 0) {
201197
dsl_dataset_rele(ds, FTAG);
202-
dsl_pool_config_exit(dp, FTAG);
203198
return (error);
204199
}
205200

@@ -225,7 +220,6 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
225220
rw_exit(&dn->dn_struct_rwlock);
226221
dnode_rele(dn, FTAG);
227222
dsl_dataset_rele(ds, FTAG);
228-
dsl_pool_config_exit(dp, FTAG);
229223
return (error);
230224
}
231225

@@ -479,9 +473,6 @@ static int
479473
process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
480474
uint64_t *count, void *uaddr, boolean_t only_count)
481475
{
482-
dsl_pool_t *dp = spa->spa_dsl_pool;
483-
uint64_t top_affected_fs;
484-
485476
/*
486477
* If the zb_birth is 0 it means we failed to retrieve the birth txg
487478
* of the block pointer. This happens when an encrypted filesystem is
@@ -504,13 +495,12 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
504495
return (0);
505496
}
506497

507-
dsl_pool_config_enter(dp, FTAG);
498+
uint64_t top_affected_fs;
508499
int error = find_top_affected_fs(spa, head_ds, zep, &top_affected_fs);
509500
if (error == 0)
510501
error = check_filesystem(spa, top_affected_fs, zep, count,
511502
uaddr, only_count);
512503

513-
dsl_pool_config_exit(dp, FTAG);
514504
return (error);
515505
}
516506

@@ -687,6 +677,12 @@ spa_get_errlog_size(spa_t *spa)
687677
{
688678
uint64_t total = 0;
689679

680+
/*
681+
* The pool config lock is needed to hold a dataset_t via (among other
682+
* places) get_errlist_size() -> get_head_and_birth_txg(), and lock
683+
* ordering requires that we get it before the spa_errlog_lock.
684+
*/
685+
dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
690686
if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
691687
mutex_enter(&spa->spa_errlog_lock);
692688
uint64_t count;
@@ -718,6 +714,7 @@ spa_get_errlog_size(spa_t *spa)
718714
mutex_exit(&spa->spa_errlist_lock);
719715
#endif
720716
}
717+
dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
721718
return (total);
722719
}
723720

@@ -988,6 +985,12 @@ spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count)
988985
int ret = 0;
989986

990987
#ifdef _KERNEL
988+
/*
989+
* The pool config lock is needed to hold a dataset_t via (among other
990+
* places) process_error_list() -> get_head_and_birth_txg(), and lock
991+
* ordering requires that we get it before the spa_errlog_lock.
992+
*/
993+
dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
991994
mutex_enter(&spa->spa_errlog_lock);
992995

993996
ret = process_error_log(spa, spa->spa_errlog_scrub, uaddr, count);
@@ -1006,6 +1009,7 @@ spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count)
10061009
mutex_exit(&spa->spa_errlist_lock);
10071010

10081011
mutex_exit(&spa->spa_errlog_lock);
1012+
dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
10091013
#else
10101014
(void) spa, (void) uaddr, (void) count;
10111015
#endif
@@ -1174,6 +1178,13 @@ spa_errlog_sync(spa_t *spa, uint64_t txg)
11741178
spa->spa_scrub_finished = B_FALSE;
11751179

11761180
mutex_exit(&spa->spa_errlist_lock);
1181+
1182+
/*
1183+
* The pool config lock is needed to hold a dataset_t via
1184+
* sync_error_list() -> get_head_and_birth_txg(), and lock ordering
1185+
* requires that we get it before the spa_errlog_lock.
1186+
*/
1187+
dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
11771188
mutex_enter(&spa->spa_errlog_lock);
11781189

11791190
tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
@@ -1218,6 +1229,7 @@ spa_errlog_sync(spa_t *spa, uint64_t txg)
12181229
dmu_tx_commit(tx);
12191230

12201231
mutex_exit(&spa->spa_errlog_lock);
1232+
dsl_pool_config_exit(spa->spa_dsl_pool, FTAG);
12211233
}
12221234

12231235
static void

0 commit comments

Comments
 (0)