Skip to content

Commit f701b68

Browse files
committed
Remove spa_namespace_lock from zpool status
This commit removes spa_namespace_lock from the zpool status codepath. This means that zpool status will not hang if a pool fails while holding the spa_namespace_lock. Background: The spa_namespace_lock was originally meant to protect the spa_namespace_avl AVL tree. The spa_namespace_avl tree held the mappings from pool names to spa_t's. So if you wanted to lookup the spa_t for the "tank" pool, you would do an AVL search for "tank" while holding spa_namespace_lock. Over time though the spa_namespace_lock was re-purposed to protect other critical codepaths in the spa subsystem as well. In many cases we don't know what the original authors meant to protect with it, or if they needed it for read-only or read-write protection. It is simply "too big and risky to fix properly". The workaround is to add a new lightweight version of the spa_namespace_lock called spa_namespace_lite_lock. spa_namespace_lite_lock only protects the AVL tree, and nothing else. It can be used for read-only access to the AVL tree without requiring the spa_namespace_lock. Calls to spa_lookup_lite() and spa_next_lite() only need to acquire a reader lock on spa_namespace_lite_lock; they do not need to also acquire the old spa_namespace_lock. This allows us to still run zpool status even if the zfs module has spa_namespace_lock held. Note that these AVL tree locks only protect the tree, not the actual spa_t contents. Signed-off-by: Tony Hutter <[email protected]>
1 parent 5c67820 commit f701b68

File tree

19 files changed

+326
-72
lines changed

19 files changed

+326
-72
lines changed

include/sys/spa.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,22 @@ extern kmutex_t spa_namespace_lock;
842842
extern avl_tree_t spa_namespace_avl;
843843
extern kcondvar_t spa_namespace_cv;
844844

845+
extern krwlock_t spa_namespace_lite_lock;
846+
847+
extern uint_t spa_namespace_delay_ms;
848+
/*
849+
* Special version of mutex_enter() used for testing the spa_namespace_lock.
850+
* It inserts an artificial delay after acquiring the lock to simulate a failure
851+
* while holding the lock. The delay is controlled by the
852+
* spa_namespace_lock_delay module param and is only to be used by ZTS.
853+
*/
854+
#define mutex_enter_ns(lock) do { \
855+
ASSERT(lock == &spa_namespace_lock); \
856+
mutex_enter(lock); \
857+
if (unlikely(spa_namespace_delay_ms != 0)) \
858+
zfs_msleep(spa_namespace_delay_ms); \
859+
} while (0);
860+
845861
/*
846862
* SPA configuration functions in spa_config.c
847863
*/
@@ -866,9 +882,11 @@ extern int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv,
866882

867883
/* Namespace manipulation */
868884
extern spa_t *spa_lookup(const char *name);
885+
extern spa_t *spa_lookup_lite(const char *name);
869886
extern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot);
870887
extern void spa_remove(spa_t *spa);
871888
extern spa_t *spa_next(spa_t *prev);
889+
extern spa_t *spa_next_lite(spa_t *prev);
872890

873891
/* Refcount functions */
874892
extern void spa_open_ref(spa_t *spa, const void *tag);

include/sys/spa_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ struct spa {
484484
extern char *spa_config_path;
485485
extern const char *zfs_deadman_failmode;
486486
extern uint_t spa_slop_shift;
487+
extern uint_t spa_namespace_delay_ms;
487488
extern void spa_taskq_dispatch(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
488489
task_func_t *func, zio_t *zio, boolean_t cutinline);
489490
extern void spa_load_spares(spa_t *spa);

include/sys/zfs_context.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,8 @@ void ksiddomain_rele(ksiddomain_t *);
769769
(void) nanosleep(&ts, NULL); \
770770
} while (0)
771771

772+
#define zfs_msleep(ms) zfs_sleep_until(gethrtime() + (MSEC2NSEC(ms)))
773+
772774
typedef int fstrans_cookie_t;
773775

774776
extern fstrans_cookie_t spl_fstrans_mark(void);

include/sys/zfs_delay.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,6 @@
3737
usleep_range(delta_us, delta_us + 100); \
3838
} \
3939
} while (0)
40+
#define zfs_msleep(ms) zfs_sleep_until(gethrtime() + (MSEC2NSEC(ms)))
4041

4142
#endif /* _SYS_FS_ZFS_DELAY_H */

module/os/freebsd/zfs/spa_os.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind)
192192
*/
193193
config = spa_generate_rootconf(name);
194194

195-
mutex_enter(&spa_namespace_lock);
195+
mutex_enter_ns(&spa_namespace_lock);
196196
if (config != NULL) {
197197
pname = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME);
198198
VERIFY0(strcmp(name, pname));

module/os/freebsd/zfs/zfs_ioctl_os.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ zfs_ioc_nextboot(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
107107
"command", &command) != 0)
108108
return (EINVAL);
109109

110-
mutex_enter(&spa_namespace_lock);
110+
mutex_enter_ns(&spa_namespace_lock);
111111
spa = spa_by_guid(pool_guid, vdev_guid);
112112
if (spa != NULL)
113113
strcpy(name, spa_name(spa));

module/zfs/arc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8205,7 +8205,7 @@ l2arc_dev_get_next(void)
82058205
* of cache devices (l2arc_dev_mtx). Once a device has been selected,
82068206
* both locks will be dropped and a spa config lock held instead.
82078207
*/
8208-
mutex_enter(&spa_namespace_lock);
8208+
mutex_enter_ns(&spa_namespace_lock);
82098209
mutex_enter(&l2arc_dev_mtx);
82108210

82118211
/* if there are no vdevs, there is nothing to do */

module/zfs/mmp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -728,7 +728,7 @@ mmp_signal_all_threads(void)
728728
{
729729
spa_t *spa = NULL;
730730

731-
mutex_enter(&spa_namespace_lock);
731+
mutex_enter_ns(&spa_namespace_lock);
732732
while ((spa = spa_next(spa))) {
733733
if (spa->spa_state == POOL_STATE_ACTIVE)
734734
mmp_signal_thread(spa);

0 commit comments

Comments
 (0)