Skip to content

Commit 33075e4

Browse files
hawartens authored and behlendorf committed
Allow MMP to bypass waiting for other threads
At our site we have seen cases where a pool with multi-modifier protection enabled (multihost=on) gets suspended because a single failing disk is responding very slowly. Our pools have 90 disks in them, and we expect disks to fail. The current version of MMP requires that we wait for other writers before moving on. When a disk is responding very slowly, we observed that waiting here was bad enough to cause the pool to suspend. This change allows the MMP thread to bypass waiting for other threads and reduces the chance that the pool gets suspended.

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Herb Wartens <[email protected]>
Closes #14659
1 parent cdbe1d6 commit 33075e4

File tree

3 files changed

+29
-4
lines changed

3 files changed

+29
-4
lines changed

include/sys/spa.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid,
972972
/* Pool configuration locks */
973973
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
974974
extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
975+
extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag,
976+
krw_t rw);
975977
extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
976978
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
977979

module/zfs/mmp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ mmp_write_uberblock(spa_t *spa)
444444
uint64_t offset;
445445

446446
hrtime_t lock_acquire_time = gethrtime();
447-
spa_config_enter(spa, SCL_STATE, mmp_tag, RW_READER);
447+
spa_config_enter_mmp(spa, SCL_STATE, mmp_tag, RW_READER);
448448
lock_acquire_time = gethrtime() - lock_acquire_time;
449449
if (lock_acquire_time > (MSEC2NSEC(MMP_MIN_INTERVAL) / 10))
450450
zfs_dbgmsg("MMP SCL_STATE acquisition pool '%s' took %llu ns "

module/zfs/spa_misc.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -494,8 +494,9 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
494494
return (1);
495495
}
496496

497-
void
498-
spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
497+
static void
498+
spa_config_enter_impl(spa_t *spa, int locks, const void *tag, krw_t rw,
499+
int mmp_flag)
499500
{
500501
(void) tag;
501502
int wlocks_held = 0;
@@ -510,7 +511,8 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
510511
continue;
511512
mutex_enter(&scl->scl_lock);
512513
if (rw == RW_READER) {
513-
while (scl->scl_writer || scl->scl_write_wanted) {
514+
while (scl->scl_writer ||
515+
(!mmp_flag && scl->scl_write_wanted)) {
514516
cv_wait(&scl->scl_cv, &scl->scl_lock);
515517
}
516518
} else {
@@ -528,6 +530,27 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
528530
ASSERT3U(wlocks_held, <=, locks);
529531
}
530532

533+
void
534+
spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
535+
{
536+
spa_config_enter_impl(spa, locks, tag, rw, 0);
537+
}
538+
539+
/*
540+
 * spa_config_enter_mmp() allows the MMP thread to cut in front of
541+
* outstanding write lock requests. This is needed since the mmp updates are
542+
* time sensitive and failure to service them promptly will result in a
543+
* suspended pool. This pool suspension has been seen in practice when there is
544+
* a single disk in a pool that is responding slowly and presumably about to
545+
* fail.
546+
*/
547+
548+
void
549+
spa_config_enter_mmp(spa_t *spa, int locks, const void *tag, krw_t rw)
550+
{
551+
spa_config_enter_impl(spa, locks, tag, rw, 1);
552+
}
553+
531554
void
532555
spa_config_exit(spa_t *spa, int locks, const void *tag)
533556
{

0 commit comments

Comments
 (0)