Avoid dirtying the final TXGs when exporting a pool

gamanakis · web-flow · commit 2fb52853dcca · 2022-02-15T15:48:59.000-08:00
There are two codepaths that can dirty the final TXGs:

1) If spa_export_common()->spa_unload()->spa_unload_log_sm_flush_all() is called after the spa_final_txg is set, then spa_sync()->spa_flush_metaslabs() may end up dirtying the final TXGs. Then we have the following panic:

Call Trace:
 <TASK>
 dump_stack_lvl+0x46/0x62
 spl_panic+0xea/0x102 [spl]
 dbuf_dirty+0xcd6/0x11b0 [zfs]
 zap_lockdir_impl+0x321/0x590 [zfs]
 zap_lockdir+0xed/0x150 [zfs]
 zap_update+0x69/0x250 [zfs]
 feature_sync+0x5f/0x190 [zfs]
 space_map_alloc+0x83/0xc0 [zfs]
 spa_generate_syncing_log_sm+0x10b/0x2f0 [zfs]
 spa_flush_metaslabs+0xb2/0x350 [zfs]
 spa_sync_iterate_to_convergence+0x15a/0x320 [zfs]
 spa_sync+0x2e0/0x840 [zfs]
 txg_sync_thread+0x2b1/0x3f0 [zfs]
 thread_generic_wrapper+0x62/0xa0 [spl]
 kthread+0x127/0x150
 ret_from_fork+0x22/0x30
 </TASK>

2) Calling vdev_*_stop_all() for a second time in spa_unload() after spa_export_common() unnecessarily delays the final TXGs beyond the TXG that spa_final_txg is set to.

Fix this by performing the check and the call to spa_unload_log_sm_flush_all() before the spa_final_txg is set in spa_export_common(). Also check whether the spa_final_txg has already been set in spa_unload() and skip those calls in this case.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
External-issue: https://www.illumos.org/issues/9081
Closes openzfs#13048
Closes openzfs#13098
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
@@ -1597,25 +1597,33 @@ spa_unload(spa_t *spa)
 	spa_wake_waiters(spa);
 
 	/*
-	 * If the log space map feature is enabled and the pool is getting
-	 * exported (but not destroyed), we want to spend some time flushing
-	 * as many metaslabs as we can in an attempt to destroy log space
-	 * maps and save import time.
+	 * If we have set the spa_final_txg, we have already performed the
+	 * tasks below in spa_export_common(). We should not redo it here since
+	 * we delay the final TXGs beyond what spa_final_txg is set at.
 	 */
-	if (spa_should_flush_logs_on_unload(spa))
-		spa_unload_log_sm_flush_all(spa);
+	if (spa->spa_final_txg == UINT64_MAX) {
+		/*
+		 * If the log space map feature is enabled and the pool is
+		 * getting exported (but not destroyed), we want to spend some
+		 * time flushing as many metaslabs as we can in an attempt to
+		 * destroy log space maps and save import time.
+		 */
+		if (spa_should_flush_logs_on_unload(spa))
+			spa_unload_log_sm_flush_all(spa);
 
-	/*
-	 * Stop async tasks.
-	 */
-	spa_async_suspend(spa);
+		/*
+		 * Stop async tasks.
+		 */
+		spa_async_suspend(spa);
 
-	if (spa->spa_root_vdev) {
-		vdev_t *root_vdev = spa->spa_root_vdev;
-		vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE);
-		vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE);
-		vdev_autotrim_stop_all(spa);
-		vdev_rebuild_stop_all(spa);
+		if (spa->spa_root_vdev) {
+			vdev_t *root_vdev = spa->spa_root_vdev;
+			vdev_initialize_stop_all(root_vdev,
+			    VDEV_INITIALIZE_ACTIVE);
+			vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE);
+			vdev_autotrim_stop_all(spa);
+			vdev_rebuild_stop_all(spa);
+		}
 	}
 
 	/*
@@ -6427,9 +6435,27 @@ spa_export_common(const char *pool, int new_state, nvlist_t **oldconfig,
 		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
 			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 			spa->spa_state = new_state;
+			vdev_config_dirty(spa->spa_root_vdev);
+			spa_config_exit(spa, SCL_ALL, FTAG);
+		}
+
+		/*
+		 * If the log space map feature is enabled and the pool is
+		 * getting exported (but not destroyed), we want to spend some
+		 * time flushing as many metaslabs as we can in an attempt to
+		 * destroy log space maps and save import time. This has to be
+		 * done before we set the spa_final_txg, otherwise
+		 * spa_sync() -> spa_flush_metaslabs() may dirty the final TXGs.
+		 * spa_should_flush_logs_on_unload() should be called after
+		 * spa_state has been set to the new_state.
+		 */
+		if (spa_should_flush_logs_on_unload(spa))
+			spa_unload_log_sm_flush_all(spa);
+
+		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
+			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 			spa->spa_final_txg = spa_last_synced_txg(spa) +
 			    TXG_DEFER_SIZE + 1;
-			vdev_config_dirty(spa->spa_root_vdev);
 			spa_config_exit(spa, SCL_ALL, FTAG);
 		}
 	}