@@ -470,22 +470,23 @@ static int zfs_arc_prune_task_threads = 1;
 static taskq_t *arc_flush_taskq;

 /*
- * Controls the number of ARC eviction threads.
+ * Controls the number of ARC eviction threads to dispatch sublists to.
+ *
  * Possible values:
  *   0 (auto)     compute the number of threads using a logarithmic formula.
  *   1 (disabled) one thread - parallel eviction is disabled.
- *   2+ (manual)  set the number manually, limited by zfs_arc_evict_threads_max.
+ *   2+ (manual)  set the number manually.
+ *
+ * See arc_evict_thread_init() for how "auto" is computed.
  */
 static uint_t zfs_arc_evict_threads = 0;

 /*
- * The number of allocated ARC eviction threads. This limits the maximum value
- * of zfs_arc_evict_threads.
- * The number is set up at module load time and depends on the initial value of
- * zfs_arc_evict_threads. If zfs_arc_evict_threads is set to auto, a logarithmic
- * function is used to compute this value. Otherwise, it is set to max_ncpus.
+ * The max number of ARC eviction threads to use. All provided or computed
+ * values for zfs_arc_evict_threads will be clamped to this. If not set at
+ * module load time, it will be computed. See arc_evict_thread_init().
  */
-static uint_t zfs_arc_evict_threads_max;
+static uint_t zfs_arc_evict_threads_max = 0;


 /* The 7 states: */
 arc_state_t ARC_anon;
@@ -4086,6 +4087,104 @@ arc_evict_task(void *arg)
 	    eva->eva_marker, eva->eva_spa, eva->eva_bytes);
 }

+static uint_t arc_evict_threads = 0;
+static uint_t arc_evict_threads_auto = 0;
+
+static void
+arc_evict_thread_recalc(void)
+{
+	/* Reload from tuneable */
+	uint_t want_threads = zfs_arc_evict_threads;
+	if (want_threads == 0)
+		want_threads = arc_evict_threads_auto;
+
+	/* Clamp to configured maximum */
+	if (want_threads > zfs_arc_evict_threads_max)
+		want_threads = zfs_arc_evict_threads_max;
+
+	arc_evict_threads = want_threads;
+
+	if (arc_evict_threads > 1 && arc_evict_taskq == NULL)
+		/*
+		 * First time the thread count goes over 1, ensure the taskq
+		 * is created. By default, this will happen in the call from
+		 * arc_init() on any system with 6+ CPUs. This is here to
+		 * support a system that started out with 1 thread (either
+		 * explicitly configured or because the system has <6 CPUs),
+		 * which was then raised. This should be a rare case, and we
+		 * do this to avoid precreating a taskq that we won't ever use.
+		 */
+		arc_evict_taskq = taskq_create("arc_evict",
+		    zfs_arc_evict_threads_max, defclsyspri, 0, INT_MAX,
+		    TASKQ_PREPOPULATE);
+}
+
+static void
+arc_evict_thread_init(void)
+{
+	/*
+	 * Compute the number of threads we want to use for eviction.
+	 *
+	 * Normally, it's log2(ncpus) + ncpus/32, which gets us to the
+	 * default max of 16 threads at ~256 CPUs.
+	 *
+	 * However, this formula goes to two threads at 4 CPUs, which
+	 * is still rather too low to be really useful, so we just go
+	 * with 1 thread at fewer than 6 cores.
+	 */
+	if (max_ncpus < 6)
+		arc_evict_threads_auto = 1;
+	else
+		arc_evict_threads_auto =
+		    (highbit64(max_ncpus) - 1) + max_ncpus / 32;
+
+	/*
+	 * If not set, compute an appropriate maximum number of threads.
+	 */
+	if (zfs_arc_evict_threads_max == 0) {
+		/*
+		 * If they want the computed number of threads, then
+		 * set the max such that the computed value falls in the
+		 * range of 4 to 16 threads.
+		 *
+		 * 4 threads is the minimum, because this allows machines
+		 * with <16 CPUs to still have room to raise
+		 * zfs_arc_evict_threads at runtime, giving operators of
+		 * smaller machines some room to move.
+		 *
+		 * Above that, we set it to the computed value, so it can
+		 * only be reduced at runtime.
+		 *
+		 * 16 is the hard upper limit for a computed maximum,
+		 * regardless of the number of CPUs. This is the computed value
+		 * for 256-287 cores, at which point you almost certainly
+		 * should be setting these tuneables yourself.
+		 *
+		 * If they have supplied a wanted number of threads, but
+		 * have not explicitly set a maximum, follow the same rules,
+		 * but if they go over 16, set the max there, so they don't
+		 * have to set a separate tuneable just to get their choice
+		 * through.
+		 */
+		if (zfs_arc_evict_threads > 16)
+			zfs_arc_evict_threads_max = zfs_arc_evict_threads;
+		else {
+			uint_t nthreads = zfs_arc_evict_threads ?
+			    zfs_arc_evict_threads : arc_evict_threads_auto;
+
+			zfs_arc_evict_threads_max = MAX(MIN(nthreads, 16), 4);
+		}
+	}
+
+	/* Clamp computed number of threads to maximum */
+	if (arc_evict_threads_auto > zfs_arc_evict_threads_max)
+		arc_evict_threads_auto = zfs_arc_evict_threads_max;
+
+	arc_evict_thread_recalc();
+
+	cmn_err(CE_NOTE, "max %u auto %u threads %u", zfs_arc_evict_threads_max, arc_evict_threads_auto, arc_evict_threads);
+}
+
 /*
  * The minimum number of bytes we can evict at once is a block size.
  * So, SPA_MAXBLOCKSIZE is a reasonable minimal value per an eviction task.
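For orientation only, a minimal userspace sketch (not part of the patch) of the sizing logic added above: the auto value is floor(log2(ncpus)) + ncpus/32 with a floor of 1 thread below 6 CPUs, and the default zfs_arc_evict_threads_max clamps the result into the 4..16 range. floor_log2() here is a local stand-in for the kernel's highbit64(n) - 1.

#include <stdio.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))
#define	MAX(a, b)	((a) > (b) ? (a) : (b))

/* Stand-in for highbit64(n) - 1 used by the kernel code above. */
static unsigned int
floor_log2(unsigned int n)
{
	unsigned int b = 0;
	while (n >>= 1)
		b++;
	return (b);
}

/* The "auto" thread count: 1 below 6 CPUs, else log2(ncpus) + ncpus/32. */
static unsigned int
auto_threads(unsigned int ncpus)
{
	if (ncpus < 6)
		return (1);
	return (floor_log2(ncpus) + ncpus / 32);
}

int
main(void)
{
	unsigned int cpus[] = { 2, 4, 6, 8, 16, 32, 64, 128, 256 };

	for (size_t i = 0; i < sizeof (cpus) / sizeof (cpus[0]); i++) {
		unsigned int nauto = auto_threads(cpus[i]);
		/* Default maximum when neither tuneable is set explicitly. */
		unsigned int defmax = MAX(MIN(nauto, 16), 4);
		printf("%3u CPUs: auto %2u, default max %2u\n",
		    cpus[i], nauto, defmax);
	}
	return (0);
}

At 256 CPUs both numbers reach 16, matching the comment in arc_evict_thread_init().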
@@ -4118,21 +4217,7 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,

 	num_sublists = multilist_get_num_sublists(ml);

-	/* Compute how many sublists we can evict in parallel */
-	uint_t nthreads = 1;
-	if (arc_evict_taskq) {
-		/* One sublist per thread */
-		nthreads = num_sublists;
-
-		/* Operator has set an ideal number of threads, honour it */
-		if (zfs_arc_evict_threads > 0)
-			nthreads = MIN(nthreads, zfs_arc_evict_threads);
-
-		/* Never more than the max threads */
-		nthreads = MIN(nthreads, zfs_arc_evict_threads_max);
-	}
-
-	boolean_t use_evcttq = nthreads > 1;
+	boolean_t use_evcttq = arc_evict_threads > 1;

 	/*
 	 * If we've tried to evict from each sublist, made some
@@ -4156,9 +4241,9 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,
 	}

 	if (use_evcttq) {
-		eva = kmem_alloc(sizeof (*eva) * nthreads, KM_NOSLEEP);
+		eva = kmem_alloc(sizeof (*eva) * arc_evict_threads, KM_NOSLEEP);
 		if (eva) {
-			for (int i = 0; i < nthreads; i++) {
+			for (int i = 0; i < arc_evict_threads; i++) {
 				taskq_init_ent(&eva[i].eva_tqent);
 				eva[i].eva_ml = ml;
 				eva[i].eva_spa = spa;
@@ -4181,7 +4266,7 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,
 	while (total_evicted < bytes) {
 		uint64_t scan_evicted = 0;
 		uint64_t evict = MIN_EVICT_SIZE;
-		uint_t ntasks = nthreads;
+		uint_t ntasks = arc_evict_threads;

 		if (use_evcttq) {
 			if (sublists_left < ntasks)
@@ -4283,7 +4368,7 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,
 	}

 	if (eva)
-		kmem_free(eva, sizeof (*eva) * nthreads);
+		kmem_free(eva, sizeof (*eva) * arc_evict_threads);

 	for (int i = 0; i < num_sublists; i++) {
 		multilist_sublist_t *mls = multilist_sublist_lock_idx(ml, i);
@@ -4806,6 +4891,12 @@ arc_evict_cb(void *arg, zthr_t *zthr)
 {
 	(void) arg;

+	/*
+	 * Recompute how many sublists we can evict in parallel, as it may
+	 * have been changed by the operator since the last eviction call.
+	 */
+	arc_evict_thread_recalc();
+
 	uint64_t evicted = 0;
 	fstrans_cookie_t cookie = spl_fstrans_mark();

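(Usage note, not part of the change: because the recalc runs at the top of the eviction callback, a runtime change to zfs_arc_evict_threads, on Linux typically written via /sys/module/zfs/parameters/zfs_arc_evict_threads, takes effect on the next eviction pass rather than immediately, and is still clamped to zfs_arc_evict_threads_max.)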
@@ -8002,34 +8093,7 @@ arc_init(void)
 	arc_prune_taskq = taskq_create("arc_prune", zfs_arc_prune_task_threads,
 	    defclsyspri, 100, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);

-	if (max_ncpus > 1) {
-		if (zfs_arc_evict_threads == 0) {
-			/*
-			 * Limit the maximum number of threads by 16.
-			 * We reach the limit when max_ncpu == 256.
-			 */
-			uint_t nthreads = MIN((highbit64(max_ncpus) - 1) +
-			    max_ncpus / 32, 16);
-
-			/*
-			 * If there's less then four cores in the system,
-			 * disable parallel eviction entirely. This is somewhat
-			 * arbitrary but at least, this is quite a small
-			 * computer that should probably use those cores for
-			 * something else.
-			 */
-			zfs_arc_evict_threads_max = max_ncpus < 4 ? 1 :
-			    nthreads;
-		} else {
-			zfs_arc_evict_threads_max = max_ncpus / 2;
-		}
-
-		if (zfs_arc_evict_threads_max > 1) {
-			arc_evict_taskq = taskq_create("arc_evict",
-			    zfs_arc_evict_threads_max,
-			    defclsyspri, 0, INT_MAX, TASKQ_PREPOPULATE);
-		}
-	}
+	arc_evict_thread_init();

 	list_create(&arc_async_flush_list, sizeof (arc_async_flush_t),
 	    offsetof(arc_async_flush_t, af_node));
@@ -11266,7 +11330,7 @@ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, prune_task_threads, INT, ZMOD_RW,
 	"Number of arc_prune threads");

 ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_threads, UINT, ZMOD_RW,
-	"Controls the number of ARC eviction threads");
+	"Number of threads to use for ARC eviction");

 ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_threads_max, UINT, ZMOD_RD,
-	"The number of allocated ARC eviction threads");
+	"The max number of threads that can be used for ARC eviction");