Skip to content

Commit 1229323

Browse files
committed
Align thread priority with Linux defaults
Under Linux, filesystem threads responsible for handling I/O are normally created with the maximum priority. Non-I/O filesystem processes run with the default priority. ZFS should adopt the same priority scheme under Linux to maintain good performance and so that it will compete fairly when other Linux filesystems are active. The priorities have been updated to the following: $ ps -eLo rtprio,cls,pid,pri,nice,cmd | egrep 'z_|spl_|zvol|arc|dbu|meta' - TS 10743 19 -20 [spl_kmem_cache] - TS 10744 19 -20 [spl_system_task] - TS 10745 19 -20 [spl_dynamic_tas] - TS 10764 19 0 [dbu_evict] - TS 10765 19 0 [arc_prune] - TS 10766 19 0 [arc_reclaim] - TS 10767 19 0 [arc_user_evicts] - TS 10768 19 0 [l2arc_feed] - TS 10769 39 0 [z_unmount] - TS 10770 39 -20 [zvol] - TS 11011 39 -20 [z_null_iss] - TS 11012 39 -20 [z_null_int] - TS 11013 39 -20 [z_rd_iss] - TS 11014 39 -20 [z_rd_int_0] - TS 11022 38 -19 [z_wr_iss] - TS 11023 39 -20 [z_wr_iss_h] - TS 11024 39 -20 [z_wr_int_0] - TS 11032 39 -20 [z_wr_int_h] - TS 11033 39 -20 [z_fr_iss_0] - TS 11041 39 -20 [z_fr_int] - TS 11042 39 -20 [z_cl_iss] - TS 11043 39 -20 [z_cl_int] - TS 11044 39 -20 [z_ioctl_iss] - TS 11045 39 -20 [z_ioctl_int] - TS 11046 39 -20 [metaslab_group_] - TS 11050 19 0 [z_iput] - TS 11121 38 -19 [z_wr_iss] Note that under Linux the meaning of a process's priority is inverted with respect to illumos. High values on Linux indicate a _low_ priority, while high values on illumos indicate a _high_ priority. In order to preserve the logical meaning of the minclsyspri and maxclsyspri macros when they are used by the illumos wrapper functions, their values have been inverted. This way, when changes are merged from upstream illumos, we won't need to remember to invert the macro. Having to invert it by hand could also lead to confusion. This patch depends on openzfs/spl#466. Signed-off-by: Brian Behlendorf <[email protected]> Signed-off-by: Ned Bass <[email protected]> Closes #3607
1 parent c97d306 commit 1229323

File tree

12 files changed

+29
-20
lines changed

12 files changed

+29
-20
lines changed

include/sys/zfs_context.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ typedef struct kthread {
233233
kt_did_t t_tid;
234234
thread_func_t t_func;
235235
void * t_arg;
236+
pri_t t_pri;
236237
} kthread_t;
237238

238239
#define curthread zk_thread_current()
@@ -615,8 +616,12 @@ extern void delay(clock_t ticks);
615616
#define max_ncpus 64
616617
#define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN))
617618

618-
#define minclsyspri 60
619-
#define maxclsyspri 99
619+
/*
620+
* Process priorities as defined by setpriority(2) and getpriority(2).
621+
*/
622+
#define minclsyspri 19
623+
#define maxclsyspri -20
624+
#define defclsyspri 0
620625

621626
#define CPU_SEQID (pthread_self() & (max_ncpus - 1))
622627

lib/libzpool/kernel.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ zk_thread_helper(void *arg)
128128
VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0);
129129
kthread_nr++;
130130
VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0);
131+
(void) setpriority(PRIO_PROCESS, 0, kt->t_pri);
131132

132133
kt->t_tid = pthread_self();
133134
((thread_func_arg_t) kt->t_func)(kt->t_arg);
@@ -151,6 +152,7 @@ zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg,
151152
kt = umem_zalloc(sizeof (kthread_t), UMEM_NOFAIL);
152153
kt->t_func = func;
153154
kt->t_arg = arg;
155+
kt->t_pri = pri;
154156

155157
VERIFY0(pthread_attr_init(&attr));
156158
VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));

lib/libzpool/taskq.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
308308

309309
for (t = 0; t < nthreads; t++)
310310
VERIFY((tq->tq_threadlist[t] = thread_create(NULL, 0,
311-
taskq_thread, tq, TS_RUN, NULL, 0, 0)) != NULL);
311+
taskq_thread, tq, TS_RUN, NULL, 0, pri)) != NULL);
312312

313313
return (tq);
314314
}
@@ -371,7 +371,7 @@ taskq_cancel_id(taskq_t *tq, taskqid_t id)
371371
void
372372
system_taskq_init(void)
373373
{
374-
system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
374+
system_taskq = taskq_create("system_taskq", 64, maxclsyspri, 4, 512,
375375
TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
376376
}
377377

module/zfs/arc.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5431,7 +5431,7 @@ arc_init(void)
54315431
mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL);
54325432
bzero(&arc_eviction_hdr, sizeof (arc_buf_hdr_t));
54335433

5434-
arc_prune_taskq = taskq_create("arc_prune", max_ncpus, minclsyspri,
5434+
arc_prune_taskq = taskq_create("arc_prune", max_ncpus, defclsyspri,
54355435
max_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
54365436

54375437
arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
@@ -5444,10 +5444,10 @@ arc_init(void)
54445444
}
54455445

54465446
(void) thread_create(NULL, 0, arc_reclaim_thread, NULL, 0, &p0,
5447-
TS_RUN, minclsyspri);
5447+
TS_RUN, defclsyspri);
54485448

54495449
(void) thread_create(NULL, 0, arc_user_evicts_thread, NULL, 0, &p0,
5450-
TS_RUN, minclsyspri);
5450+
TS_RUN, defclsyspri);
54515451

54525452
arc_dead = FALSE;
54535453
arc_warm = B_FALSE;
@@ -6954,7 +6954,7 @@ l2arc_start(void)
69546954
return;
69556955

69566956
(void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0,
6957-
TS_RUN, minclsyspri);
6957+
TS_RUN, defclsyspri);
69586958
}
69596959

69606960
void

module/zfs/dbuf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ dbuf_init(void)
413413
* All entries are queued via taskq_dispatch_ent(), so min/maxalloc
414414
* configuration is not required.
415415
*/
416-
dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0);
416+
dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
417417
}
418418

419419
void

module/zfs/dmu_objset.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1839,7 +1839,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
18391839
ntasks = dmu_find_threads;
18401840
if (ntasks == 0)
18411841
ntasks = vdev_count_leaves(dp->dp_spa) * 4;
1842-
tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
1842+
tq = taskq_create("dmu_objset_find", ntasks, maxclsyspri, ntasks,
18431843
INT_MAX, 0);
18441844
if (tq == NULL) {
18451845
kmem_free(dcp, sizeof (*dcp));

module/zfs/dsl_pool.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
170170
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
171171
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
172172

173-
dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, minclsyspri,
173+
dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
174174
max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
175175

176176
return (dp);

module/zfs/metaslab.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd)
492492
mg->mg_activation_count = 0;
493493

494494
mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
495-
minclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);
495+
maxclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);
496496

497497
return (mg);
498498
}

module/zfs/spa.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -898,11 +898,13 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
898898
pri_t pri = maxclsyspri;
899899
/*
900900
* The write issue taskq can be extremely CPU
901-
* intensive. Run it at slightly lower priority
902-
* than the other taskqs.
901+
* intensive. Run it at slightly less important
902+
* priority than the other taskqs. Under Linux this
903+
means incrementing the priority value; on platforms
904+
* like illumos it should be decremented.
903905
*/
904906
if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE)
905-
pri--;
907+
pri++;
906908

907909
tq = taskq_create_proc(name, value, pri, 50,
908910
INT_MAX, spa->spa_proc, flags);

module/zfs/txg.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,15 +205,15 @@ txg_sync_start(dsl_pool_t *dp)
205205
tx->tx_threads = 2;
206206

207207
tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread,
208-
dp, 0, &p0, TS_RUN, minclsyspri);
208+
dp, 0, &p0, TS_RUN, defclsyspri);
209209

210210
/*
211211
* The sync thread can need a larger-than-default stack size on
212212
* 32-bit x86. This is due in part to nested pools and
213213
* scrub_visitbp() recursion.
214214
*/
215215
tx->tx_sync_thread = thread_create(NULL, 32<<10, txg_sync_thread,
216-
dp, 0, &p0, TS_RUN, minclsyspri);
216+
dp, 0, &p0, TS_RUN, defclsyspri);
217217

218218
mutex_exit(&tx->tx_sync_lock);
219219
}
@@ -445,7 +445,7 @@ txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg)
445445
* Commit callback taskq hasn't been created yet.
446446
*/
447447
tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb",
448-
max_ncpus, minclsyspri, max_ncpus, max_ncpus * 2,
448+
max_ncpus, defclsyspri, max_ncpus, max_ncpus * 2,
449449
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
450450
}
451451

module/zfs/zfs_ctldir.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,7 @@ zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
10091009
void
10101010
zfsctl_init(void)
10111011
{
1012-
zfs_expire_taskq = taskq_create("z_unmount", 1, maxclsyspri,
1012+
zfs_expire_taskq = taskq_create("z_unmount", 1, defclsyspri,
10131013
1, 8, TASKQ_PREPOPULATE);
10141014
}
10151015

module/zfs/zil.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1888,7 +1888,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
18881888
ASSERT(list_is_empty(&zilog->zl_lwb_list));
18891889

18901890
zilog->zl_get_data = get_data;
1891-
zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri,
1891+
zilog->zl_clean_taskq = taskq_create("zil_clean", 1, defclsyspri,
18921892
2, 2, TASKQ_PREPOPULATE);
18931893

18941894
return (zilog);

0 commit comments

Comments
 (0)