Commit b58986e
Use large stacks when available
While stack size varies by architecture, it has historically defaulted to 8K on x86_64 systems. However, as of Linux 3.15 the default thread stack size was increased to 16K. These kernels are now the default in most non-enterprise distributions, which means we no longer need to assume 8K stacks. This patch takes advantage of that fact by appropriately reverting stack conservation changes which were made to ensure stability but may have had a negative impact on performance for certain workloads. This also has the side effect of bringing the code slightly more in line with upstream.

Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: Richard Yao <[email protected]>
Closes #4059
1 parent f409267 commit b58986e
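The change reduces to a compile-time switch: when the build detects 16K or larger stacks it defines HAVE_LARGE_STACKS, and code paths that previously bounced work through a taskq solely to obtain a fresh stack can instead run the work inline. A minimal, self-contained C sketch of that pattern (the function names below are illustrative stand-ins, not the actual ZFS APIs):

#include <stdio.h>

/*
 * Illustrative sketch only.  HAVE_LARGE_STACKS would normally be defined
 * by configure (see the config/Rules.am and config/kernel.m4 changes below).
 */

/* Work that may need a significant amount of stack. */
static void
do_work(void *arg)
{
    printf("work executed, arg=%d\n", *(int *)arg);
}

/* Stand-in for a synchronous taskq dispatch to a thread with a fresh stack. */
static void
dispatch_sync(void (*func)(void *), void *arg)
{
    func(arg);
}

static void
issue_work(void *arg)
{
#if defined(HAVE_LARGE_STACKS)
    /* 16K or larger stacks: call inline and skip the dispatch overhead. */
    do_work(arg);
#else
    /* Small stacks: hand the work to a worker with a known-fresh stack. */
    dispatch_sync(do_work, arg);
#endif
}

int
main(void)
{
    int token = 42;

    issue_work(&token);
    return (0);
}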

File tree

4 files changed: 67 additions & 19 deletions


config/Rules.am

Lines changed: 2 additions & 1 deletion
@@ -7,7 +7,8 @@ AM_CFLAGS += ${NO_BOOL_COMPARE}
 AM_CFLAGS += -fno-strict-aliasing
 AM_CPPFLAGS = -D_GNU_SOURCE -D__EXTENSIONS__ -D_REENTRANT
 AM_CPPFLAGS += -D_POSIX_PTHREAD_SEMANTICS -D_FILE_OFFSET_BITS=64
-AM_CPPFLAGS += -D_LARGEFILE64_SOURCE -DTEXT_DOMAIN=\"zfs-linux-user\"
+AM_CPPFLAGS += -D_LARGEFILE64_SOURCE -DHAVE_LARGE_STACKS=1
+AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-linux-user\"
 AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\"
 AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\"
 AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\"

config/kernel.m4

Lines changed: 27 additions & 1 deletion
@@ -460,9 +460,35 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG], [
     ], [
     ])
 
+    ZFS_AC_KERNEL_CONFIG_THREAD_SIZE
     ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC
 ])
 
+dnl #
+dnl # Check configured THREAD_SIZE
+dnl #
+dnl # The stack size will vary by architecture, but as of Linux 3.15 on x86_64
+dnl # the default thread stack size was increased to 16K from 8K. Therefore,
+dnl # on newer kernels and some architectures stack usage optimizations can be
+dnl # conditionally applied to improve performance without negatively impacting
+dnl # stability.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_THREAD_SIZE], [
+    AC_MSG_CHECKING([whether kernel was built with 16K or larger stacks])
+    ZFS_LINUX_TRY_COMPILE([
+        #include <linux/module.h>
+    ],[
+        #if (THREAD_SIZE < 16384)
+        #error "THREAD_SIZE is less than 16K"
+        #endif
+    ],[
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_LARGE_STACKS, 1, [kernel has large stacks])
+    ],[
+        AC_MSG_RESULT([no])
+    ])
+])
+
 dnl #
 dnl # Check CONFIG_DEBUG_LOCK_ALLOC
 dnl #
@@ -572,7 +598,7 @@ dnl #
 dnl # ZFS_LINUX_CONFIG
 dnl #
 AC_DEFUN([ZFS_LINUX_CONFIG],
-    [AC_MSG_CHECKING([whether Linux was built with CONFIG_$1])
+    [AC_MSG_CHECKING([whether kernel was built with CONFIG_$1])
     ZFS_LINUX_TRY_COMPILE([
         #include <linux/module.h>
     ],[
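The new macro is a pure compile test: configure builds a small source file against the kernel headers, and the build only succeeds when THREAD_SIZE is at least 16384, in which case HAVE_LARGE_STACKS is added to the generated configuration header. Roughly the shape of the test code involved (a sketch assembled from the two fragments above; the actual conftest scaffolding emitted by ZFS_LINUX_TRY_COMPILE differs):

/*
 * Sketch of the configure compile test.  If THREAD_SIZE is below 16K the
 * #error aborts compilation and HAVE_LARGE_STACKS is left undefined.
 */
#include <linux/module.h>

static void
thread_size_check(void)
{
#if (THREAD_SIZE < 16384)
#error "THREAD_SIZE is less than 16K"
#endif
}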

module/zfs/dmu_send.c

Lines changed: 6 additions & 2 deletions
@@ -67,7 +67,7 @@ typedef struct dump_bytes_io {
 } dump_bytes_io_t;
 
 static void
-dump_bytes_strategy(void *arg)
+dump_bytes_cb(void *arg)
 {
     dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
     dmu_sendarg_t *dsp = dbi->dbi_dsp;
@@ -94,14 +94,18 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
     dbi.dbi_buf = buf;
     dbi.dbi_len = len;
 
+#if defined(HAVE_LARGE_STACKS)
+    dump_bytes_cb(&dbi);
+#else
     /*
      * The vn_rdwr() call is performed in a taskq to ensure that there is
      * always enough stack space to write safely to the target filesystem.
      * The ZIO_TYPE_FREE threads are used because there can be a lot of
      * them and they are used in vdev_file.c for a similar purpose.
      */
     spa_taskq_dispatch_sync(dmu_objset_spa(dsp->dsa_os), ZIO_TYPE_FREE,
-        ZIO_TASKQ_ISSUE, dump_bytes_strategy, &dbi, TQ_SLEEP);
+        ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
+#endif /* HAVE_LARGE_STACKS */
 
     return (dsp->dsa_err);
 }
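Read together, the second hunk leaves the dispatch portion of dump_bytes() looking roughly like this after the patch (reassembled from the added and context lines above; surrounding declarations elided):

    dbi.dbi_buf = buf;
    dbi.dbi_len = len;

#if defined(HAVE_LARGE_STACKS)
    /* Large stacks: write directly from the calling context. */
    dump_bytes_cb(&dbi);
#else
    /*
     * Small stacks: perform the vn_rdwr() call in a ZIO_TYPE_FREE taskq so
     * there is always enough stack space to write safely to the target
     * filesystem.
     */
    spa_taskq_dispatch_sync(dmu_objset_spa(dsp->dsa_os), ZIO_TYPE_FREE,
        ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
#endif /* HAVE_LARGE_STACKS */

    return (dsp->dsa_err);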

module/zfs/zio.c

Lines changed: 32 additions & 15 deletions
@@ -1401,6 +1401,31 @@ zio_execute(zio_t *zio)
     spl_fstrans_unmark(cookie);
 }
 
+/*
+ * Used to determine if in the current context the stack is sized large
+ * enough to allow zio_execute() to be called recursively. A minimum
+ * stack size of 16K is required to avoid needing to re-dispatch the zio.
+ */
+boolean_t
+zio_execute_stack_check(zio_t *zio)
+{
+#if !defined(HAVE_LARGE_STACKS)
+    dsl_pool_t *dp = spa_get_dsl(zio->io_spa);
+
+    /* Executing in txg_sync_thread() context. */
+    if (dp && curthread == dp->dp_tx.tx_sync_thread)
+        return (B_TRUE);
+
+    /* Pool initialization outside of zio_taskq context. */
+    if (dp && spa_is_initializing(dp->dp_spa) &&
+        !zio_taskq_member(zio, ZIO_TASKQ_ISSUE) &&
+        !zio_taskq_member(zio, ZIO_TASKQ_ISSUE_HIGH))
+        return (B_TRUE);
+#endif /* HAVE_LARGE_STACKS */
+
+    return (B_FALSE);
+}
+
 __attribute__((always_inline))
 static inline void
 __zio_execute(zio_t *zio)
@@ -1410,8 +1435,6 @@ __zio_execute(zio_t *zio)
     while (zio->io_stage < ZIO_STAGE_DONE) {
         enum zio_stage pipeline = zio->io_pipeline;
         enum zio_stage stage = zio->io_stage;
-        dsl_pool_t *dp;
-        boolean_t cut;
         int rv;
 
         ASSERT(!MUTEX_HELD(&zio->io_lock));
@@ -1424,10 +1447,6 @@ __zio_execute(zio_t *zio)
 
         ASSERT(stage <= ZIO_STAGE_DONE);
 
-        dp = spa_get_dsl(zio->io_spa);
-        cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
-            zio_requeue_io_start_cut_in_line : B_FALSE;
-
         /*
          * If we are in interrupt context and this pipeline stage
          * will grab a config lock that is held across I/O,
@@ -1439,21 +1458,19 @@ __zio_execute(zio_t *zio)
          */
         if ((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL &&
             zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) {
+            boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
+                zio_requeue_io_start_cut_in_line : B_FALSE;
             zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
             return;
         }
 
         /*
-         * If we executing in the context of the tx_sync_thread,
-         * or we are performing pool initialization outside of a
-         * zio_taskq[ZIO_TASKQ_ISSUE|ZIO_TASKQ_ISSUE_HIGH] context.
-         * Then issue the zio asynchronously to minimize stack usage
-         * for these deep call paths.
+         * If the current context doesn't have large enough stacks
+         * the zio must be issued asynchronously to prevent overflow.
          */
-        if ((dp && curthread == dp->dp_tx.tx_sync_thread) ||
-            (dp && spa_is_initializing(dp->dp_spa) &&
-            !zio_taskq_member(zio, ZIO_TASKQ_ISSUE) &&
-            !zio_taskq_member(zio, ZIO_TASKQ_ISSUE_HIGH))) {
+        if (zio_execute_stack_check(zio)) {
+            boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
+                zio_requeue_io_start_cut_in_line : B_FALSE;
             zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
             return;
         }
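The net effect in __zio_execute() is that the re-dispatch decision collapses to a single call, shown below as reassembled from the hunks above. On HAVE_LARGE_STACKS builds zio_execute_stack_check() compiles down to returning B_FALSE, so the zio keeps executing its pipeline stages on the current stack; only on small-stack kernels are the tx_sync_thread and pool-initialization contexts re-issued through the taskq:

        /*
         * If the current context doesn't have large enough stacks
         * the zio must be issued asynchronously to prevent overflow.
         */
        if (zio_execute_stack_check(zio)) {
            boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
                zio_requeue_io_start_cut_in_line : B_FALSE;
            zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
            return;
        }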
