Skip to content

Commit f763c3d

Browse files
loli10Kbehlendorf
authored andcommitted
Fix range locking in ZIL commit codepath
Since OpenZFS 7578 (1b7c1e5) if we have a ZVOL with logbias=throughput we will force WR_INDIRECT itxs in zvol_log_write() setting itx->itx_lr offset and length to the offset and length of the BIO from zvol_write()->zvol_log_write(): these offset and length are later used to take a range lock in zillog->zl_get_data function: zvol_get_data(). Now suppose we have a ZVOL with blocksize=8K and push 4K writes to offset 0: we will only be range-locking 0-4096. This means the ASSERTion we make in dbuf_unoverride() is no longer valid because now dmu_sync() is called from zilog's get_data functions holding a partial lock on the dbuf. Fix this by taking a range lock on the whole block in zvol_get_data(). Reviewed-by: Chunwei Chen <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: loli10K <[email protected]> Closes #6238 Closes #6315 Closes #6356 Closes #6477
1 parent 08de8c1 commit f763c3d

File tree

7 files changed

+123
-14
lines changed

7 files changed

+123
-14
lines changed

cmd/ztest/ztest.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2145,6 +2145,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
21452145
if (buf != NULL) { /* immediate write */
21462146
zgd_private->z_rl = ztest_range_lock(zd, object, offset, size,
21472147
RL_READER);
2148+
zgd->zgd_rl = zgd_private->z_rl->z_rl;
21482149

21492150
error = dmu_read(os, object, offset, size, buf,
21502151
DMU_READ_NO_PREFETCH);
@@ -2160,6 +2161,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
21602161

21612162
zgd_private->z_rl = ztest_range_lock(zd, object, offset, size,
21622163
RL_READER);
2164+
zgd->zgd_rl = zgd_private->z_rl->z_rl;
21632165

21642166
error = dmu_buf_hold(os, object, offset, zgd, &db,
21652167
DMU_READ_NO_PREFETCH);

module/zfs/dmu.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include <sys/zfeature.h>
5050
#include <sys/abd.h>
5151
#include <sys/trace_dmu.h>
52+
#include <sys/zfs_rlock.h>
5253
#ifdef _KERNEL
5354
#include <sys/vmsystm.h>
5455
#include <sys/zfs_znode.h>
@@ -1815,6 +1816,11 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
18151816
ASSERT(pio != NULL);
18161817
ASSERT(txg != 0);
18171818

1819+
/* dbuf is within the locked range */
1820+
ASSERT3U(db->db.db_offset, >=, zgd->zgd_rl->r_off);
1821+
ASSERT3U(db->db.db_offset + db->db.db_size, <=,
1822+
zgd->zgd_rl->r_off + zgd->zgd_rl->r_len);
1823+
18181824
SET_BOOKMARK(&zb, ds->ds_object,
18191825
db->db.db_object, db->db_level, db->db_blkid);
18201826

module/zfs/zfs_vnops.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1050,7 +1050,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
10501050
} else { /* indirect write */
10511051
/*
10521052
* Have to lock the whole block to ensure when it's
1053-
* written out and it's checksum is being calculated
1053+
* written out and its checksum is being calculated
10541054
* that no one can change the data. We need to re-check
10551055
* blocksize after we get the lock in case it's changed!
10561056
*/

module/zfs/zvol.c

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,7 @@ zvol_discard(void *arg)
821821
uint64_t start = BIO_BI_SECTOR(bio) << 9;
822822
uint64_t size = BIO_BI_SIZE(bio);
823823
uint64_t end = start + size;
824+
boolean_t sync;
824825
int error = 0;
825826
dmu_tx_t *tx;
826827
unsigned long start_jif;
@@ -830,9 +831,11 @@ zvol_discard(void *arg)
830831
start_jif = jiffies;
831832
generic_start_io_acct(WRITE, bio_sectors(bio), &zv->zv_disk->part0);
832833

834+
sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
835+
833836
if (end > zv->zv_volsize) {
834837
error = SET_ERROR(EIO);
835-
goto out;
838+
goto unlock;
836839
}
837840

838841
/*
@@ -848,7 +851,7 @@ zvol_discard(void *arg)
848851
}
849852

850853
if (start >= end)
851-
goto out;
854+
goto unlock;
852855

853856
tx = dmu_tx_create(zv->zv_objset);
854857
dmu_tx_mark_netfree(tx);
@@ -861,9 +864,11 @@ zvol_discard(void *arg)
861864
error = dmu_free_long_range(zv->zv_objset,
862865
ZVOL_OBJ, start, size);
863866
}
864-
865-
out:
867+
unlock:
866868
zfs_range_unlock(zvr->rl);
869+
if (error == 0 && sync)
870+
zil_commit(zv->zv_zilog, ZVOL_OBJ);
871+
867872
rw_exit(&zv->zv_suspend_lock);
868873
generic_end_io_acct(WRITE, &zv->zv_disk->part0, start_jif);
869874
BIO_END_IO(bio, -error);
@@ -933,6 +938,8 @@ zvol_request(struct request_queue *q, struct bio *bio)
933938
}
934939

935940
if (rw == WRITE) {
941+
boolean_t need_sync = B_FALSE;
942+
936943
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
937944
BIO_END_IO(bio, -SET_ERROR(EROFS));
938945
goto out;
@@ -966,13 +973,24 @@ zvol_request(struct request_queue *q, struct bio *bio)
966973
*/
967974
zvr->rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
968975
RL_WRITER);
976+
/*
977+
* Sync writes and discards execute zil_commit() which may need
978+
* to take a RL_READER lock on the whole block being modified
979+
* via its zillog->zl_get_data(): to avoid circular dependency
980+
* issues with taskq threads execute these requests
981+
* synchronously here in zvol_request().
982+
*/
983+
need_sync = bio_is_fua(bio) ||
984+
zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
969985
if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
970-
if (zvol_request_sync || taskq_dispatch(zvol_taskq,
971-
zvol_discard, zvr, TQ_SLEEP) == TASKQID_INVALID)
986+
if (zvol_request_sync || need_sync ||
987+
taskq_dispatch(zvol_taskq, zvol_discard, zvr,
988+
TQ_SLEEP) == TASKQID_INVALID)
972989
zvol_discard(zvr);
973990
} else {
974-
if (zvol_request_sync || taskq_dispatch(zvol_taskq,
975-
zvol_write, zvr, TQ_SLEEP) == TASKQID_INVALID)
991+
if (zvol_request_sync || need_sync ||
992+
taskq_dispatch(zvol_taskq, zvol_write, zvr,
993+
TQ_SLEEP) == TASKQID_INVALID)
976994
zvol_write(zvr);
977995
}
978996
} else {
@@ -1030,8 +1048,6 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
10301048

10311049
zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
10321050
zgd->zgd_zilog = zv->zv_zilog;
1033-
zgd->zgd_rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
1034-
RL_READER);
10351051

10361052
/*
10371053
* Write records come in two flavors: immediate and indirect.
@@ -1041,11 +1057,21 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
10411057
* we don't have to write the data twice.
10421058
*/
10431059
if (buf != NULL) { /* immediate write */
1060+
zgd->zgd_rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
1061+
RL_READER);
10441062
error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
10451063
DMU_READ_NO_PREFETCH);
1046-
} else {
1064+
} else { /* indirect write */
1065+
/*
1066+
* Have to lock the whole block to ensure when it's written out
1067+
* and its checksum is being calculated that no one can change
1068+
* the data. Contrarily to zfs_get_data we need not re-check
1069+
* blocksize after we get the lock because it cannot be changed.
1070+
*/
10471071
size = zv->zv_volblocksize;
10481072
offset = P2ALIGN_TYPED(offset, size, uint64_t);
1073+
zgd->zgd_rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
1074+
RL_READER);
10491075
error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db,
10501076
DMU_READ_NO_PREFETCH);
10511077
if (error == 0) {

tests/runfiles/linux.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ tests = ['zvol_cli_001_pos', 'zvol_cli_002_pos', 'zvol_cli_003_neg']
594594
[tests/functional/zvol/zvol_misc]
595595
tests = ['zvol_misc_001_neg', 'zvol_misc_002_pos', 'zvol_misc_003_neg',
596596
'zvol_misc_004_pos', 'zvol_misc_005_neg', 'zvol_misc_006_pos',
597-
'zvol_misc_snapdev', 'zvol_misc_volmode']
597+
'zvol_misc_snapdev', 'zvol_misc_volmode', 'zvol_misc_zil']
598598

599599
[tests/functional/zvol/zvol_swap]
600600
tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_003_pos',

tests/zfs-tests/tests/functional/zvol/zvol_misc/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ dist_pkgdata_SCRIPTS = \
1010
zvol_misc_005_neg.ksh \
1111
zvol_misc_006_pos.ksh \
1212
zvol_misc_snapdev.ksh \
13-
zvol_misc_volmode.ksh
13+
zvol_misc_volmode.ksh \
14+
zvol_misc_zil.ksh
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/bin/ksh -p
2+
#
3+
# CDDL HEADER START
4+
#
5+
# The contents of this file are subject to the terms of the
6+
# Common Development and Distribution License (the "License").
7+
# You may not use this file except in compliance with the License.
8+
#
9+
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10+
# or http://www.opensolaris.org/os/licensing.
11+
# See the License for the specific language governing permissions
12+
# and limitations under the License.
13+
#
14+
# When distributing Covered Code, include this CDDL HEADER in each
15+
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16+
# If applicable, add the following below this CDDL HEADER, with the
17+
# fields enclosed by brackets "[]" replaced with your own identifying
18+
# information: Portions Copyright [yyyy] [name of copyright owner]
19+
#
20+
# CDDL HEADER END
21+
#
22+
23+
#
24+
# Copyright 2017, loli10K <[email protected]>. All rights reserved.
25+
#
26+
27+
. $STF_SUITE/include/libtest.shlib
28+
. $STF_SUITE/tests/functional/zvol/zvol_common.shlib
29+
. $STF_SUITE/tests/functional/zvol/zvol_misc/zvol_misc_common.kshlib
30+
31+
#
32+
# DESCRIPTION:
33+
# Verify ZIL functionality on ZVOLs
34+
#
35+
# STRATEGY:
36+
# 1. Create a ZVOLs with various combination of "logbias" and "sync" values
37+
# 2. Write data to ZVOL device node
38+
# 3. Verify we don't trigger any issue like the one reported in #6238
39+
#
40+
41+
verify_runnable "global"
42+
43+
function cleanup
44+
{
45+
datasetexists $ZVOL && log_must_busy zfs destroy $ZVOL
46+
udev_wait
47+
}
48+
49+
log_assert "Verify ZIL functionality on ZVOLs"
50+
log_onexit cleanup
51+
52+
ZVOL="$TESTPOOL/vol"
53+
ZDEV="$ZVOL_DEVDIR/$ZVOL"
54+
typeset -a logbias_prop_vals=('latency' 'throughput')
55+
typeset -a sync_prop_vals=('standard' 'always' 'disabled')
56+
57+
for logbias in ${logbias_prop_vals[@]}; do
58+
for sync in ${sync_prop_vals[@]}; do
59+
# 1. Create a ZVOL with logbias=throughput and sync=always
60+
log_must zfs create -V $VOLSIZE -b 128K -o sync=$sync \
61+
-o logbias=$logbias $ZVOL
62+
63+
# 2. Write data to its device node
64+
for i in {1..50}; do
65+
dd if=/dev/zero of=$ZDEV bs=8k count=1 &
66+
done
67+
68+
# 3. Verify we don't trigger any issue
69+
log_must wait
70+
log_must_busy zfs destroy $ZVOL
71+
done
72+
done
73+
74+
log_pass "ZIL functionality works on ZVOLs"

0 commit comments

Comments
 (0)