Skip to content

Commit 7f9d9e6

Browse files
authored
Avoid vq_lock drop in vdev_queue_aggregate()
vq_lock is already too congested for two more operations per I/O. Instead of dropping and reacquiring it inside vdev_queue_aggregate() delegate the zio_vdev_io_bypass() and zio_execute() calls for parent I/Os to callers, that drop the lock any way to execute the new I/O. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Mark Maybee <[email protected]> Reviewed-by: Brian Atkinson <[email protected]> Signed-off-by: Alexander Motin <[email protected]> Sponsored-By: iXsystems, Inc. Closes #12297
1 parent e829a86 commit 7f9d9e6

File tree

1 file changed

+34
-29
lines changed

1 file changed

+34
-29
lines changed

module/zfs/vdev_queue.c

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,6 @@ static zio_t *
599599
vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
600600
{
601601
zio_t *first, *last, *aio, *dio, *mandatory, *nio;
602-
zio_link_t *zl = NULL;
603602
uint64_t maxgap = 0;
604603
uint64_t size;
605604
uint64_t limit;
@@ -797,19 +796,12 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
797796
ASSERT3U(abd_get_size(aio->io_abd), ==, aio->io_size);
798797

799798
/*
800-
* We need to drop the vdev queue's lock during zio_execute() to
801-
* avoid a deadlock that we could encounter due to lock order
802-
* reversal between vq_lock and io_lock in zio_change_priority().
799+
* Callers must call zio_vdev_io_bypass() and zio_execute() for
800+
* aggregated (parent) I/Os so that we could avoid dropping the
801+
* queue's lock here to avoid a deadlock that we could encounter
802+
* due to lock order reversal between vq_lock and io_lock in
803+
* zio_change_priority().
803804
*/
804-
mutex_exit(&vq->vq_lock);
805-
while ((dio = zio_walk_parents(aio, &zl)) != NULL) {
806-
ASSERT3U(dio->io_type, ==, aio->io_type);
807-
808-
zio_vdev_io_bypass(dio);
809-
zio_execute(dio);
810-
}
811-
mutex_enter(&vq->vq_lock);
812-
813805
return (aio);
814806
}
815807

@@ -847,23 +839,24 @@ vdev_queue_io_to_issue(vdev_queue_t *vq)
847839
ASSERT3U(zio->io_priority, ==, p);
848840

849841
aio = vdev_queue_aggregate(vq, zio);
850-
if (aio != NULL)
842+
if (aio != NULL) {
851843
zio = aio;
852-
else
844+
} else {
853845
vdev_queue_io_remove(vq, zio);
854846

855-
/*
856-
* If the I/O is or was optional and therefore has no data, we need to
857-
* simply discard it. We need to drop the vdev queue's lock to avoid a
858-
* deadlock that we could encounter since this I/O will complete
859-
* immediately.
860-
*/
861-
if (zio->io_flags & ZIO_FLAG_NODATA) {
862-
mutex_exit(&vq->vq_lock);
863-
zio_vdev_io_bypass(zio);
864-
zio_execute(zio);
865-
mutex_enter(&vq->vq_lock);
866-
goto again;
847+
/*
848+
* If the I/O is or was optional and therefore has no data, we
849+
* need to simply discard it. We need to drop the vdev queue's
850+
* lock to avoid a deadlock that we could encounter since this
851+
* I/O will complete immediately.
852+
*/
853+
if (zio->io_flags & ZIO_FLAG_NODATA) {
854+
mutex_exit(&vq->vq_lock);
855+
zio_vdev_io_bypass(zio);
856+
zio_execute(zio);
857+
mutex_enter(&vq->vq_lock);
858+
goto again;
859+
}
867860
}
868861

869862
vdev_queue_pending_add(vq, zio);
@@ -876,7 +869,8 @@ zio_t *
876869
vdev_queue_io(zio_t *zio)
877870
{
878871
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
879-
zio_t *nio;
872+
zio_t *dio, *nio;
873+
zio_link_t *zl = NULL;
880874

881875
if (zio->io_flags & ZIO_FLAG_DONT_QUEUE)
882876
return (zio);
@@ -923,6 +917,11 @@ vdev_queue_io(zio_t *zio)
923917
return (NULL);
924918

925919
if (nio->io_done == vdev_queue_agg_io_done) {
920+
while ((dio = zio_walk_parents(nio, &zl)) != NULL) {
921+
ASSERT3U(dio->io_type, ==, nio->io_type);
922+
zio_vdev_io_bypass(dio);
923+
zio_execute(dio);
924+
}
926925
zio_nowait(nio);
927926
return (NULL);
928927
}
@@ -934,7 +933,8 @@ void
934933
vdev_queue_io_done(zio_t *zio)
935934
{
936935
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
937-
zio_t *nio;
936+
zio_t *dio, *nio;
937+
zio_link_t *zl = NULL;
938938

939939
hrtime_t now = gethrtime();
940940
vq->vq_io_complete_ts = now;
@@ -946,6 +946,11 @@ vdev_queue_io_done(zio_t *zio)
946946
while ((nio = vdev_queue_io_to_issue(vq)) != NULL) {
947947
mutex_exit(&vq->vq_lock);
948948
if (nio->io_done == vdev_queue_agg_io_done) {
949+
while ((dio = zio_walk_parents(nio, &zl)) != NULL) {
950+
ASSERT3U(dio->io_type, ==, nio->io_type);
951+
zio_vdev_io_bypass(dio);
952+
zio_execute(dio);
953+
}
949954
zio_nowait(nio);
950955
} else {
951956
zio_vdev_io_reissue(nio);

0 commit comments

Comments
 (0)