Skip to content

Commit f4dd502

Browse files
committed
FreeBSD: Use unmapped I/O for scattered/gang ABD buffers.
Many FreeBSD disk drivers support "unmapped" I/O mode, where the data buffer is represented not by a virtually contiguous KVA-mapped address range, but by a list of physical memory pages. Originally it was designed to do I/O from buffers without KVA mapping (unmapped). But moving virtual addresses out of the equation allows us to operate even on non-contiguous data buffers, with one condition: all buffer discontinuities must be aligned to memory page borders. When doing I/O to a capable GEOM device, this patch traverses non-linear ABD buffers, validating the chunk borders. If the condition is met, it supplies GEOM with the list of original physical memory pages instead of copying the data into a temporary contiguous buffer. On capable hardware, on pools with ashift=12 and the default ABD chunk size of 4KB, it should handle all the I/O without additional memory copying. Signed-off-by: Alexander Motin <[email protected]> Sponsored-By: iXsystems, Inc.
1 parent f24c7c3 commit f4dd502

File tree

1 file changed

+113
-10
lines changed

1 file changed

+113
-10
lines changed

module/os/freebsd/zfs/vdev_geom.c

Lines changed: 113 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,15 @@
2929
#include <sys/param.h>
3030
#include <sys/kernel.h>
3131
#include <sys/bio.h>
32+
#include <sys/buf.h>
3233
#include <sys/file.h>
3334
#include <sys/spa.h>
3435
#include <sys/spa_impl.h>
3536
#include <sys/vdev_impl.h>
3637
#include <sys/vdev_os.h>
3738
#include <sys/fs/zfs.h>
3839
#include <sys/zio.h>
40+
#include <vm/vm_page.h>
3941
#include <geom/geom.h>
4042
#include <geom/geom_disk.h>
4143
#include <geom/geom_int.h>
@@ -1059,6 +1061,80 @@ vdev_geom_io_intr(struct bio *bp)
10591061
zio_delay_interrupt(zio);
10601062
}
10611063

1064+
struct vdev_geom_check_unmapped_cb_state {
1065+
int pages;
1066+
uint_t end;
1067+
};
1068+
1069+
/*
1070+
* Callback to check the ABD segment size/alignment and count the pages.
1071+
* GEOM requires data buffer to look virtually contiguous. It means only
1072+
* the first page of the buffer may not start and only the last may not
1073+
* end on a page boundary. All other physical pages must be full.
1074+
*/
1075+
static int
1076+
vdev_geom_check_unmapped_cb(void *buf, size_t len, void *priv)
1077+
{
1078+
struct vdev_geom_check_unmapped_cb_state *s = priv;
1079+
vm_offset_t off = (vm_offset_t)buf & PAGE_MASK;
1080+
1081+
if (s->pages != 0 && off != 0)
1082+
return (1);
1083+
if (s->end != 0)
1084+
return (1);
1085+
s->end = (off + len) & PAGE_MASK;
1086+
s->pages += (off + len + PAGE_MASK) >> PAGE_SHIFT;
1087+
return (0);
1088+
}
1089+
1090+
/*
1091+
* Check whether we can use unmapped I/O for this ZIO on this device to
1092+
* avoid data copying between scattered and/or gang ABD buffer and linear.
1093+
*/
1094+
static int
1095+
vdev_geom_check_unmapped(zio_t *zio, struct g_consumer *cp)
1096+
{
1097+
struct vdev_geom_check_unmapped_cb_state s;
1098+
1099+
/* If the buffer is already linear, then nothing to do here. */
1100+
if (abd_is_linear(zio->io_abd))
1101+
return (0);
1102+
1103+
/*
1104+
* If unmapped I/O is not supported by the GEOM provider,
1105+
* then we can't do anything and have to copy the data.
1106+
*/
1107+
if ((cp->provider->flags & G_PF_ACCEPT_UNMAPPED) == 0)
1108+
return (0);
1109+
1110+
/* Check the buffer chunks sizes/alignments and count pages. */
1111+
s.pages = s.end = 0;
1112+
if (abd_iterate_func(zio->io_abd, 0, zio->io_size,
1113+
vdev_geom_check_unmapped_cb, &s))
1114+
return (0);
1115+
return (s.pages);
1116+
}
1117+
1118+
/*
1119+
* Callback to translate the ABD segment into array of physical pages.
1120+
*/
1121+
static int
1122+
vdev_geom_fill_unmap_cb(void *buf, size_t len, void *priv)
1123+
{
1124+
struct bio *bp = priv;
1125+
vm_offset_t addr = (vm_offset_t)buf;
1126+
vm_offset_t end = addr + len;
1127+
1128+
if (bp->bio_ma_n == 0)
1129+
bp->bio_ma_offset = addr & PAGE_MASK;
1130+
do {
1131+
bp->bio_ma[bp->bio_ma_n++] =
1132+
PHYS_TO_VM_PAGE(pmap_kextract(addr));
1133+
addr += PAGE_SIZE;
1134+
} while (addr < end);
1135+
return (0);
1136+
}
1137+
10621138
static void
10631139
vdev_geom_io_start(zio_t *zio)
10641140
{
@@ -1123,14 +1199,34 @@ vdev_geom_io_start(zio_t *zio)
11231199
zio->io_target_timestamp = zio_handle_io_delay(zio);
11241200
bp->bio_offset = zio->io_offset;
11251201
bp->bio_length = zio->io_size;
1126-
if (zio->io_type == ZIO_TYPE_READ) {
1202+
if (zio->io_type == ZIO_TYPE_READ)
11271203
bp->bio_cmd = BIO_READ;
1128-
bp->bio_data =
1129-
abd_borrow_buf(zio->io_abd, zio->io_size);
1130-
} else {
1204+
else
11311205
bp->bio_cmd = BIO_WRITE;
1132-
bp->bio_data =
1133-
abd_borrow_buf_copy(zio->io_abd, zio->io_size);
1206+
1207+
/*
1208+
* If possible, represent scattered and/or gang ABD buffer to
1209+
* GEOM as an array of physical pages. It allows to satisfy
1210+
* requirement of virtually contiguous buffer without copying.
1211+
*/
1212+
int pgs = vdev_geom_check_unmapped(zio, cp);
1213+
if (pgs > 0) {
1214+
bp->bio_ma = malloc(sizeof (struct vm_page *) * pgs,
1215+
M_DEVBUF, M_WAITOK);
1216+
bp->bio_ma_n = 0;
1217+
bp->bio_ma_offset = 0;
1218+
abd_iterate_func(zio->io_abd, 0, zio->io_size,
1219+
vdev_geom_fill_unmap_cb, bp);
1220+
bp->bio_data = unmapped_buf;
1221+
bp->bio_flags |= BIO_UNMAPPED;
1222+
} else {
1223+
if (zio->io_type == ZIO_TYPE_READ) {
1224+
bp->bio_data = abd_borrow_buf(zio->io_abd,
1225+
zio->io_size);
1226+
} else {
1227+
bp->bio_data = abd_borrow_buf_copy(zio->io_abd,
1228+
zio->io_size);
1229+
}
11341230
}
11351231
break;
11361232
case ZIO_TYPE_TRIM:
@@ -1169,10 +1265,17 @@ vdev_geom_io_done(zio_t *zio)
11691265
return;
11701266
}
11711267

1172-
if (zio->io_type == ZIO_TYPE_READ)
1173-
abd_return_buf_copy(zio->io_abd, bp->bio_data, zio->io_size);
1174-
else
1175-
abd_return_buf(zio->io_abd, bp->bio_data, zio->io_size);
1268+
if (bp->bio_ma != NULL) {
1269+
free(bp->bio_ma, M_DEVBUF);
1270+
} else {
1271+
if (zio->io_type == ZIO_TYPE_READ) {
1272+
abd_return_buf_copy(zio->io_abd, bp->bio_data,
1273+
zio->io_size);
1274+
} else {
1275+
abd_return_buf(zio->io_abd, bp->bio_data,
1276+
zio->io_size);
1277+
}
1278+
}
11761279

11771280
g_destroy_bio(bp);
11781281
zio->io_bio = NULL;

0 commit comments

Comments
 (0)