|
29 | 29 | #include <sys/param.h>
|
30 | 30 | #include <sys/kernel.h>
|
31 | 31 | #include <sys/bio.h>
|
| 32 | +#include <sys/buf.h> |
32 | 33 | #include <sys/file.h>
|
33 | 34 | #include <sys/spa.h>
|
34 | 35 | #include <sys/spa_impl.h>
|
35 | 36 | #include <sys/vdev_impl.h>
|
36 | 37 | #include <sys/vdev_os.h>
|
37 | 38 | #include <sys/fs/zfs.h>
|
38 | 39 | #include <sys/zio.h>
|
| 40 | +#include <vm/vm_page.h> |
39 | 41 | #include <geom/geom.h>
|
40 | 42 | #include <geom/geom_disk.h>
|
41 | 43 | #include <geom/geom_int.h>
|
@@ -1059,6 +1061,80 @@ vdev_geom_io_intr(struct bio *bp)
|
1059 | 1061 | zio_delay_interrupt(zio);
|
1060 | 1062 | }
|
1061 | 1063 |
|
/* Accumulator state shared across vdev_geom_check_unmapped_cb() calls. */
struct vdev_geom_check_unmapped_cb_state {
	int	pages;	/* Physical pages counted so far over all segments. */
	uint_t	end;	/* In-page offset where the previous segment ended
			   (0 if it ended exactly on a page boundary). */
};
| 1068 | + |
| 1069 | +/* |
| 1070 | + * Callback to check the ABD segment size/alignment and count the pages. |
| 1071 | + * GEOM requires data buffer to look virtually contiguous. It means only |
| 1072 | + * the first page of the buffer may not start and only the last may not |
| 1073 | + * end on a page boundary. All other physical pages must be full. |
| 1074 | + */ |
| 1075 | +static int |
| 1076 | +vdev_geom_check_unmapped_cb(void *buf, size_t len, void *priv) |
| 1077 | +{ |
| 1078 | + struct vdev_geom_check_unmapped_cb_state *s = priv; |
| 1079 | + vm_offset_t off = (vm_offset_t)buf & PAGE_MASK; |
| 1080 | + |
| 1081 | + if (s->pages != 0 && off != 0) |
| 1082 | + return (1); |
| 1083 | + if (s->end != 0) |
| 1084 | + return (1); |
| 1085 | + s->end = (off + len) & PAGE_MASK; |
| 1086 | + s->pages += (off + len + PAGE_MASK) >> PAGE_SHIFT; |
| 1087 | + return (0); |
| 1088 | +} |
| 1089 | + |
| 1090 | +/* |
| 1091 | + * Check whether we can use unmapped I/O for this ZIO on this device to |
| 1092 | + * avoid data copying between scattered and/or gang ABD buffer and linear. |
| 1093 | + */ |
| 1094 | +static int |
| 1095 | +vdev_geom_check_unmapped(zio_t *zio, struct g_consumer *cp) |
| 1096 | +{ |
| 1097 | + struct vdev_geom_check_unmapped_cb_state s; |
| 1098 | + |
| 1099 | + /* If the buffer is already linear, then nothing to do here. */ |
| 1100 | + if (abd_is_linear(zio->io_abd)) |
| 1101 | + return (0); |
| 1102 | + |
| 1103 | + /* |
| 1104 | + * If unmapped I/O is not supported by the GEOM provider, |
| 1105 | + * then we can't do anything and have to copy the data. |
| 1106 | + */ |
| 1107 | + if ((cp->provider->flags & G_PF_ACCEPT_UNMAPPED) == 0) |
| 1108 | + return (0); |
| 1109 | + |
| 1110 | + /* Check the buffer chunks sizes/alignments and count pages. */ |
| 1111 | + s.pages = s.end = 0; |
| 1112 | + if (abd_iterate_func(zio->io_abd, 0, zio->io_size, |
| 1113 | + vdev_geom_check_unmapped_cb, &s)) |
| 1114 | + return (0); |
| 1115 | + return (s.pages); |
| 1116 | +} |
| 1117 | + |
| 1118 | +/* |
| 1119 | + * Callback to translate the ABD segment into array of physical pages. |
| 1120 | + */ |
| 1121 | +static int |
| 1122 | +vdev_geom_fill_unmap_cb(void *buf, size_t len, void *priv) |
| 1123 | +{ |
| 1124 | + struct bio *bp = priv; |
| 1125 | + vm_offset_t addr = (vm_offset_t)buf; |
| 1126 | + vm_offset_t end = addr + len; |
| 1127 | + |
| 1128 | + if (bp->bio_ma_n == 0) |
| 1129 | + bp->bio_ma_offset = addr & PAGE_MASK; |
| 1130 | + do { |
| 1131 | + bp->bio_ma[bp->bio_ma_n++] = |
| 1132 | + PHYS_TO_VM_PAGE(pmap_kextract(addr)); |
| 1133 | + addr += PAGE_SIZE; |
| 1134 | + } while (addr < end); |
| 1135 | + return (0); |
| 1136 | +} |
| 1137 | + |
1062 | 1138 | static void
|
1063 | 1139 | vdev_geom_io_start(zio_t *zio)
|
1064 | 1140 | {
|
@@ -1123,14 +1199,34 @@ vdev_geom_io_start(zio_t *zio)
|
1123 | 1199 | zio->io_target_timestamp = zio_handle_io_delay(zio);
|
1124 | 1200 | bp->bio_offset = zio->io_offset;
|
1125 | 1201 | bp->bio_length = zio->io_size;
|
1126 |
| - if (zio->io_type == ZIO_TYPE_READ) { |
| 1202 | + if (zio->io_type == ZIO_TYPE_READ) |
1127 | 1203 | bp->bio_cmd = BIO_READ;
|
1128 |
| - bp->bio_data = |
1129 |
| - abd_borrow_buf(zio->io_abd, zio->io_size); |
1130 |
| - } else { |
| 1204 | + else |
1131 | 1205 | bp->bio_cmd = BIO_WRITE;
|
1132 |
| - bp->bio_data = |
1133 |
| - abd_borrow_buf_copy(zio->io_abd, zio->io_size); |
| 1206 | + |
| 1207 | + /* |
| 1208 | + * If possible, represent scattered and/or gang ABD buffer to |
| 1209 | + * GEOM as an array of physical pages. It allows to satisfy |
| 1210 | + * requirement of virtually contiguous buffer without copying. |
| 1211 | + */ |
| 1212 | + int pgs = vdev_geom_check_unmapped(zio, cp); |
| 1213 | + if (pgs > 0) { |
| 1214 | + bp->bio_ma = malloc(sizeof (struct vm_page *) * pgs, |
| 1215 | + M_DEVBUF, M_WAITOK); |
| 1216 | + bp->bio_ma_n = 0; |
| 1217 | + bp->bio_ma_offset = 0; |
| 1218 | + abd_iterate_func(zio->io_abd, 0, zio->io_size, |
| 1219 | + vdev_geom_fill_unmap_cb, bp); |
| 1220 | + bp->bio_data = unmapped_buf; |
| 1221 | + bp->bio_flags |= BIO_UNMAPPED; |
| 1222 | + } else { |
| 1223 | + if (zio->io_type == ZIO_TYPE_READ) { |
| 1224 | + bp->bio_data = abd_borrow_buf(zio->io_abd, |
| 1225 | + zio->io_size); |
| 1226 | + } else { |
| 1227 | + bp->bio_data = abd_borrow_buf_copy(zio->io_abd, |
| 1228 | + zio->io_size); |
| 1229 | + } |
1134 | 1230 | }
|
1135 | 1231 | break;
|
1136 | 1232 | case ZIO_TYPE_TRIM:
|
@@ -1169,10 +1265,17 @@ vdev_geom_io_done(zio_t *zio)
|
1169 | 1265 | return;
|
1170 | 1266 | }
|
1171 | 1267 |
|
1172 |
| - if (zio->io_type == ZIO_TYPE_READ) |
1173 |
| - abd_return_buf_copy(zio->io_abd, bp->bio_data, zio->io_size); |
1174 |
| - else |
1175 |
| - abd_return_buf(zio->io_abd, bp->bio_data, zio->io_size); |
| 1268 | + if (bp->bio_ma != NULL) { |
| 1269 | + free(bp->bio_ma, M_DEVBUF); |
| 1270 | + } else { |
| 1271 | + if (zio->io_type == ZIO_TYPE_READ) { |
| 1272 | + abd_return_buf_copy(zio->io_abd, bp->bio_data, |
| 1273 | + zio->io_size); |
| 1274 | + } else { |
| 1275 | + abd_return_buf(zio->io_abd, bp->bio_data, |
| 1276 | + zio->io_size); |
| 1277 | + } |
| 1278 | + } |
1176 | 1279 |
|
1177 | 1280 | g_destroy_bio(bp);
|
1178 | 1281 | zio->io_bio = NULL;
|
|
0 commit comments