Skip to content

Commit e040843

Browse files
committed
allow callers to allocate and provide the abd_t struct
The `abd_get_offset_*()` routines create an abd_t that references another abd_t and does not allocate any pages/buffers of its own. In some workloads, these routines may be called frequently, to create many abd_t's representing small pieces of a single large abd_t. In particular, the upcoming RAIDZ Expansion project makes heavy use of these routines.

This commit adds the ability for the caller to allocate and provide the abd_t struct to a variant of `abd_get_offset_*()`. This eliminates the cost of allocating the abd_t and performing the accounting associated with it (`abdstat_struct_size`). The RAIDZ/DRAID code uses this for the `rc_abd`, which references the zio's abd. The upcoming RAIDZ Expansion project will leverage this infrastructure to increase performance of reads post-expansion by around 50%.

Additionally, some of the interfaces around creating and destroying abd_t's are cleaned up. Most significantly, the distinction between `abd_put()` and `abd_free()` is eliminated; all types of abd_t's are now disposed of with `abd_free()`.

Signed-off-by: Matthew Ahrens <[email protected]>
Requires-builders: arch,centos7,centos8,centosstream8,debian10,fedora33,ubuntu18,ubuntu20,builtin,freebsd12,freebsd13,coverage
1 parent a9eaae0 commit e040843

File tree

16 files changed

+259
-293
lines changed

16 files changed

+259
-293
lines changed

cmd/raidz_test/raidz_test.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -492,8 +492,9 @@ vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
492492
(dc - r) * (rows - 1) + row;
493493
}
494494
rr->rr_col[c].rc_size = 1ULL << ashift;
495-
rr->rr_col[c].rc_abd =
496-
abd_get_offset(abd, off << ashift);
495+
rr->rr_col[c].rc_abd = abd_get_offset_struct(
496+
&rr->rr_col[c].rc_abdstruct,
497+
abd, off << ashift, 1 << ashift);
497498
}
498499

499500
asize += rr->rr_col[c].rc_size;

include/sys/abd.h

Lines changed: 65 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,46 @@
3535
extern "C" {
3636
#endif
3737

38-
struct abd; /* forward declaration */
39-
typedef struct abd abd_t;
38+
typedef enum abd_flags {
39+
ABD_FLAG_LINEAR = 1 << 0, /* is buffer linear (or scattered)? */
40+
ABD_FLAG_OWNER = 1 << 1, /* does it own its data buffers? */
41+
ABD_FLAG_META = 1 << 2, /* does this represent FS metadata? */
42+
ABD_FLAG_MULTI_ZONE = 1 << 3, /* pages split over memory zones */
43+
ABD_FLAG_MULTI_CHUNK = 1 << 4, /* pages split over multiple chunks */
44+
ABD_FLAG_LINEAR_PAGE = 1 << 5, /* linear but allocd from page */
45+
ABD_FLAG_GANG = 1 << 6, /* mult ABDs chained together */
46+
ABD_FLAG_GANG_FREE = 1 << 7, /* gang ABD is responsible for mem */
47+
ABD_FLAG_ZEROS = 1 << 8, /* ABD for zero-filled buffer */
48+
ABD_FLAG_ALLOCD = 1 << 9, /* we allocated the abd_t */
49+
} abd_flags_t;
50+
51+
typedef struct abd {
52+
abd_flags_t abd_flags;
53+
uint_t abd_size; /* excludes scattered abd_offset */
54+
list_node_t abd_gang_link;
55+
struct abd *abd_parent;
56+
zfs_refcount_t abd_children;
57+
kmutex_t abd_mtx;
58+
union {
59+
struct abd_scatter {
60+
uint_t abd_offset;
61+
#if defined(__FreeBSD__) && defined(_KERNEL)
62+
uint_t abd_chunk_size;
63+
void *abd_chunks[1]; /* actually variable-length */
64+
#else
65+
uint_t abd_nents;
66+
struct scatterlist *abd_sgl;
67+
#endif
68+
} abd_scatter;
69+
struct abd_linear {
70+
void *abd_buf;
71+
struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
72+
} abd_linear;
73+
struct abd_gang {
74+
list_t abd_gang_chain;
75+
} abd_gang;
76+
} abd_u;
77+
} abd_t;
4078

4179
typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
4280
typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *priv);
@@ -49,14 +87,14 @@ extern int zfs_abd_scatter_enabled;
4987

5088
abd_t *abd_alloc(size_t, boolean_t);
5189
abd_t *abd_alloc_linear(size_t, boolean_t);
52-
abd_t *abd_alloc_gang_abd(void);
90+
abd_t *abd_alloc_gang(void);
5391
abd_t *abd_alloc_for_io(size_t, boolean_t);
5492
abd_t *abd_alloc_sametype(abd_t *, size_t);
5593
void abd_gang_add(abd_t *, abd_t *, boolean_t);
5694
void abd_free(abd_t *);
57-
void abd_put(abd_t *);
5895
abd_t *abd_get_offset(abd_t *, size_t);
5996
abd_t *abd_get_offset_size(abd_t *, size_t, size_t);
97+
abd_t *abd_get_offset_struct(abd_t *, abd_t *, size_t, size_t);
6098
abd_t *abd_get_zeros(size_t);
6199
abd_t *abd_get_from_buf(void *, size_t);
62100
void abd_cache_reap_now(void);
@@ -87,7 +125,6 @@ int abd_cmp(abd_t *, abd_t *);
87125
int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
88126
void abd_zero_off(abd_t *, size_t, size_t);
89127
void abd_verify(abd_t *);
90-
uint_t abd_get_size(abd_t *);
91128

92129
void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
93130
ssize_t csize, ssize_t dsize, const unsigned parity,
@@ -135,9 +172,29 @@ abd_zero(abd_t *abd, size_t size)
135172
/*
136173
* ABD type check functions
137174
*/
138-
boolean_t abd_is_linear(abd_t *);
139-
boolean_t abd_is_gang(abd_t *);
140-
boolean_t abd_is_linear_page(abd_t *);
175+
static inline boolean_t
176+
abd_is_linear(abd_t *abd)
177+
{
178+
return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0);
179+
}
180+
181+
static inline boolean_t
182+
abd_is_linear_page(abd_t *abd)
183+
{
184+
return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0);
185+
}
186+
187+
static inline boolean_t
188+
abd_is_gang(abd_t *abd)
189+
{
190+
return ((abd->abd_flags & ABD_FLAG_GANG) != 0);
191+
}
192+
193+
static inline uint_t
194+
abd_get_size(abd_t *abd)
195+
{
196+
return (abd->abd_size);
197+
}
141198

142199
/*
143200
* Module lifecycle

include/sys/abd_impl.h

Lines changed: 5 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -32,51 +32,11 @@
3232
extern "C" {
3333
#endif
3434

35-
typedef enum abd_flags {
36-
ABD_FLAG_LINEAR = 1 << 0, /* is buffer linear (or scattered)? */
37-
ABD_FLAG_OWNER = 1 << 1, /* does it own its data buffers? */
38-
ABD_FLAG_META = 1 << 2, /* does this represent FS metadata? */
39-
ABD_FLAG_MULTI_ZONE = 1 << 3, /* pages split over memory zones */
40-
ABD_FLAG_MULTI_CHUNK = 1 << 4, /* pages split over multiple chunks */
41-
ABD_FLAG_LINEAR_PAGE = 1 << 5, /* linear but allocd from page */
42-
ABD_FLAG_GANG = 1 << 6, /* mult ABDs chained together */
43-
ABD_FLAG_GANG_FREE = 1 << 7, /* gang ABD is responsible for mem */
44-
ABD_FLAG_ZEROS = 1 << 8, /* ABD for zero-filled buffer */
45-
} abd_flags_t;
46-
4735
typedef enum abd_stats_op {
4836
ABDSTAT_INCR, /* Increase abdstat values */
4937
ABDSTAT_DECR /* Decrease abdstat values */
5038
} abd_stats_op_t;
5139

52-
struct abd {
53-
abd_flags_t abd_flags;
54-
uint_t abd_size; /* excludes scattered abd_offset */
55-
list_node_t abd_gang_link;
56-
struct abd *abd_parent;
57-
zfs_refcount_t abd_children;
58-
kmutex_t abd_mtx;
59-
union {
60-
struct abd_scatter {
61-
uint_t abd_offset;
62-
#if defined(__FreeBSD__) && defined(_KERNEL)
63-
uint_t abd_chunk_size;
64-
void *abd_chunks[];
65-
#else
66-
uint_t abd_nents;
67-
struct scatterlist *abd_sgl;
68-
#endif
69-
} abd_scatter;
70-
struct abd_linear {
71-
void *abd_buf;
72-
struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
73-
} abd_linear;
74-
struct abd_gang {
75-
list_t abd_gang_chain;
76-
} abd_gang;
77-
} abd_u;
78-
};
79-
8040
struct scatterlist; /* forward declaration */
8141

8242
struct abd_iter {
@@ -95,14 +55,16 @@ struct abd_iter {
9555
extern abd_t *abd_zero_scatter;
9656

9757
abd_t *abd_gang_get_offset(abd_t *, size_t *);
58+
abd_t *abd_alloc_struct(size_t);
59+
void abd_free_struct(abd_t *);
9860

9961
/*
10062
* OS specific functions
10163
*/
10264

103-
abd_t *abd_alloc_struct(size_t);
104-
abd_t *abd_get_offset_scatter(abd_t *, size_t);
105-
void abd_free_struct(abd_t *);
65+
abd_t *abd_alloc_struct_impl(size_t);
66+
abd_t *abd_get_offset_scatter(abd_t *, abd_t *, size_t);
67+
void abd_free_struct_impl(abd_t *);
10668
void abd_alloc_chunks(abd_t *, size_t);
10769
void abd_free_chunks(abd_t *);
10870
boolean_t abd_size_alloc_linear(size_t);

include/sys/vdev_raidz_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ typedef struct raidz_col {
106106
uint64_t rc_devidx; /* child device index for I/O */
107107
uint64_t rc_offset; /* device offset */
108108
uint64_t rc_size; /* I/O size */
109+
abd_t rc_abdstruct; /* rc_abd probably points here */
109110
abd_t *rc_abd; /* I/O data */
110111
void *rc_orig_data; /* pre-reconstruction */
111112
abd_t *rc_gdata; /* used to store the "good" version */

module/os/freebsd/zfs/abd_os.c

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ abd_free_chunks(abd_t *abd)
202202
}
203203

204204
abd_t *
205-
abd_alloc_struct(size_t size)
205+
abd_alloc_struct_impl(size_t size)
206206
{
207207
uint_t chunkcnt = abd_chunkcnt_for_bytes(size);
208208
/*
@@ -216,22 +216,18 @@ abd_alloc_struct(size_t size)
216216
offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
217217
abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
218218
ASSERT3P(abd, !=, NULL);
219-
list_link_init(&abd->abd_gang_link);
220-
mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
221219
ABDSTAT_INCR(abdstat_struct_size, abd_size);
222220

223221
return (abd);
224222
}
225223

226224
void
227-
abd_free_struct(abd_t *abd)
225+
abd_free_struct_impl(abd_t *abd)
228226
{
229227
uint_t chunkcnt = abd_is_linear(abd) || abd_is_gang(abd) ? 0 :
230228
abd_scatter_chunkcnt(abd);
231229
ssize_t size = MAX(sizeof (abd_t),
232230
offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]));
233-
mutex_destroy(&abd->abd_mtx);
234-
ASSERT(!list_link_active(&abd->abd_gang_link));
235231
kmem_free(abd, size);
236232
ABDSTAT_INCR(abdstat_struct_size, -size);
237233
}
@@ -249,10 +245,8 @@ abd_alloc_zero_scatter(void)
249245
abd_zero_buf = kmem_zalloc(zfs_abd_chunk_size, KM_SLEEP);
250246
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
251247

252-
abd_zero_scatter->abd_flags = ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
248+
abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
253249
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
254-
abd_zero_scatter->abd_parent = NULL;
255-
zfs_refcount_create(&abd_zero_scatter->abd_children);
256250

257251
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
258252
ABD_SCATTER(abd_zero_scatter).abd_chunk_size =
@@ -270,7 +264,6 @@ abd_alloc_zero_scatter(void)
270264
static void
271265
abd_free_zero_scatter(void)
272266
{
273-
zfs_refcount_destroy(&abd_zero_scatter->abd_children);
274267
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
275268
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)zfs_abd_chunk_size);
276269

@@ -355,25 +348,33 @@ abd_alloc_scatter_offset_chunkcnt(size_t chunkcnt)
355348
}
356349

357350
abd_t *
358-
abd_get_offset_scatter(abd_t *sabd, size_t off)
351+
abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off)
359352
{
360-
abd_t *abd = NULL;
361-
362353
abd_verify(sabd);
363354
ASSERT3U(off, <=, sabd->abd_size);
364355

365356
size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
366357
uint_t chunkcnt = abd_scatter_chunkcnt(sabd) -
367358
(new_offset / zfs_abd_chunk_size);
368359

369-
abd = abd_alloc_scatter_offset_chunkcnt(chunkcnt);
360+
/*
361+
* If an abd struct is provided, it is only the minimum size. If we
362+
* need additional chunks, we need to allocate a new struct.
363+
*/
364+
if (abd != NULL &&
365+
offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]) >
366+
sizeof (abd_t)) {
367+
abd = NULL;
368+
}
369+
370+
if (abd == NULL)
371+
abd = abd_alloc_struct(chunkcnt * zfs_abd_chunk_size);
370372

371373
/*
372374
* Even if this buf is filesystem metadata, we only track that
373375
* if we own the underlying data buffer, which is not true in
374376
* this case. Therefore, we don't ever use ABD_FLAG_META here.
375377
*/
376-
abd->abd_flags = 0;
377378

378379
ABD_SCATTER(abd).abd_offset = new_offset % zfs_abd_chunk_size;
379380
ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;

module/os/linux/zfs/abd_os.c

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -185,26 +185,22 @@ abd_chunkcnt_for_bytes(size_t size)
185185
}
186186

187187
abd_t *
188-
abd_alloc_struct(size_t size)
188+
abd_alloc_struct_impl(size_t size)
189189
{
190190
/*
191191
* In Linux we do not use the size passed in during ABD
192192
* allocation, so we just ignore it.
193193
*/
194194
abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE);
195195
ASSERT3P(abd, !=, NULL);
196-
list_link_init(&abd->abd_gang_link);
197-
mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
198196
ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t));
199197

200198
return (abd);
201199
}
202200

203201
void
204-
abd_free_struct(abd_t *abd)
202+
abd_free_struct_impl(abd_t *abd)
205203
{
206-
mutex_destroy(&abd->abd_mtx);
207-
ASSERT(!list_link_active(&abd->abd_gang_link));
208204
kmem_cache_free(abd_cache, abd);
209205
ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
210206
}
@@ -472,14 +468,12 @@ abd_alloc_zero_scatter(void)
472468
ASSERT3U(table.nents, ==, nr_pages);
473469

474470
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
475-
abd_zero_scatter->abd_flags = ABD_FLAG_OWNER;
471+
abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
476472
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
477473
ABD_SCATTER(abd_zero_scatter).abd_sgl = table.sgl;
478474
ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages;
479475
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
480-
abd_zero_scatter->abd_parent = NULL;
481476
abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS;
482-
zfs_refcount_create(&abd_zero_scatter->abd_children);
483477

484478
abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) {
485479
sg_set_page(sg, abd_zero_page, PAGESIZE, 0);
@@ -599,12 +593,11 @@ abd_alloc_zero_scatter(void)
599593
abd_zero_page = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP);
600594
memset(abd_zero_page, 0, PAGESIZE);
601595
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
602-
abd_zero_scatter->abd_flags = ABD_FLAG_OWNER;
596+
abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER;
603597
abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS;
604598
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
605599
ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages;
606600
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
607-
abd_zero_scatter->abd_parent = NULL;
608601
zfs_refcount_create(&abd_zero_scatter->abd_children);
609602
ABD_SCATTER(abd_zero_scatter).abd_sgl = vmem_alloc(nr_pages *
610603
sizeof (struct scatterlist), KM_SLEEP);
@@ -678,7 +671,6 @@ abd_verify_scatter(abd_t *abd)
678671
static void
679672
abd_free_zero_scatter(void)
680673
{
681-
zfs_refcount_destroy(&abd_zero_scatter->abd_children);
682674
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
683675
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGESIZE);
684676
ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);
@@ -747,9 +739,7 @@ abd_free_linear_page(abd_t *abd)
747739
ABD_SCATTER(abd).abd_sgl = sg;
748740
abd_free_chunks(abd);
749741

750-
zfs_refcount_destroy(&abd->abd_children);
751742
abd_update_scatter_stats(abd, ABDSTAT_DECR);
752-
abd_free_struct(abd);
753743
}
754744

755745
/*
@@ -770,9 +760,8 @@ abd_alloc_for_io(size_t size, boolean_t is_metadata)
770760
}
771761

772762
abd_t *
773-
abd_get_offset_scatter(abd_t *sabd, size_t off)
763+
abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off)
774764
{
775-
abd_t *abd = NULL;
776765
int i = 0;
777766
struct scatterlist *sg = NULL;
778767

@@ -781,14 +770,14 @@ abd_get_offset_scatter(abd_t *sabd, size_t off)
781770

782771
size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
783772

784-
abd = abd_alloc_struct(0);
773+
if (abd == NULL)
774+
abd = abd_alloc_struct(0);
785775

786776
/*
787777
* Even if this buf is filesystem metadata, we only track that
788778
* if we own the underlying data buffer, which is not true in
789779
* this case. Therefore, we don't ever use ABD_FLAG_META here.
790780
*/
791-
abd->abd_flags = 0;
792781

793782
abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) {
794783
if (new_offset < sg->length)

0 commit comments

Comments
 (0)