Commit 588fa16

amotin authored and tonyhutter committed
ZAP: Reduce leaf array and free chunks fragmentation
The previous implementation of zap_leaf_array_free() put chunks on the free list in reverse order. Also, zap_leaf_transfer_entry() and zap_entry_remove() were freeing name and value arrays in reverse order. Together this created a mess in the free list, making following allocations much more fragmented than necessary.

This patch re-implements zap_leaf_array_free() to keep the existing chunk order, and implements a non-destructive zap_leaf_array_copy() to be used in zap_leaf_transfer_entry(), allowing properly ordered freeing of name and value arrays there and in zap_entry_remove().

With this change, a test of some writes and deletes shows the percentage of non-contiguous chunks in DDT dropping from 61% and 47% to 0% and 17% for arrays and frees respectively. Sure, some explicit sorting could do even better, especially for ZAPs with variable-size arrays, but it would also cost much more, while this should be very cheap.

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored by: iXsystems, Inc.
Closes #16766
(cherry picked from commit 9a81484)
1 parent 92f430b
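For illustration only, here is a minimal standalone C sketch of the free-list behavior the commit message describes. The next[] array, freelist variable, and helper names are simplified stand-ins for the real zap_leaf structures, not the ZFS code itself; the sketch only shows why freeing each chunk with a head insertion (the old zap_leaf_array_free()) reverses the chunk order on the free list, while splicing the whole chain ahead of the old list (the new approach) preserves it.

/*
 * Standalone illustration, not ZFS code: a tiny chunk table with a
 * singly-linked free list, comparing the two ways of freeing a chain.
 */
#include <stdio.h>
#include <stdint.h>

#define	NUMCHUNKS	8
#define	CHAIN_END	0xffff

static uint16_t next[NUMCHUNKS];	/* la_next / lf_next analogue */
static uint16_t freelist = CHAIN_END;	/* lh_freelist analogue */

static void
print_freelist(const char *tag)
{
	printf("%s:", tag);
	for (uint16_t c = freelist; c != CHAIN_END; c = next[c])
		printf(" %u", (unsigned)c);
	printf("\n");
}

/* Old behavior: push every chunk on the list head, reversing the chain. */
static void
free_chain_reversed(uint16_t chunk)
{
	while (chunk != CHAIN_END) {
		uint16_t nextchunk = next[chunk];
		next[chunk] = freelist;
		freelist = chunk;
		chunk = nextchunk;
	}
}

/* New behavior: splice the chain in front of the old free list as-is. */
static void
free_chain_in_order(uint16_t chunk)
{
	uint16_t *tailp = &freelist;
	uint16_t oldfree = *tailp;

	while (chunk != CHAIN_END) {
		*tailp = chunk;
		tailp = &next[chunk];
		chunk = next[chunk];
	}
	*tailp = oldfree;
}

static void
build_chain(void)
{
	/* An array chain occupying contiguous chunks 2 -> 3 -> 4. */
	freelist = CHAIN_END;
	next[2] = 3;
	next[3] = 4;
	next[4] = CHAIN_END;
}

int
main(void)
{
	build_chain();
	free_chain_reversed(2);
	print_freelist("old (reversed)");	/* prints: 4 3 2 */

	build_chain();
	free_chain_in_order(2);
	print_freelist("new (in order)");	/* prints: 2 3 4 */
	return (0);
}

Since allocations take chunks from the head of the free list, a list that keeps chunks in their original order tends to hand back contiguous runs again, which is the fragmentation reduction the commit message measures.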

File tree

1 file changed: +62 -44 lines changed

module/zfs/zap_leaf.c

Lines changed: 62 additions & 44 deletions
@@ -249,20 +249,63 @@ zap_leaf_array_create(zap_leaf_t *l, const char *buf,
 	return (chunk_head);
 }
 
-static void
-zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
+/*
+ * Non-destructively copy array between leaves.
+ */
+static uint16_t
+zap_leaf_array_copy(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
 {
-	uint16_t chunk = *chunkp;
+	uint16_t new_chunk;
+	uint16_t *nchunkp = &new_chunk;
 
-	*chunkp = CHAIN_END;
+	while (chunk != CHAIN_END) {
+		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
+		uint16_t nchunk = zap_leaf_chunk_alloc(nl);
+
+		struct zap_leaf_array *la =
+		    &ZAP_LEAF_CHUNK(l, chunk).l_array;
+		struct zap_leaf_array *nla =
+		    &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
+		ASSERT3U(la->la_type, ==, ZAP_CHUNK_ARRAY);
+
+		*nla = *la; /* structure assignment */
+
+		chunk = la->la_next;
+		*nchunkp = nchunk;
+		nchunkp = &nla->la_next;
+	}
+	*nchunkp = CHAIN_END;
+	return (new_chunk);
+}
+
+/*
+ * Free array. Unlike trivial loop of zap_leaf_chunk_free() this does
+ * not reverse order of chunks in the free list, reducing fragmentation.
+ */
+static void
+zap_leaf_array_free(zap_leaf_t *l, uint16_t chunk)
+{
+	struct zap_leaf_header *hdr = &zap_leaf_phys(l)->l_hdr;
+	uint16_t *tailp = &hdr->lh_freelist;
+	uint16_t oldfree = *tailp;
 
 	while (chunk != CHAIN_END) {
-		uint_t nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
-		ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
-		    ZAP_CHUNK_ARRAY);
-		zap_leaf_chunk_free(l, chunk);
-		chunk = nextchunk;
+		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
+		zap_leaf_chunk_t *c = &ZAP_LEAF_CHUNK(l, chunk);
+		ASSERT3U(c->l_array.la_type, ==, ZAP_CHUNK_ARRAY);
+
+		*tailp = chunk;
+		chunk = c->l_array.la_next;
+
+		c->l_free.lf_type = ZAP_CHUNK_FREE;
+		memset(c->l_free.lf_pad, 0, sizeof (c->l_free.lf_pad));
+		tailp = &c->l_free.lf_next;
+
+		ASSERT3U(hdr->lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
+		hdr->lh_nfree++;
 	}
+
+	*tailp = oldfree;
 }
 
 /* array_len and buf_len are in integers, not bytes */
@@ -516,7 +559,7 @@ zap_entry_update(zap_entry_handle_t *zeh,
 	if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks)
 		return (SET_ERROR(EAGAIN));
 
-	zap_leaf_array_free(l, &le->le_value_chunk);
+	zap_leaf_array_free(l, le->le_value_chunk);
 	le->le_value_chunk =
 	    zap_leaf_array_create(l, buf, integer_size, num_integers);
 	le->le_value_numints = num_integers;
@@ -535,10 +578,11 @@ zap_entry_remove(zap_entry_handle_t *zeh)
 	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry_chunk);
 	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
 
-	zap_leaf_array_free(l, &le->le_name_chunk);
-	zap_leaf_array_free(l, &le->le_value_chunk);
-
 	*zeh->zeh_chunkp = le->le_next;
+
+	/* Free in opposite order to reduce fragmentation. */
+	zap_leaf_array_free(l, le->le_value_chunk);
+	zap_leaf_array_free(l, le->le_name_chunk);
 	zap_leaf_chunk_free(l, entry_chunk);
 
 	zap_leaf_phys(l)->l_hdr.lh_nentries--;
@@ -702,34 +746,6 @@ zap_leaf_rehash_entry(zap_leaf_t *l, struct zap_leaf_entry *le, uint16_t entry)
 	return (chunkp);
 }
 
-static uint16_t
-zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
-{
-	uint16_t new_chunk;
-	uint16_t *nchunkp = &new_chunk;
-
-	while (chunk != CHAIN_END) {
-		uint16_t nchunk = zap_leaf_chunk_alloc(nl);
-		struct zap_leaf_array *nla =
-		    &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
-		struct zap_leaf_array *la =
-		    &ZAP_LEAF_CHUNK(l, chunk).l_array;
-		uint_t nextchunk = la->la_next;
-
-		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
-		ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));
-
-		*nla = *la; /* structure assignment */
-
-		zap_leaf_chunk_free(l, chunk);
-		chunk = nextchunk;
-		*nchunkp = nchunk;
-		nchunkp = &nla->la_next;
-	}
-	*nchunkp = CHAIN_END;
-	return (new_chunk);
-}
-
 static void
 zap_leaf_transfer_entry(zap_leaf_t *l, uint_t entry, zap_leaf_t *nl)
 {
@@ -742,10 +758,12 @@ zap_leaf_transfer_entry(zap_leaf_t *l, uint_t entry, zap_leaf_t *nl)
 
 	(void) zap_leaf_rehash_entry(nl, nle, chunk);
 
-	nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
-	nle->le_value_chunk =
-	    zap_leaf_transfer_array(l, le->le_value_chunk, nl);
+	nle->le_name_chunk = zap_leaf_array_copy(l, le->le_name_chunk, nl);
+	nle->le_value_chunk = zap_leaf_array_copy(l, le->le_value_chunk, nl);
 
+	/* Free in opposite order to reduce fragmentation. */
+	zap_leaf_array_free(l, le->le_value_chunk);
+	zap_leaf_array_free(l, le->le_name_chunk);
 	zap_leaf_chunk_free(l, entry);
 
 	zap_leaf_phys(l)->l_hdr.lh_nentries--;
