Skip to content

Commit 033c788

Browse files
mattmacy authored and
tonyhutter committed
Restore ARC MFU/MRU pressure
The arc_adapt() function tunes the MRU/MFU balance according to four types of cache hit (passed as the state argument): ghost MRU, MRU, MFU, and ghost MFU. If this function is called with the wrong cache hit (state), adaptation will be sub-optimal and performance will suffer. Some time ago upstream received the commit "6950 ARC should cache compressed data", which changed the sequence of calls that arc_read() makes on access to a ghost buffer. Before that commit, a hit on any ghost list was passed to arc_adapt() before the call to arc_access(), which revives the element in the cache and changes its state from ghost to real hit. After that commit, the order of the calls was reversed, and arc_adapt() is now called only with «real» hits even if the hit was in one of the two ghost lists, which renders the ghost lists useless and breaks the ARC algorithm. FreeBSD fixed this problem locally in Change D19094 / Commit r348772. This change is an adaptation of the above commit to the current arc code. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Matt Macy <[email protected]> Closes openzfs#10548 Closes openzfs#10618
1 parent 26a3f3c commit 033c788

File tree

1 file changed

+42
-22
lines changed

1 file changed

+42
-22
lines changed

module/zfs/arc.c

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,14 +1098,20 @@ static kmutex_t l2arc_feed_thr_lock;
10981098
static kcondvar_t l2arc_feed_thr_cv;
10991099
static uint8_t l2arc_thread_exit;
11001100

1101-
static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *);
1101+
enum arc_hdr_alloc_flags {
1102+
ARC_HDR_ALLOC_RDATA = 0x1,
1103+
ARC_HDR_DO_ADAPT = 0x2,
1104+
};
1105+
1106+
1107+
static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
11021108
static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *);
1103-
static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *);
1109+
static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
11041110
static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *);
11051111
static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *);
11061112
static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag);
11071113
static void arc_hdr_free_abd(arc_buf_hdr_t *, boolean_t);
1108-
static void arc_hdr_alloc_abd(arc_buf_hdr_t *, boolean_t);
1114+
static void arc_hdr_alloc_abd(arc_buf_hdr_t *, int);
11091115
static void arc_access(arc_buf_hdr_t *, kmutex_t *);
11101116
static boolean_t arc_is_overflowing(void);
11111117
static void arc_buf_watch(arc_buf_t *);
@@ -1980,7 +1986,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
19801986
ASSERT(HDR_EMPTY_OR_LOCKED(hdr));
19811987
ASSERT(HDR_ENCRYPTED(hdr));
19821988

1983-
arc_hdr_alloc_abd(hdr, B_FALSE);
1989+
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
19841990

19851991
ret = spa_do_crypt_abd(B_FALSE, spa, zb, hdr->b_crypt_hdr.b_ot,
19861992
B_FALSE, bswap, hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_iv,
@@ -2007,7 +2013,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
20072013
* and then loan a buffer from it, rather than allocating a
20082014
* linear buffer and wrapping it in an abd later.
20092015
*/
2010-
cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
2016+
cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, B_TRUE);
20112017
tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
20122018

20132019
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
@@ -3312,9 +3318,11 @@ arc_buf_destroy_impl(arc_buf_t *buf)
33123318
}
33133319

33143320
static void
3315-
arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata)
3321+
arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags)
33163322
{
33173323
uint64_t size;
3324+
boolean_t alloc_rdata = ((alloc_flags & ARC_HDR_ALLOC_RDATA) != 0);
3325+
boolean_t do_adapt = ((alloc_flags & ARC_HDR_DO_ADAPT) != 0);
33183326

33193327
ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
33203328
ASSERT(HDR_HAS_L1HDR(hdr));
@@ -3324,13 +3332,15 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata)
33243332
if (alloc_rdata) {
33253333
size = HDR_GET_PSIZE(hdr);
33263334
ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
3327-
hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr);
3335+
hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr,
3336+
do_adapt);
33283337
ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
33293338
ARCSTAT_INCR(arcstat_raw_size, size);
33303339
} else {
33313340
size = arc_hdr_size(hdr);
33323341
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
3333-
hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr);
3342+
hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr,
3343+
do_adapt);
33343344
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
33353345
}
33363346

@@ -3382,13 +3392,15 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
33823392
arc_buf_contents_t type, boolean_t alloc_rdata)
33833393
{
33843394
arc_buf_hdr_t *hdr;
3395+
int flags = ARC_HDR_DO_ADAPT;
33853396

33863397
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
33873398
if (protected) {
33883399
hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE);
33893400
} else {
33903401
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
33913402
}
3403+
flags |= alloc_rdata ? ARC_HDR_ALLOC_RDATA : 0;
33923404

33933405
ASSERT(HDR_EMPTY(hdr));
33943406
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
@@ -3412,7 +3424,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
34123424
* the compressed or uncompressed data depending on the block
34133425
* it references and compressed arc enablement.
34143426
*/
3415-
arc_hdr_alloc_abd(hdr, alloc_rdata);
3427+
arc_hdr_alloc_abd(hdr, flags);
34163428
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
34173429

34183430
return (hdr);
@@ -5507,11 +5519,12 @@ arc_is_overflowing(void)
55075519
}
55085520

55095521
static abd_t *
5510-
arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
5522+
arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
5523+
boolean_t do_adapt)
55115524
{
55125525
arc_buf_contents_t type = arc_buf_type(hdr);
55135526

5514-
arc_get_data_impl(hdr, size, tag);
5527+
arc_get_data_impl(hdr, size, tag, do_adapt);
55155528
if (type == ARC_BUFC_METADATA) {
55165529
return (abd_alloc(size, B_TRUE));
55175530
} else {
@@ -5525,7 +5538,7 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
55255538
{
55265539
arc_buf_contents_t type = arc_buf_type(hdr);
55275540

5528-
arc_get_data_impl(hdr, size, tag);
5541+
arc_get_data_impl(hdr, size, tag, B_TRUE);
55295542
if (type == ARC_BUFC_METADATA) {
55305543
return (zio_buf_alloc(size));
55315544
} else {
@@ -5541,12 +5554,14 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
55415554
* limit, we'll only signal the reclaim thread and continue on.
55425555
*/
55435556
static void
5544-
arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
5557+
arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
5558+
boolean_t do_adapt)
55455559
{
55465560
arc_state_t *state = hdr->b_l1hdr.b_state;
55475561
arc_buf_contents_t type = arc_buf_type(hdr);
55485562

5549-
arc_adapt(size, state);
5563+
if (do_adapt)
5564+
arc_adapt(size, state);
55505565

55515566
/*
55525567
* If arc_size is currently overflowing, and has grown past our
@@ -6346,6 +6361,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
63466361
boolean_t devw = B_FALSE;
63476362
uint64_t size;
63486363
abd_t *hdr_abd;
6364+
int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
63496365

63506366
/*
63516367
* Gracefully handle a damaged logical block size as a
@@ -6424,8 +6440,9 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
64246440
* do this after we've called arc_access() to
64256441
* avoid hitting an assert in remove_reference().
64266442
*/
6443+
arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state);
64276444
arc_access(hdr, hash_lock);
6428-
arc_hdr_alloc_abd(hdr, encrypted_read);
6445+
arc_hdr_alloc_abd(hdr, alloc_flags);
64296446
}
64306447

64316448
if (encrypted_read) {
@@ -6869,7 +6886,7 @@ arc_release(arc_buf_t *buf, void *tag)
68696886
if (arc_can_share(hdr, lastbuf)) {
68706887
arc_share_buf(hdr, lastbuf);
68716888
} else {
6872-
arc_hdr_alloc_abd(hdr, B_FALSE);
6889+
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
68736890
abd_copy_from_buf(hdr->b_l1hdr.b_pabd,
68746891
buf->b_data, psize);
68756892
}
@@ -7104,7 +7121,7 @@ arc_write_ready(zio_t *zio)
71047121
if (ARC_BUF_ENCRYPTED(buf)) {
71057122
ASSERT3U(psize, >, 0);
71067123
ASSERT(ARC_BUF_COMPRESSED(buf));
7107-
arc_hdr_alloc_abd(hdr, B_TRUE);
7124+
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
71087125
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
71097126
} else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
71107127
/*
@@ -7114,16 +7131,17 @@ arc_write_ready(zio_t *zio)
71147131
*/
71157132
if (BP_IS_ENCRYPTED(bp)) {
71167133
ASSERT3U(psize, >, 0);
7117-
arc_hdr_alloc_abd(hdr, B_TRUE);
7134+
arc_hdr_alloc_abd(hdr,
7135+
ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
71187136
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
71197137
} else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
71207138
!ARC_BUF_COMPRESSED(buf)) {
71217139
ASSERT3U(psize, >, 0);
7122-
arc_hdr_alloc_abd(hdr, B_FALSE);
7140+
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
71237141
abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize);
71247142
} else {
71257143
ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr));
7126-
arc_hdr_alloc_abd(hdr, B_FALSE);
7144+
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
71277145
abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data,
71287146
arc_buf_size(buf));
71297147
}
@@ -8418,7 +8436,8 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
84188436
* until arc_read_done().
84198437
*/
84208438
if (BP_IS_ENCRYPTED(bp)) {
8421-
abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
8439+
abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
8440+
B_TRUE);
84228441

84238442
zio_crypt_decode_params_bp(bp, salt, iv);
84248443
zio_crypt_decode_mac_bp(bp, mac);
@@ -8454,7 +8473,8 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
84548473
*/
84558474
if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
84568475
!HDR_COMPRESSION_ENABLED(hdr)) {
8457-
abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
8476+
abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
8477+
B_TRUE);
84588478
void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
84598479

84608480
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),

0 commit comments

Comments
 (0)