Skip to content

Commit 96b3d00

Browse files
mattmacy, jsai20
authored and committed
Restore ARC MFU/MRU pressure
The arc_adapt() function tunes the MRU/MFU balance according to four types of cache hits (passed as the state argument): ghost MRU, MRU, MFU, and ghost MFU. If this function is called with the wrong cache hit type (state), adaptation will be sub-optimal and performance will suffer. Some time ago upstream received the commit "6950 ARC should cache compressed data", which changed the sequence of calls that arc_read() makes on an access to a ghost buffer. Before that commit, a hit on any ghost list was passed to arc_adapt() before the call to arc_access(), which revives the element in the cache and changes its state from ghost to a real hit. After that commit, the order of calls was reversed and arc_adapt() is now called only with «real» hits even if the hit was in one of the two ghost lists, which renders the ghost lists useless and breaks the ARC algorithm. FreeBSD fixed this problem locally in Change D19094 / Commit r348772. This change is an adaptation of the above commit to the current ARC code. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Matt Macy <[email protected]> Closes openzfs#10548 Closes openzfs#10618
1 parent adcc112 commit 96b3d00

File tree

1 file changed

+42
-22
lines changed

1 file changed

+42
-22
lines changed

module/zfs/arc.c

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -856,14 +856,20 @@ static uint8_t l2arc_thread_exit;
856856
static kmutex_t l2arc_rebuild_thr_lock;
857857
static kcondvar_t l2arc_rebuild_thr_cv;
858858

859-
static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *);
859+
enum arc_hdr_alloc_flags {
860+
ARC_HDR_ALLOC_RDATA = 0x1,
861+
ARC_HDR_DO_ADAPT = 0x2,
862+
};
863+
864+
865+
static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
860866
static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *);
861-
static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *);
867+
static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
862868
static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *);
863869
static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *);
864870
static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag);
865871
static void arc_hdr_free_abd(arc_buf_hdr_t *, boolean_t);
866-
static void arc_hdr_alloc_abd(arc_buf_hdr_t *, boolean_t);
872+
static void arc_hdr_alloc_abd(arc_buf_hdr_t *, int);
867873
static void arc_access(arc_buf_hdr_t *, kmutex_t *);
868874
static void arc_buf_watch(arc_buf_t *);
869875

@@ -1822,7 +1828,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
18221828
ASSERT(HDR_EMPTY_OR_LOCKED(hdr));
18231829
ASSERT(HDR_ENCRYPTED(hdr));
18241830

1825-
arc_hdr_alloc_abd(hdr, B_FALSE);
1831+
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
18261832

18271833
ret = spa_do_crypt_abd(B_FALSE, spa, zb, hdr->b_crypt_hdr.b_ot,
18281834
B_FALSE, bswap, hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_iv,
@@ -1849,7 +1855,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
18491855
* and then loan a buffer from it, rather than allocating a
18501856
* linear buffer and wrapping it in an abd later.
18511857
*/
1852-
cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
1858+
cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, B_TRUE);
18531859
tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
18541860

18551861
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
@@ -3154,9 +3160,11 @@ arc_buf_destroy_impl(arc_buf_t *buf)
31543160
}
31553161

31563162
static void
3157-
arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata)
3163+
arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags)
31583164
{
31593165
uint64_t size;
3166+
boolean_t alloc_rdata = ((alloc_flags & ARC_HDR_ALLOC_RDATA) != 0);
3167+
boolean_t do_adapt = ((alloc_flags & ARC_HDR_DO_ADAPT) != 0);
31603168

31613169
ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
31623170
ASSERT(HDR_HAS_L1HDR(hdr));
@@ -3166,13 +3174,15 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata)
31663174
if (alloc_rdata) {
31673175
size = HDR_GET_PSIZE(hdr);
31683176
ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
3169-
hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr);
3177+
hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr,
3178+
do_adapt);
31703179
ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
31713180
ARCSTAT_INCR(arcstat_raw_size, size);
31723181
} else {
31733182
size = arc_hdr_size(hdr);
31743183
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
3175-
hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr);
3184+
hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr,
3185+
do_adapt);
31763186
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
31773187
}
31783188

@@ -3224,13 +3234,15 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
32243234
arc_buf_contents_t type, boolean_t alloc_rdata)
32253235
{
32263236
arc_buf_hdr_t *hdr;
3237+
int flags = ARC_HDR_DO_ADAPT;
32273238

32283239
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
32293240
if (protected) {
32303241
hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE);
32313242
} else {
32323243
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
32333244
}
3245+
flags |= alloc_rdata ? ARC_HDR_ALLOC_RDATA : 0;
32343246

32353247
ASSERT(HDR_EMPTY(hdr));
32363248
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
@@ -3254,7 +3266,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
32543266
* the compressed or uncompressed data depending on the block
32553267
* it references and compressed arc enablement.
32563268
*/
3257-
arc_hdr_alloc_abd(hdr, alloc_rdata);
3269+
arc_hdr_alloc_abd(hdr, flags);
32583270
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
32593271

32603272
return (hdr);
@@ -5028,11 +5040,12 @@ arc_is_overflowing(void)
50285040
}
50295041

50305042
static abd_t *
5031-
arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
5043+
arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
5044+
boolean_t do_adapt)
50325045
{
50335046
arc_buf_contents_t type = arc_buf_type(hdr);
50345047

5035-
arc_get_data_impl(hdr, size, tag);
5048+
arc_get_data_impl(hdr, size, tag, do_adapt);
50365049
if (type == ARC_BUFC_METADATA) {
50375050
return (abd_alloc(size, B_TRUE));
50385051
} else {
@@ -5046,7 +5059,7 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
50465059
{
50475060
arc_buf_contents_t type = arc_buf_type(hdr);
50485061

5049-
arc_get_data_impl(hdr, size, tag);
5062+
arc_get_data_impl(hdr, size, tag, B_TRUE);
50505063
if (type == ARC_BUFC_METADATA) {
50515064
return (zio_buf_alloc(size));
50525065
} else {
@@ -5120,12 +5133,14 @@ arc_wait_for_eviction(uint64_t amount)
51205133
* limit, we'll only signal the reclaim thread and continue on.
51215134
*/
51225135
static void
5123-
arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
5136+
arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
5137+
boolean_t do_adapt)
51245138
{
51255139
arc_state_t *state = hdr->b_l1hdr.b_state;
51265140
arc_buf_contents_t type = arc_buf_type(hdr);
51275141

5128-
arc_adapt(size, state);
5142+
if (do_adapt)
5143+
arc_adapt(size, state);
51295144

51305145
/*
51315146
* If arc_size is currently overflowing, we must be adding data
@@ -5920,6 +5935,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
59205935
boolean_t devw = B_FALSE;
59215936
uint64_t size;
59225937
abd_t *hdr_abd;
5938+
int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
59235939

59245940
if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
59255941
rc = SET_ERROR(ENOENT);
@@ -6007,8 +6023,9 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
60076023
* do this after we've called arc_access() to
60086024
* avoid hitting an assert in remove_reference().
60096025
*/
6026+
arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state);
60106027
arc_access(hdr, hash_lock);
6011-
arc_hdr_alloc_abd(hdr, encrypted_read);
6028+
arc_hdr_alloc_abd(hdr, alloc_flags);
60126029
}
60136030

60146031
if (encrypted_read) {
@@ -6452,7 +6469,7 @@ arc_release(arc_buf_t *buf, void *tag)
64526469
if (arc_can_share(hdr, lastbuf)) {
64536470
arc_share_buf(hdr, lastbuf);
64546471
} else {
6455-
arc_hdr_alloc_abd(hdr, B_FALSE);
6472+
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
64566473
abd_copy_from_buf(hdr->b_l1hdr.b_pabd,
64576474
buf->b_data, psize);
64586475
}
@@ -6687,7 +6704,7 @@ arc_write_ready(zio_t *zio)
66876704
if (ARC_BUF_ENCRYPTED(buf)) {
66886705
ASSERT3U(psize, >, 0);
66896706
ASSERT(ARC_BUF_COMPRESSED(buf));
6690-
arc_hdr_alloc_abd(hdr, B_TRUE);
6707+
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
66916708
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
66926709
} else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
66936710
/*
@@ -6697,16 +6714,17 @@ arc_write_ready(zio_t *zio)
66976714
*/
66986715
if (BP_IS_ENCRYPTED(bp)) {
66996716
ASSERT3U(psize, >, 0);
6700-
arc_hdr_alloc_abd(hdr, B_TRUE);
6717+
arc_hdr_alloc_abd(hdr,
6718+
ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
67016719
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
67026720
} else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
67036721
!ARC_BUF_COMPRESSED(buf)) {
67046722
ASSERT3U(psize, >, 0);
6705-
arc_hdr_alloc_abd(hdr, B_FALSE);
6723+
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
67066724
abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize);
67076725
} else {
67086726
ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr));
6709-
arc_hdr_alloc_abd(hdr, B_FALSE);
6727+
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
67106728
abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data,
67116729
arc_buf_size(buf));
67126730
}
@@ -8150,7 +8168,8 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
81508168
* until arc_read_done().
81518169
*/
81528170
if (BP_IS_ENCRYPTED(bp)) {
8153-
abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
8171+
abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
8172+
B_TRUE);
81548173

81558174
zio_crypt_decode_params_bp(bp, salt, iv);
81568175
zio_crypt_decode_mac_bp(bp, mac);
@@ -8186,7 +8205,8 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
81868205
*/
81878206
if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
81888207
!HDR_COMPRESSION_ENABLED(hdr)) {
8189-
abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
8208+
abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
8209+
B_TRUE);
81908210
void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
81918211

81928212
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),

0 commit comments

Comments
 (0)