Skip to content

Commit 94520ca

Browse files
Prakash Surya, behlendorf
authored and committed
Prune metadata from ghost lists in arc_adjust_meta
To maintain a strict limit on the metadata contained in the arc, while preventing the arc buffer headers from completely consuming the "arc_meta_used" space, we need to evict metadata buffers from the arc's ghost lists along with the regular lists.

This change modifies arc_adjust_meta such that it more closely models the adjustments made in arc_adjust. "arc_meta_used" is used similarly to "arc_size", and "arc_meta_limit" is used similarly to "arc_c".

Testing metadata intensive workloads (e.g. creating, copying, and removing millions of small files and/or directories) has shown this change to make a dramatic improvement to the hit rate maintained in the arc. While I think there is still room for improvement, this is a big step in the right direction.

In addition, zpl_free_cached_objects was made into a no-op as I'm not yet sure how to properly implement that function.

Signed-off-by: Prakash Surya <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Issue #2110
1 parent 1e3cb67 commit 94520ca

File tree

3 files changed

+49
-21
lines changed

3 files changed

+49
-21
lines changed

include/sys/arc.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@ void arc_freed(spa_t *spa, const blkptr_t *bp);
160160
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
161161
int arc_buf_evict(arc_buf_t *buf);
162162

163-
void arc_adjust_meta(int64_t adjustment, boolean_t may_prune);
164163
void arc_flush(spa_t *spa);
165164
void arc_tempreserve_clear(uint64_t reserve);
166165
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);

module/zfs/arc.c

Lines changed: 48 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2268,24 +2268,61 @@ arc_do_user_evicts(void)
22682268
* This is only used to enforce the tunable arc_meta_limit. If we are
22692269
* unable to evict enough buffers, notify the user via the prune callback.
22702270
*/
2271-
void
2272-
arc_adjust_meta(int64_t adjustment, boolean_t may_prune)
2271+
static void
2272+
arc_adjust_meta(void)
22732273
{
2274-
int64_t delta;
2274+
int64_t adjustmnt, delta;
22752275

2276-
if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) {
2277-
delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment);
2276+
/*
2277+
* This differs slightly from the way we evict from the mru in
2278+
* arc_adjust because we don't have a "target" value (i.e. no
2279+
* "meta" arc_p). As a result, I think we can completely
2280+
* cannibalize the metadata in the MRU before we evict the
2281+
* metadata from the MFU. I think we probably need to implement a
2282+
* "metadata arc_p" value to do this properly.
2283+
*/
2284+
adjustmnt = arc_meta_used - arc_meta_limit;
2285+
2286+
if (adjustmnt > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) {
2287+
delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustmnt);
22782288
arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_METADATA);
2279-
adjustment -= delta;
2289+
adjustmnt -= delta;
22802290
}
22812291

2282-
if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) {
2283-
delta = MIN(arc_mfu->arcs_lsize[ARC_BUFC_METADATA], adjustment);
2292+
/*
2293+
* We can't afford to recalculate adjustmnt here. If we do,
2294+
* new metadata buffers can sneak into the MRU or ANON lists,
2295+
* thus penalizing the MFU metadata. Although the fudge factor is
2296+
* small, it has been empirically shown to be significant for
2297+
* certain workloads (e.g. creating many empty directories). As
2298+
* such, we use the original calculation for adjustmnt, and
2299+
* simply decrement it by the amount of data evicted from the MRU.
2300+
*/
2301+
2302+
if (adjustmnt > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) {
2303+
delta = MIN(arc_mfu->arcs_lsize[ARC_BUFC_METADATA], adjustmnt);
22842304
arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_METADATA);
2285-
adjustment -= delta;
22862305
}
22872306

2288-
if (may_prune && (adjustment > 0) && (arc_meta_used > arc_meta_limit))
2307+
adjustmnt = arc_mru->arcs_lsize[ARC_BUFC_METADATA] +
2308+
arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] - arc_meta_limit;
2309+
2310+
if (adjustmnt > 0 && arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] > 0) {
2311+
delta = MIN(adjustmnt,
2312+
arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA]);
2313+
arc_evict_ghost(arc_mru_ghost, 0, delta, ARC_BUFC_METADATA);
2314+
}
2315+
2316+
adjustmnt = arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] +
2317+
arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA] - arc_meta_limit;
2318+
2319+
if (adjustmnt > 0 && arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA] > 0) {
2320+
delta = MIN(adjustmnt,
2321+
arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA]);
2322+
arc_evict_ghost(arc_mfu_ghost, 0, delta, ARC_BUFC_METADATA);
2323+
}
2324+
2325+
if (arc_meta_used > arc_meta_limit)
22892326
arc_do_user_prune(zfs_arc_meta_prune);
22902327
}
22912328

@@ -2405,7 +2442,6 @@ static void
24052442
arc_adapt_thread(void)
24062443
{
24072444
callb_cpr_t cpr;
2408-
int64_t prune;
24092445

24102446
CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG);
24112447

@@ -2441,14 +2477,7 @@ arc_adapt_thread(void)
24412477
if (arc_no_grow && ddi_get_lbolt() >= arc_grow_time)
24422478
arc_no_grow = FALSE;
24432479

2444-
/*
2445-
* Keep meta data usage within limits, arc_shrink() is not
2446-
* used to avoid collapsing the arc_c value when only the
2447-
* arc_meta_limit is being exceeded.
2448-
*/
2449-
prune = (int64_t)arc_meta_used - (int64_t)arc_meta_limit;
2450-
if (prune > 0)
2451-
arc_adjust_meta(prune, B_TRUE);
2480+
arc_adjust_meta();
24522481

24532482
arc_adjust();
24542483

module/zfs/zpl_super.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ zpl_nr_cached_objects(struct super_block *sb)
342342
static void
343343
zpl_free_cached_objects(struct super_block *sb, int nr_to_scan)
344344
{
345-
arc_adjust_meta(nr_to_scan * sizeof (znode_t), B_FALSE);
345+
/* noop */
346346
}
347347
#endif /* HAVE_FREE_CACHED_OBJECTS */
348348

0 commit comments

Comments
 (0)