Skip to content

Commit c935fe2

Browse files
authored
arc_read()/arc_access() refactoring and cleanup
The ARC code has been significantly modified many times over the years, which created a significant amount of tangled and potentially broken code. This change should make the arc_access()/arc_read() code somewhat more readable.

- Decouple prefetch status tracking from b_refcnt. It made sense originally, but became highly cryptic over the years. Move all the logic into arc_access(). While there, clean up and comment the state transitions in arc_access(). Some transitions were weird IMO.
- Unify arc_access() calls into arc_read() instead of sometimes calling it from arc_read_done(). To avoid extra state changes and checks, add one more b_refcnt for ARC_FLAG_IO_IN_PROGRESS.
- Reimplement ARC_FLAG_WAIT in the case of ARC_FLAG_IO_IN_PROGRESS with the same callback mechanism, so that such requests are not falsely accounted as hits. Count those as "iohits", an intermediate between "hits" and "misses". While there, call read callbacks in the original request order, which should be good for fairness and random speculations/allocations/aggregations.
- Introduce additional statistic counters for prefetch, accounting predictive vs prescient and hits vs iohits vs misses.
- Remove the hash_lock argument from functions not needing it.
- Remove ARC_FLAG_PREDICTIVE_PREFETCH, since it should be the opposite of ARC_FLAG_PRESCIENT_PREFETCH if ARC_FLAG_PREFETCH is set. We may wish to add ARC_FLAG_PRESCIENT_PREFETCH to a few more places.
- Fix a few false positive tests found in the process.

Reviewed-by: George Wilson <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Richard Yao <[email protected]>
Reviewed-by: Ryan Moeller <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored by: iXsystems, Inc.
Closes #14123
1 parent dc8c2f6 commit c935fe2

File tree

11 files changed

+310
-292
lines changed

11 files changed

+310
-292
lines changed

include/os/linux/zfs/sys/trace_arc.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -103,12 +103,12 @@ DEFINE_EVENT(zfs_arc_buf_hdr_class, name, \
103103
TP_PROTO(arc_buf_hdr_t *ab), \
104104
TP_ARGS(ab))
105105
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__hit);
106+
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__iohit);
106107
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__evict);
107108
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__delete);
108109
DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mru);
109110
DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mfu);
110111
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__async__upgrade__sync);
111-
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch);
112112
DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__hit);
113113
DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__miss);
114114

@@ -387,12 +387,12 @@ DEFINE_ARC_WAIT_FOR_EVICTION_EVENT(zfs_arc__wait__for__eviction);
387387
#else
388388

389389
DEFINE_DTRACE_PROBE1(arc__hit);
390+
DEFINE_DTRACE_PROBE1(arc__iohit);
390391
DEFINE_DTRACE_PROBE1(arc__evict);
391392
DEFINE_DTRACE_PROBE1(arc__delete);
392393
DEFINE_DTRACE_PROBE1(new_state__mru);
393394
DEFINE_DTRACE_PROBE1(new_state__mfu);
394395
DEFINE_DTRACE_PROBE1(arc__async__upgrade__sync);
395-
DEFINE_DTRACE_PROBE1(arc__demand__hit__predictive__prefetch);
396396
DEFINE_DTRACE_PROBE1(l2arc__hit);
397397
DEFINE_DTRACE_PROBE1(l2arc__miss);
398398
DEFINE_DTRACE_PROBE2(l2arc__read);

include/sys/arc.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,6 @@ typedef enum arc_flags
115115
ARC_FLAG_PREFETCH = 1 << 2, /* I/O is a prefetch */
116116
ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */
117117
ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */
118-
ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 5, /* I/O from zfetch */
119118
ARC_FLAG_PRESCIENT_PREFETCH = 1 << 6, /* long min lifespan */
120119

121120
/*

include/sys/arc_impl.h

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,9 +101,14 @@ struct arc_callback {
101101
boolean_t acb_compressed;
102102
boolean_t acb_noauth;
103103
boolean_t acb_nobuf;
104+
boolean_t acb_wait;
105+
int acb_wait_error;
106+
kmutex_t acb_wait_lock;
107+
kcondvar_t acb_wait_cv;
104108
zbookmark_phys_t acb_zb;
105109
zio_t *acb_zio_dummy;
106110
zio_t *acb_zio_head;
111+
arc_callback_t *acb_prev;
107112
arc_callback_t *acb_next;
108113
};
109114

@@ -511,15 +516,27 @@ struct arc_buf_hdr {
511516
};
512517

513518
typedef struct arc_stats {
519+
/* Number of requests that were satisfied without I/O. */
514520
kstat_named_t arcstat_hits;
521+
/* Number of requests for which I/O was already running. */
522+
kstat_named_t arcstat_iohits;
523+
/* Number of requests for which I/O has to be issued. */
515524
kstat_named_t arcstat_misses;
525+
/* Same three, but specifically for demand data. */
516526
kstat_named_t arcstat_demand_data_hits;
527+
kstat_named_t arcstat_demand_data_iohits;
517528
kstat_named_t arcstat_demand_data_misses;
529+
/* Same three, but specifically for demand metadata. */
518530
kstat_named_t arcstat_demand_metadata_hits;
531+
kstat_named_t arcstat_demand_metadata_iohits;
519532
kstat_named_t arcstat_demand_metadata_misses;
533+
/* Same three, but specifically for prefetch data. */
520534
kstat_named_t arcstat_prefetch_data_hits;
535+
kstat_named_t arcstat_prefetch_data_iohits;
521536
kstat_named_t arcstat_prefetch_data_misses;
537+
/* Same three, but specifically for prefetch metadata. */
522538
kstat_named_t arcstat_prefetch_metadata_hits;
539+
kstat_named_t arcstat_prefetch_metadata_iohits;
523540
kstat_named_t arcstat_prefetch_metadata_misses;
524541
kstat_named_t arcstat_mru_hits;
525542
kstat_named_t arcstat_mru_ghost_hits;
@@ -844,8 +861,18 @@ typedef struct arc_stats {
844861
kstat_named_t arcstat_meta_max;
845862
kstat_named_t arcstat_meta_min;
846863
kstat_named_t arcstat_async_upgrade_sync;
864+
/* Number of predictive prefetch requests. */
865+
kstat_named_t arcstat_predictive_prefetch;
866+
/* Number of requests for which predictive prefetch has completed. */
847867
kstat_named_t arcstat_demand_hit_predictive_prefetch;
868+
/* Number of requests for which predictive prefetch was running. */
869+
kstat_named_t arcstat_demand_iohit_predictive_prefetch;
870+
/* Number of prescient prefetch requests. */
871+
kstat_named_t arcstat_prescient_prefetch;
872+
/* Number of requests for which prescient prefetch has completed. */
848873
kstat_named_t arcstat_demand_hit_prescient_prefetch;
874+
/* Number of requests for which prescient prefetch was running. */
875+
kstat_named_t arcstat_demand_iohit_prescient_prefetch;
849876
kstat_named_t arcstat_need_free;
850877
kstat_named_t arcstat_sys_free;
851878
kstat_named_t arcstat_raw_size;
@@ -855,14 +882,19 @@ typedef struct arc_stats {
855882

856883
typedef struct arc_sums {
857884
wmsum_t arcstat_hits;
885+
wmsum_t arcstat_iohits;
858886
wmsum_t arcstat_misses;
859887
wmsum_t arcstat_demand_data_hits;
888+
wmsum_t arcstat_demand_data_iohits;
860889
wmsum_t arcstat_demand_data_misses;
861890
wmsum_t arcstat_demand_metadata_hits;
891+
wmsum_t arcstat_demand_metadata_iohits;
862892
wmsum_t arcstat_demand_metadata_misses;
863893
wmsum_t arcstat_prefetch_data_hits;
894+
wmsum_t arcstat_prefetch_data_iohits;
864895
wmsum_t arcstat_prefetch_data_misses;
865896
wmsum_t arcstat_prefetch_metadata_hits;
897+
wmsum_t arcstat_prefetch_metadata_iohits;
866898
wmsum_t arcstat_prefetch_metadata_misses;
867899
wmsum_t arcstat_mru_hits;
868900
wmsum_t arcstat_mru_ghost_hits;
@@ -936,8 +968,12 @@ typedef struct arc_sums {
936968
wmsum_t arcstat_prune;
937969
aggsum_t arcstat_meta_used;
938970
wmsum_t arcstat_async_upgrade_sync;
971+
wmsum_t arcstat_predictive_prefetch;
939972
wmsum_t arcstat_demand_hit_predictive_prefetch;
973+
wmsum_t arcstat_demand_iohit_predictive_prefetch;
974+
wmsum_t arcstat_prescient_prefetch;
940975
wmsum_t arcstat_demand_hit_prescient_prefetch;
976+
wmsum_t arcstat_demand_iohit_prescient_prefetch;
941977
wmsum_t arcstat_raw_size;
942978
wmsum_t arcstat_cached_only_in_progress;
943979
wmsum_t arcstat_abd_chunk_waste_size;

0 commit comments

Comments
 (0)