91
91
* committed to stable storage. Please refer to the zil_commit_waiter()
92
92
* function (and the comments within it) for more details.
93
93
*/
94
- static uint_t zfs_commit_timeout_pct = 5 ;
95
-
96
- /*
97
- * Minimal time we care to delay commit waiting for more ZIL records.
98
- * At least FreeBSD kernel can't sleep for less than 2us at its best.
99
- * So requests to sleep for less then 5us is a waste of CPU time with
100
- * a risk of significant log latency increase due to oversleep.
101
- */
102
- static uint64_t zil_min_commit_timeout = 5000 ;
94
+ static uint_t zfs_commit_timeout_pct = 10 ;
103
95
104
96
/*
105
97
* See zil.h for more information about these fields.
@@ -2696,6 +2688,19 @@ zil_commit_writer_stall(zilog_t *zilog)
2696
2688
ASSERT (list_is_empty (& zilog -> zl_lwb_list ));
2697
2689
}
2698
2690
2691
/*
 * Bookkeeping helper invoked by zil_process_commit_list() and
 * zil_commit_waiter_timeout() once the current lwb has been closed.
 *
 * It is a no-op while itxs are still queued on zl_itx_commit_list, or
 * when zl_cur_used is already zero.  Otherwise it decrements
 * zl_parallel (stopping at zero) and resets zl_cur_used to zero.
 */
+ static void
2692
+ zil_burst_done (zilog_t * zilog )
2693
+ {
2694
+ if (!list_is_empty (& zilog -> zl_itx_commit_list ) ||
2695
+ zilog -> zl_cur_used == 0 )
2696
+ return ;
2697
+
2698
/* Guarded decrement: zl_parallel is never taken below zero. */
+ if (zilog -> zl_parallel )
2699
+ zilog -> zl_parallel -- ;
2700
+
2701
/* Clearing zl_cur_used makes a repeated call hit the early return. */
+ zilog -> zl_cur_used = 0 ;
2702
+ }
2703
+
2699
2704
/*
2700
2705
* This function will traverse the commit list, creating new lwbs as
2701
2706
* needed, and committing the itxs from the commit list to these newly
@@ -2710,7 +2715,6 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
2710
2715
list_t nolwb_waiters ;
2711
2716
lwb_t * lwb , * plwb ;
2712
2717
itx_t * itx ;
2713
- boolean_t first = B_TRUE ;
2714
2718
2715
2719
ASSERT (MUTEX_HELD (& zilog -> zl_issuer_lock ));
2716
2720
@@ -2736,9 +2740,22 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
2736
2740
zil_commit_activate_saxattr_feature (zilog );
2737
2741
ASSERT (lwb -> lwb_state == LWB_STATE_NEW ||
2738
2742
lwb -> lwb_state == LWB_STATE_OPENED );
2739
- first = (lwb -> lwb_state == LWB_STATE_NEW ) &&
2740
- ((plwb = list_prev (& zilog -> zl_lwb_list , lwb )) == NULL ||
2741
- plwb -> lwb_state == LWB_STATE_FLUSH_DONE );
2743
+
2744
+ /*
2745
+ * If the lwb is still opened, it means the workload is really
2746
+ * multi-threaded and we won the chance of write aggregation.
2747
+ * If it is not opened yet, but previous lwb is still not
2748
+ * flushed, it still means the workload is multi-threaded, but
2749
+ * there was too much time between the commits to aggregate, so
2750
+ * we try aggregation next time, but without much hope.
2751
+ */
2752
+ if (lwb -> lwb_state == LWB_STATE_OPENED ) {
2753
+ zilog -> zl_parallel = ZIL_BURSTS ;
2754
+ } else if ((plwb = list_prev (& zilog -> zl_lwb_list , lwb ))
2755
+ != NULL && plwb -> lwb_state != LWB_STATE_FLUSH_DONE ) {
2756
+ zilog -> zl_parallel = MAX (zilog -> zl_parallel ,
2757
+ ZIL_BURSTS / 2 );
2758
+ }
2742
2759
}
2743
2760
2744
2761
while ((itx = list_remove_head (& zilog -> zl_itx_commit_list )) != NULL ) {
@@ -2813,7 +2830,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
2813
2830
* Our lwb is done, leave the rest of
2814
2831
* itx list to somebody else who cares.
2815
2832
*/
2816
- first = B_FALSE ;
2833
+ zilog -> zl_parallel = ZIL_BURSTS ;
2817
2834
break ;
2818
2835
}
2819
2836
} else {
@@ -2905,28 +2922,15 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
2905
2922
* try and pack as many itxs into as few lwbs as
2906
2923
* possible, without significantly impacting the latency
2907
2924
* of each individual itx.
2908
- *
2909
- * If we had no already running or open LWBs, it can be
2910
- * the workload is single-threaded. And if the ZIL write
2911
- * latency is very small or if the LWB is almost full, it
2912
- * may be cheaper to bypass the delay.
2913
2925
*/
2914
- if (lwb -> lwb_state == LWB_STATE_OPENED && first ) {
2915
- hrtime_t sleep = zilog -> zl_last_lwb_latency *
2916
- zfs_commit_timeout_pct / 100 ;
2917
- if (sleep < zil_min_commit_timeout ||
2918
- lwb -> lwb_nmax - lwb -> lwb_nused <
2919
- lwb -> lwb_nmax / 8 ) {
2920
- list_insert_tail (ilwbs , lwb );
2921
- lwb = zil_lwb_write_close (zilog , lwb ,
2922
- LWB_STATE_NEW );
2923
- zilog -> zl_cur_used = 0 ;
2924
- if (lwb == NULL ) {
2925
- while ((lwb = list_remove_head (ilwbs ))
2926
- != NULL )
2927
- zil_lwb_write_issue (zilog , lwb );
2928
- zil_commit_writer_stall (zilog );
2929
- }
2926
+ if (lwb -> lwb_state == LWB_STATE_OPENED && !zilog -> zl_parallel ) {
2927
+ list_insert_tail (ilwbs , lwb );
2928
+ lwb = zil_lwb_write_close (zilog , lwb , LWB_STATE_NEW );
2929
+ zil_burst_done (zilog );
2930
+ if (lwb == NULL ) {
2931
+ while ((lwb = list_remove_head (ilwbs )) != NULL )
2932
+ zil_lwb_write_issue (zilog , lwb );
2933
+ zil_commit_writer_stall (zilog );
2930
2934
}
2931
2935
}
2932
2936
}
@@ -3084,19 +3088,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
3084
3088
3085
3089
ASSERT3S (lwb -> lwb_state , = = , LWB_STATE_CLOSED );
3086
3090
3087
- /*
3088
- * Since the lwb's zio hadn't been issued by the time this thread
3089
- * reached its timeout, we reset the zilog's "zl_cur_used" field
3090
- * to influence the zil block size selection algorithm.
3091
- *
3092
- * By having to issue the lwb's zio here, it means the size of the
3093
- * lwb was too large, given the incoming throughput of itxs. By
3094
- * setting "zl_cur_used" to zero, we communicate this fact to the
3095
- * block size selection algorithm, so it can take this information
3096
- * into account, and potentially select a smaller size for the
3097
- * next lwb block that is allocated.
3098
- */
3099
- zilog -> zl_cur_used = 0 ;
3091
+ zil_burst_done (zilog );
3100
3092
3101
3093
if (nlwb == NULL ) {
3102
3094
/*
@@ -4214,9 +4206,6 @@ EXPORT_SYMBOL(zil_kstat_values_update);
4214
4206
ZFS_MODULE_PARAM (zfs , zfs_ , commit_timeout_pct , UINT , ZMOD_RW ,
4215
4207
"ZIL block open timeout percentage" );
4216
4208
4217
- ZFS_MODULE_PARAM (zfs_zil , zil_ , min_commit_timeout , U64 , ZMOD_RW ,
4218
- "Minimum delay we care for ZIL block commit" );
4219
-
4220
4209
ZFS_MODULE_PARAM (zfs_zil , zil_ , replay_disable , INT , ZMOD_RW ,
4221
4210
"Disable intent logging replay" );
4222
4211
0 commit comments