  */
 
 /*
- * We will borrow aggsum_borrow_multiplier times the current request, so we will
- * have to get the as_lock approximately every aggsum_borrow_multiplier calls to
- * aggsum_delta().
+ * We will borrow 2^aggsum_borrow_shift times the current request, so we will
+ * have to get the as_lock approximately every 2^aggsum_borrow_shift calls to
+ * aggsum_add().
  */
-static uint_t aggsum_borrow_multiplier = 10;
+static uint_t aggsum_borrow_shift = 4;
 
 void
 aggsum_init(aggsum_t *as, uint64_t value)
 {
         bzero(as, sizeof (*as));
         as->as_lower_bound = as->as_upper_bound = value;
         mutex_init(&as->as_lock, NULL, MUTEX_DEFAULT, NULL);
-        as->as_numbuckets = boot_ncpus;
-        as->as_buckets = kmem_zalloc(boot_ncpus * sizeof (aggsum_bucket_t),
-            KM_SLEEP);
+        /*
+         * Too many buckets may hurt read performance without improving
+         * write.  From 12 CPUs use bucket per 2 CPUs, from 48 per 4, etc.
+         */
+        as->as_bucketshift = highbit64(boot_ncpus / 6) / 2;
+        as->as_numbuckets = ((boot_ncpus - 1) >> as->as_bucketshift) + 1;
+        as->as_buckets = kmem_zalloc(as->as_numbuckets *
+            sizeof (aggsum_bucket_t), KM_SLEEP);
         for (int i = 0; i < as->as_numbuckets; i++) {
                 mutex_init(&as->as_buckets[i].asc_lock,
                     NULL, MUTEX_DEFAULT, NULL);
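
Note (illustration, not part of the patch): a quick userland check of how the new bucket sizing scales, assuming highbit64() returns the 1-based index of the highest set bit (0 for 0), as the in-kernel helper does. It reproduces the comment above: up to 11 CPUs one bucket per CPU, from 12 CPUs one per 2, from 48 one per 4, and so on.

#include <stdio.h>
#include <stdint.h>

/* Stand-in for the kernel's highbit64(): 1-based index of the highest set bit. */
static int
highbit64(uint64_t x)
{
        int h = 0;

        while (x != 0) {
                h++;
                x >>= 1;
        }
        return (h);
}

int
main(void)
{
        int ncpus[] = { 4, 8, 12, 48, 192 };

        for (int i = 0; i < 5; i++) {
                int n = ncpus[i];
                int shift = highbit64(n / 6) / 2;       /* as_bucketshift */
                int buckets = ((n - 1) >> shift) + 1;   /* as_numbuckets */

                /* e.g. 12 CPUs -> shift 1, 6 buckets; 48 -> shift 2, 12 buckets */
                printf("%3d CPUs: bucketshift=%d numbuckets=%d\n",
                    n, shift, buckets);
        }
        return (0);
}

Fewer, shared buckets keep the flush loops in aggsum_value()/aggsum_compare() short on large machines at the cost of a little more write-side sharing, which is the trade-off the new comment describes.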
@@ -111,59 +116,49 @@ aggsum_fini(aggsum_t *as)
 int64_t
 aggsum_lower_bound(aggsum_t *as)
 {
-        return (as->as_lower_bound);
+        return (atomic_load_64(&as->as_lower_bound));
 }
 
-int64_t
+uint64_t
 aggsum_upper_bound(aggsum_t *as)
 {
-        return (as->as_upper_bound);
-}
-
-static void
-aggsum_flush_bucket(aggsum_t *as, struct aggsum_bucket *asb)
-{
-        ASSERT(MUTEX_HELD(&as->as_lock));
-        ASSERT(MUTEX_HELD(&asb->asc_lock));
-
-        /*
-         * We use atomic instructions for this because we read the upper and
-         * lower bounds without the lock, so we need stores to be atomic.
-         */
-        atomic_add_64((volatile uint64_t *)&as->as_lower_bound,
-            asb->asc_delta + asb->asc_borrowed);
-        atomic_add_64((volatile uint64_t *)&as->as_upper_bound,
-            asb->asc_delta - asb->asc_borrowed);
-        asb->asc_delta = 0;
-        asb->asc_borrowed = 0;
+        return (atomic_load_64(&as->as_upper_bound));
 }
 
 uint64_t
 aggsum_value(aggsum_t *as)
 {
-        int64_t rv;
+        int64_t lb;
+        uint64_t ub;
 
         mutex_enter(&as->as_lock);
-        if (as->as_lower_bound == as->as_upper_bound) {
-                rv = as->as_lower_bound;
+        lb = as->as_lower_bound;
+        ub = as->as_upper_bound;
+        if (lb == ub) {
                 for (int i = 0; i < as->as_numbuckets; i++) {
                         ASSERT0(as->as_buckets[i].asc_delta);
                         ASSERT0(as->as_buckets[i].asc_borrowed);
                 }
                 mutex_exit(&as->as_lock);
-                return (rv);
+                return (lb);
         }
         for (int i = 0; i < as->as_numbuckets; i++) {
                 struct aggsum_bucket *asb = &as->as_buckets[i];
+                if (asb->asc_borrowed == 0)
+                        continue;
                 mutex_enter(&asb->asc_lock);
-                aggsum_flush_bucket(as, asb);
+                lb += asb->asc_delta + asb->asc_borrowed;
+                ub += asb->asc_delta - asb->asc_borrowed;
+                asb->asc_delta = 0;
+                asb->asc_borrowed = 0;
                 mutex_exit(&asb->asc_lock);
         }
-        VERIFY3U(as->as_lower_bound, ==, as->as_upper_bound);
-        rv = as->as_lower_bound;
+        ASSERT3U(lb, ==, ub);
+        atomic_store_64(&as->as_lower_bound, lb);
+        atomic_store_64(&as->as_upper_bound, lb);
         mutex_exit(&as->as_lock);
 
-        return (rv);
+        return (lb);
 }
 
 void
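
For reference on the arithmetic that replaces aggsum_flush_bucket(): a bucket's asc_borrowed was taken out of as_lower_bound and added to as_upper_bound when it was borrowed, while its asc_delta has not yet been applied to either bound, so folding a bucket in adds asc_delta + asc_borrowed to the lower bound and asc_delta - asc_borrowed to the upper bound. Once every bucket is folded the two bounds meet at the exact sum, which is what the new ASSERT3U checks. A minimal standalone sketch with made-up numbers:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        /*
         * Hypothetical state: the already-flushed part of the sum is 1000,
         * and one bucket holds asc_delta = 10 (not yet applied) plus
         * asc_borrowed = 64 of slack.
         */
        int64_t lb = 1000 - 64;         /* lower bound gave up the borrow */
        uint64_t ub = 1000 + 64;        /* upper bound absorbed the borrow */
        int64_t asc_delta = 10;
        uint64_t asc_borrowed = 64;

        /* Fold the bucket back in, mirroring the new aggsum_value(). */
        lb += asc_delta + asc_borrowed;
        ub += asc_delta - asc_borrowed;

        assert((uint64_t)lb == ub);     /* bounds meet at the exact sum */
        printf("value = %lld\n", (long long)lb);        /* prints 1010 */
        return (0);
}

The results are written back with atomic_store_64() because aggsum_lower_bound()/aggsum_upper_bound() now read the bounds locklessly with atomic_load_64().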
@@ -172,7 +167,8 @@ aggsum_add(aggsum_t *as, int64_t delta)
         struct aggsum_bucket *asb;
         int64_t borrow;
 
-        asb = &as->as_buckets[CPU_SEQID_UNSTABLE % as->as_numbuckets];
+        asb = &as->as_buckets[(CPU_SEQID_UNSTABLE >> as->as_bucketshift) %
+            as->as_numbuckets];
 
         /* Try fast path if we already borrowed enough before. */
         mutex_enter(&asb->asc_lock);
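
Since a bucket now serves a group of 2^as_bucketshift CPUs, the index shifts the CPU id down before the modulo, so neighbouring CPUs share a bucket. A tiny illustration using the hypothetical 48-CPU layout from the sizing sketch above (as_bucketshift == 2, as_numbuckets == 12):

#include <stdio.h>

int
main(void)
{
        int bucketshift = 2, numbuckets = 12;   /* assumed 48-CPU layout */

        /* CPUs 0-3 -> bucket 0, 4-7 -> bucket 1, ..., 44-47 -> bucket 11. */
        for (int cpu = 0; cpu < 48; cpu += 4)
                printf("CPU %2d -> bucket %d\n",
                    cpu, (cpu >> bucketshift) % numbuckets);
        return (0);
}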
@@ -188,21 +184,22 @@ aggsum_add(aggsum_t *as, int64_t delta)
          * We haven't borrowed enough.  Take the global lock and borrow
          * considering what is requested now and what we borrowed before.
          */
-        borrow = (delta < 0 ? -delta : delta) * aggsum_borrow_multiplier;
+        borrow = (delta < 0 ? -delta : delta);
+        borrow <<= aggsum_borrow_shift + as->as_bucketshift;
         mutex_enter(&as->as_lock);
-        mutex_enter(&asb->asc_lock);
-        delta += asb->asc_delta;
-        asb->asc_delta = 0;
         if (borrow >= asb->asc_borrowed)
                 borrow -= asb->asc_borrowed;
         else
                 borrow = (borrow - (int64_t)asb->asc_borrowed) / 4;
+        mutex_enter(&asb->asc_lock);
+        delta += asb->asc_delta;
+        asb->asc_delta = 0;
         asb->asc_borrowed += borrow;
-        atomic_add_64((volatile uint64_t *)&as->as_lower_bound,
+        mutex_exit(&asb->asc_lock);
+        atomic_store_64(&as->as_lower_bound, as->as_lower_bound +
             delta - borrow);
-        atomic_add_64((volatile uint64_t *)&as->as_upper_bound,
+        atomic_store_64(&as->as_upper_bound, as->as_upper_bound +
             delta + borrow);
-        mutex_exit(&asb->asc_lock);
         mutex_exit(&as->as_lock);
 }
 
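
On the slow path the amount borrowed is now roughly |delta| * 2^(aggsum_borrow_shift + as_bucketshift); the extra as_bucketshift factor compensates for several CPUs sharing one bucket, so the as_lock is still taken only about once per 2^aggsum_borrow_shift calls to aggsum_add() from each CPU. A worked example with made-up values (it mirrors the logic above, it is not the kernel code):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        /*
         * Hypothetical call: aggsum_add(as, -3000) with the default
         * aggsum_borrow_shift == 4, as_bucketshift == 2, and 1024 already
         * borrowed by this bucket.
         */
        int64_t delta = -3000;
        uint64_t asc_borrowed = 1024;
        unsigned aggsum_borrow_shift = 4, as_bucketshift = 2;

        int64_t borrow = (delta < 0 ? -delta : delta);
        borrow <<= aggsum_borrow_shift + as_bucketshift;        /* 3000 << 6 = 192000 */

        /* Only borrow the difference on top of what the bucket already holds. */
        if (borrow >= (int64_t)asc_borrowed)
                borrow -= asc_borrowed;                         /* 190976 */
        else
                borrow = (borrow - (int64_t)asc_borrowed) / 4;

        printf("additional borrow = %lld\n", (long long)borrow);
        return (0);
}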
@@ -214,27 +211,37 @@ aggsum_add(aggsum_t *as, int64_t delta)
 int
 aggsum_compare(aggsum_t *as, uint64_t target)
 {
-        if (as->as_upper_bound < target)
+        int64_t lb;
+        uint64_t ub;
+        int i;
+
+        if (atomic_load_64(&as->as_upper_bound) < target)
                 return (-1);
-        if (as->as_lower_bound > target)
+        lb = atomic_load_64(&as->as_lower_bound);
+        if (lb > 0 && (uint64_t)lb > target)
                 return (1);
         mutex_enter(&as->as_lock);
-        for (int i = 0; i < as->as_numbuckets; i++) {
+        lb = as->as_lower_bound;
+        ub = as->as_upper_bound;
+        for (i = 0; i < as->as_numbuckets; i++) {
                 struct aggsum_bucket *asb = &as->as_buckets[i];
+                if (asb->asc_borrowed == 0)
+                        continue;
                 mutex_enter(&asb->asc_lock);
-                aggsum_flush_bucket(as, asb);
+                lb += asb->asc_delta + asb->asc_borrowed;
+                ub += asb->asc_delta - asb->asc_borrowed;
+                asb->asc_delta = 0;
+                asb->asc_borrowed = 0;
                 mutex_exit(&asb->asc_lock);
-                if (as->as_upper_bound < target) {
-                        mutex_exit(&as->as_lock);
-                        return (-1);
-                }
-                if (as->as_lower_bound > target) {
-                        mutex_exit(&as->as_lock);
-                        return (1);
-                }
+                if (ub < target || (lb > 0 && (uint64_t)lb > target))
+                        break;
+        }
+        if (i >= as->as_numbuckets) {
+                ASSERT3U(lb, ==, ub);
+                ASSERT3U(lb, ==, target);
         }
-        VERIFY3U(as->as_lower_bound, ==, as->as_upper_bound);
-        ASSERT3U(as->as_lower_bound, ==, target);
+        atomic_store_64(&as->as_lower_bound, lb);
+        atomic_store_64(&as->as_upper_bound, ub);
         mutex_exit(&as->as_lock);
-        return (0);
+        return (ub < target ? -1 : (uint64_t)lb > target ? 1 : 0);
 }
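
One behavioral note on the rewritten aggsum_compare(): the loop stops as soon as the partially folded bounds already decide the comparison, leaving the remaining buckets untouched, so the assertions only apply when every bucket was folded (i >= as_numbuckets), in which case the bounds must have met exactly at target. The final ternary reproduces the three outcomes the old early returns produced. A condensed model of just that final decision, with hypothetical bounds:

#include <stdint.h>
#include <stdio.h>

/*
 * Model of the new return expression: -1 if the sum is certainly below
 * target, 1 if certainly above, 0 only when the bounds met at target.
 */
static int
compare_bounds(int64_t lb, uint64_t ub, uint64_t target)
{
        return (ub < target ? -1 : (uint64_t)lb > target ? 1 : 0);
}

int
main(void)
{
        printf("%d\n", compare_bounds(100, 150, 200)); /* -1: upper bound below target */
        printf("%d\n", compare_bounds(300, 400, 200)); /*  1: lower bound above target */
        printf("%d\n", compare_bounds(200, 200, 200)); /*  0: bounds met at target */
        return (0);
}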