Skip to content

Commit 514d661

Browse files
authored
Tune zio buffer caches and their alignments
We should not always use PAGESIZE alignment for caches bigger than it and SPA_MINBLOCKSIZE otherwise. Doing so, caches for 5, 6, 7, 10 and 14KB get rounded up to 8, 12 and 16KB respectively, which makes no sense. Instead, specify the biggest power-of-2 divisor of the size as the alignment. This way 2KB and 6KB caches are both aligned to 2KB, while 4KB and 8KB are aligned to 4KB. Reduce the number of caches to one per half-power of 2 instead of one per quarter-power of 2. This removes caches that are difficult for the underlying allocators to fit into page-granular slabs, such as 2.5, 3.5, 5, 7, 10KB, etc. Since these caches are mostly used for transient allocations like ZIOs and the small DBUF cache, it is not worth being too aggressive. Due to the above alignment issue, some of those caches were not working properly anyway. The 6KB cache now finally has a chance to work right, placing 2 buffers into 3 pages, which makes sense. Remove the explicit alignment in the Linux user-space case. I don't think it should be needed any more with the above fixes. As a result, on FreeBSD, instead of these numbers of pages per slab: vm.uma.zio_buf_comb_16384.keg.ppera: 4 vm.uma.zio_buf_comb_14336.keg.ppera: 4 vm.uma.zio_buf_comb_12288.keg.ppera: 3 vm.uma.zio_buf_comb_10240.keg.ppera: 3 vm.uma.zio_buf_comb_8192.keg.ppera: 2 vm.uma.zio_buf_comb_7168.keg.ppera: 2 vm.uma.zio_buf_comb_6144.keg.ppera: 2 <= Broken vm.uma.zio_buf_comb_5120.keg.ppera: 2 vm.uma.zio_buf_comb_4096.keg.ppera: 1 vm.uma.zio_buf_comb_3584.keg.ppera: 7 <= Hard to free vm.uma.zio_buf_comb_3072.keg.ppera: 3 vm.uma.zio_buf_comb_2560.keg.ppera: 2 vm.uma.zio_buf_comb_2048.keg.ppera: 1 vm.uma.zio_buf_comb_1536.keg.ppera: 2 vm.uma.zio_buf_comb_1024.keg.ppera: 1 vm.uma.zio_buf_comb_512.keg.ppera: 1 I am now getting these: vm.uma.zio_buf_comb_16384.keg.ppera: 4 vm.uma.zio_buf_comb_12288.keg.ppera: 3 vm.uma.zio_buf_comb_8192.keg.ppera: 2 vm.uma.zio_buf_comb_6144.keg.ppera: 3 <= Fixed, 2 in 3 pages vm.uma.zio_buf_comb_4096.keg.ppera: 1 vm.uma.zio_buf_comb_3072.keg.ppera: 3 
vm.uma.zio_buf_comb_2048.keg.ppera: 1 vm.uma.zio_buf_comb_1536.keg.ppera: 2 vm.uma.zio_buf_comb_1024.keg.ppera: 1 vm.uma.zio_buf_comb_512.keg.ppera: 1 Reviewed-by: Allan Jude <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc. Closes #15452
1 parent 05a7348 commit 514d661

File tree

1 file changed

+39
-50
lines changed

1 file changed

+39
-50
lines changed

module/zfs/zio.c

Lines changed: 39 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -158,23 +158,22 @@ zio_init(void)
158158
zio_link_cache = kmem_cache_create("zio_link_cache",
159159
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
160160

161-
/*
162-
* For small buffers, we want a cache for each multiple of
163-
* SPA_MINBLOCKSIZE. For larger buffers, we want a cache
164-
* for each quarter-power of 2.
165-
*/
166161
for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
167162
size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
168-
size_t p2 = size;
169-
size_t align = 0;
170-
size_t data_cflags, cflags;
171-
172-
data_cflags = KMC_NODEBUG;
173-
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
174-
KMC_NODEBUG : 0;
163+
size_t align, cflags, data_cflags;
164+
char name[32];
175165

166+
/*
167+
* Create cache for each half-power of 2 size, starting from
168+
* SPA_MINBLOCKSIZE. It should give us memory space efficiency
169+
* of ~7/8, sufficient for transient allocations mostly using
170+
* these caches.
171+
*/
172+
size_t p2 = size;
176173
while (!ISP2(p2))
177174
p2 &= p2 - 1;
175+
if (!IS_P2ALIGNED(size, p2 / 2))
176+
continue;
178177

179178
#ifndef _KERNEL
180179
/*
@@ -185,47 +184,37 @@ zio_init(void)
185184
*/
186185
if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
187186
continue;
188-
/*
189-
* Here's the problem - on 4K native devices in userland on
190-
* Linux using O_DIRECT, buffers must be 4K aligned or I/O
191-
* will fail with EINVAL, causing zdb (and others) to coredump.
192-
* Since userland probably doesn't need optimized buffer caches,
193-
* we just force 4K alignment on everything.
194-
*/
195-
align = 8 * SPA_MINBLOCKSIZE;
196-
#else
197-
if (size < PAGESIZE) {
198-
align = SPA_MINBLOCKSIZE;
199-
} else if (IS_P2ALIGNED(size, p2 >> 2)) {
200-
align = PAGESIZE;
201-
}
202187
#endif
203188

204-
if (align != 0) {
205-
char name[36];
206-
if (cflags == data_cflags) {
207-
/*
208-
* Resulting kmem caches would be identical.
209-
* Save memory by creating only one.
210-
*/
211-
(void) snprintf(name, sizeof (name),
212-
"zio_buf_comb_%lu", (ulong_t)size);
213-
zio_buf_cache[c] = kmem_cache_create(name,
214-
size, align, NULL, NULL, NULL, NULL, NULL,
215-
cflags);
216-
zio_data_buf_cache[c] = zio_buf_cache[c];
217-
continue;
218-
}
219-
(void) snprintf(name, sizeof (name), "zio_buf_%lu",
220-
(ulong_t)size);
221-
zio_buf_cache[c] = kmem_cache_create(name, size,
222-
align, NULL, NULL, NULL, NULL, NULL, cflags);
223-
224-
(void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
225-
(ulong_t)size);
226-
zio_data_buf_cache[c] = kmem_cache_create(name, size,
227-
align, NULL, NULL, NULL, NULL, NULL, data_cflags);
189+
if (IS_P2ALIGNED(size, PAGESIZE))
190+
align = PAGESIZE;
191+
else
192+
align = 1 << (highbit64(size ^ (size - 1)) - 1);
193+
194+
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
195+
KMC_NODEBUG : 0;
196+
data_cflags = KMC_NODEBUG;
197+
if (cflags == data_cflags) {
198+
/*
199+
* Resulting kmem caches would be identical.
200+
* Save memory by creating only one.
201+
*/
202+
(void) snprintf(name, sizeof (name),
203+
"zio_buf_comb_%lu", (ulong_t)size);
204+
zio_buf_cache[c] = kmem_cache_create(name, size, align,
205+
NULL, NULL, NULL, NULL, NULL, cflags);
206+
zio_data_buf_cache[c] = zio_buf_cache[c];
207+
continue;
228208
}
209+
(void) snprintf(name, sizeof (name), "zio_buf_%lu",
210+
(ulong_t)size);
211+
zio_buf_cache[c] = kmem_cache_create(name, size, align,
212+
NULL, NULL, NULL, NULL, NULL, cflags);
213+
214+
(void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
215+
(ulong_t)size);
216+
zio_data_buf_cache[c] = kmem_cache_create(name, size, align,
217+
NULL, NULL, NULL, NULL, NULL, data_cflags);
229218
}
230219

231220
while (--c != 0) {

0 commit comments

Comments
 (0)