Skip to content

Commit 32dc05a

Browse files
committed
ARC: Drop different size headers for crypto
To reduce memory usage ZFS crypto allocated bigger by 64 bytes ARC headers only when specific block was encrypted on disk. It was a nice optimization, except in some cases the code reallocated them on fly, that invalidated header pointers from the buffers. Since the buffers use different locking, it created number of races, that were originally covered (at least partially) by b_evict_lock, used also to protection evictions. But it has gone as part of #14340. As result, as was found in #15293, arc_hdr_realloc_crypt() ended up unprotected and causing use-after-free. Instead of introducing some even more elaborate locking, this patch just drops the difference between normal and protected headers. It cost us additional 64 bytes per header, but with couple patches saving 24 bytes, the net growth is only 40 bytes with total header size of 240 bytes on FreeBSD, that IMHO is acceptable price for simplicity. Additional locking would also end up consuming space, time or both. Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc.
1 parent 96f5dea commit 32dc05a

File tree

1 file changed

+8
-176
lines changed

1 file changed

+8
-176
lines changed

module/zfs/arc.c

Lines changed: 8 additions & 176 deletions
Original file line numberDiff line numberDiff line change
@@ -748,8 +748,7 @@ taskq_t *arc_prune_taskq;
748748
* Other sizes
749749
*/
750750

751-
#define HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
752-
#define HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr))
751+
#define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
753752
#define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
754753

755754
/*
@@ -1113,7 +1112,6 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
11131112
*/
11141113

11151114
static kmem_cache_t *hdr_full_cache;
1116-
static kmem_cache_t *hdr_full_crypt_cache;
11171115
static kmem_cache_t *hdr_l2only_cache;
11181116
static kmem_cache_t *buf_cache;
11191117

@@ -1134,7 +1132,6 @@ buf_fini(void)
11341132
for (int i = 0; i < BUF_LOCKS; i++)
11351133
mutex_destroy(BUF_HASH_LOCK(i));
11361134
kmem_cache_destroy(hdr_full_cache);
1137-
kmem_cache_destroy(hdr_full_crypt_cache);
11381135
kmem_cache_destroy(hdr_l2only_cache);
11391136
kmem_cache_destroy(buf_cache);
11401137
}
@@ -1162,19 +1159,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
11621159
return (0);
11631160
}
11641161

1165-
static int
1166-
hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag)
1167-
{
1168-
(void) unused;
1169-
arc_buf_hdr_t *hdr = vbuf;
1170-
1171-
hdr_full_cons(vbuf, unused, kmflag);
1172-
memset(&hdr->b_crypt_hdr, 0, sizeof (hdr->b_crypt_hdr));
1173-
arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
1174-
1175-
return (0);
1176-
}
1177-
11781162
static int
11791163
hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
11801164
{
@@ -1218,16 +1202,6 @@ hdr_full_dest(void *vbuf, void *unused)
12181202
arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS);
12191203
}
12201204

1221-
static void
1222-
hdr_full_crypt_dest(void *vbuf, void *unused)
1223-
{
1224-
(void) vbuf, (void) unused;
1225-
1226-
hdr_full_dest(vbuf, unused);
1227-
arc_space_return(sizeof (((arc_buf_hdr_t *)NULL)->b_crypt_hdr),
1228-
ARC_SPACE_HDRS);
1229-
}
1230-
12311205
static void
12321206
hdr_l2only_dest(void *vbuf, void *unused)
12331207
{
@@ -1283,9 +1257,6 @@ buf_init(void)
12831257

12841258
hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
12851259
0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0);
1286-
hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt",
1287-
HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest,
1288-
NULL, NULL, NULL, 0);
12891260
hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
12901261
HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL,
12911262
NULL, NULL, 0);
@@ -3275,11 +3246,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
32753246
arc_buf_hdr_t *hdr;
32763247

32773248
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
3278-
if (protected) {
3279-
hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE);
3280-
} else {
3281-
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
3282-
}
3249+
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
32833250

32843251
ASSERT(HDR_EMPTY(hdr));
32853252
#ifdef ZFS_DEBUG
@@ -3327,24 +3294,14 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
33273294
ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
33283295
(old == hdr_l2only_cache && new == hdr_full_cache));
33293296

3330-
/*
3331-
* if the caller wanted a new full header and the header is to be
3332-
* encrypted we will actually allocate the header from the full crypt
3333-
* cache instead. The same applies to freeing from the old cache.
3334-
*/
3335-
if (HDR_PROTECTED(hdr) && new == hdr_full_cache)
3336-
new = hdr_full_crypt_cache;
3337-
if (HDR_PROTECTED(hdr) && old == hdr_full_cache)
3338-
old = hdr_full_crypt_cache;
3339-
33403297
nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
33413298

33423299
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
33433300
buf_hash_remove(hdr);
33443301

33453302
memcpy(nhdr, hdr, HDR_L2ONLY_SIZE);
33463303

3347-
if (new == hdr_full_cache || new == hdr_full_crypt_cache) {
3304+
if (new == hdr_full_cache) {
33483305
arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR);
33493306
/*
33503307
* arc_access and arc_change_state need to be aware that a
@@ -3423,124 +3380,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
34233380
return (nhdr);
34243381
}
34253382

3426-
/*
3427-
* This function allows an L1 header to be reallocated as a crypt
3428-
* header and vice versa. If we are going to a crypt header, the
3429-
* new fields will be zeroed out.
3430-
*/
3431-
static arc_buf_hdr_t *
3432-
arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
3433-
{
3434-
arc_buf_hdr_t *nhdr;
3435-
arc_buf_t *buf;
3436-
kmem_cache_t *ncache, *ocache;
3437-
3438-
/*
3439-
* This function requires that hdr is in the arc_anon state.
3440-
* Therefore it won't have any L2ARC data for us to worry
3441-
* about copying.
3442-
*/
3443-
ASSERT(HDR_HAS_L1HDR(hdr));
3444-
ASSERT(!HDR_HAS_L2HDR(hdr));
3445-
ASSERT3U(!!HDR_PROTECTED(hdr), !=, need_crypt);
3446-
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
3447-
ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
3448-
ASSERT(!list_link_active(&hdr->b_l2hdr.b_l2node));
3449-
ASSERT3P(hdr->b_hash_next, ==, NULL);
3450-
3451-
if (need_crypt) {
3452-
ncache = hdr_full_crypt_cache;
3453-
ocache = hdr_full_cache;
3454-
} else {
3455-
ncache = hdr_full_cache;
3456-
ocache = hdr_full_crypt_cache;
3457-
}
3458-
3459-
nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE);
3460-
3461-
/*
3462-
* Copy all members that aren't locks or condvars to the new header.
3463-
* No lists are pointing to us (as we asserted above), so we don't
3464-
* need to worry about the list nodes.
3465-
*/
3466-
nhdr->b_dva = hdr->b_dva;
3467-
nhdr->b_birth = hdr->b_birth;
3468-
nhdr->b_type = hdr->b_type;
3469-
nhdr->b_flags = hdr->b_flags;
3470-
nhdr->b_psize = hdr->b_psize;
3471-
nhdr->b_lsize = hdr->b_lsize;
3472-
nhdr->b_spa = hdr->b_spa;
3473-
#ifdef ZFS_DEBUG
3474-
nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum;
3475-
#endif
3476-
nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap;
3477-
nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state;
3478-
nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access;
3479-
nhdr->b_l1hdr.b_mru_hits = hdr->b_l1hdr.b_mru_hits;
3480-
nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
3481-
nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
3482-
nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
3483-
nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
3484-
nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
3485-
3486-
/*
3487-
* This zfs_refcount_add() exists only to ensure that the individual
3488-
* arc buffers always point to a header that is referenced, avoiding
3489-
* a small race condition that could trigger ASSERTs.
3490-
*/
3491-
(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG);
3492-
nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf;
3493-
for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next)
3494-
buf->b_hdr = nhdr;
3495-
3496-
zfs_refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt);
3497-
(void) zfs_refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG);
3498-
ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
3499-
3500-
if (need_crypt) {
3501-
arc_hdr_set_flags(nhdr, ARC_FLAG_PROTECTED);
3502-
} else {
3503-
arc_hdr_clear_flags(nhdr, ARC_FLAG_PROTECTED);
3504-
}
3505-
3506-
/* unset all members of the original hdr */
3507-
memset(&hdr->b_dva, 0, sizeof (dva_t));
3508-
hdr->b_birth = 0;
3509-
hdr->b_type = 0;
3510-
hdr->b_flags = 0;
3511-
hdr->b_psize = 0;
3512-
hdr->b_lsize = 0;
3513-
hdr->b_spa = 0;
3514-
#ifdef ZFS_DEBUG
3515-
hdr->b_l1hdr.b_freeze_cksum = NULL;
3516-
#endif
3517-
hdr->b_l1hdr.b_buf = NULL;
3518-
hdr->b_l1hdr.b_byteswap = 0;
3519-
hdr->b_l1hdr.b_state = NULL;
3520-
hdr->b_l1hdr.b_arc_access = 0;
3521-
hdr->b_l1hdr.b_mru_hits = 0;
3522-
hdr->b_l1hdr.b_mru_ghost_hits = 0;
3523-
hdr->b_l1hdr.b_mfu_hits = 0;
3524-
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
3525-
hdr->b_l1hdr.b_acb = NULL;
3526-
hdr->b_l1hdr.b_pabd = NULL;
3527-
3528-
if (ocache == hdr_full_crypt_cache) {
3529-
ASSERT(!HDR_HAS_RABD(hdr));
3530-
hdr->b_crypt_hdr.b_ot = DMU_OT_NONE;
3531-
hdr->b_crypt_hdr.b_ebufcnt = 0;
3532-
hdr->b_crypt_hdr.b_dsobj = 0;
3533-
memset(hdr->b_crypt_hdr.b_salt, 0, ZIO_DATA_SALT_LEN);
3534-
memset(hdr->b_crypt_hdr.b_iv, 0, ZIO_DATA_IV_LEN);
3535-
memset(hdr->b_crypt_hdr.b_mac, 0, ZIO_DATA_MAC_LEN);
3536-
}
3537-
3538-
buf_discard_identity(hdr);
3539-
kmem_cache_free(ocache, hdr);
3540-
3541-
return (nhdr);
3542-
}
3543-
35443383
/*
35453384
* This function is used by the send / receive code to convert a newly
35463385
* allocated arc_buf_t to one that is suitable for a raw encrypted write. It
@@ -3560,8 +3399,7 @@ arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
35603399
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
35613400

35623401
buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED);
3563-
if (!HDR_PROTECTED(hdr))
3564-
hdr = arc_hdr_realloc_crypt(hdr, B_TRUE);
3402+
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
35653403
hdr->b_crypt_hdr.b_dsobj = dsobj;
35663404
hdr->b_crypt_hdr.b_ot = ot;
35673405
hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ?
@@ -3825,12 +3663,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
38253663
#ifdef ZFS_DEBUG
38263664
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
38273665
#endif
3828-
3829-
if (!HDR_PROTECTED(hdr)) {
3830-
kmem_cache_free(hdr_full_cache, hdr);
3831-
} else {
3832-
kmem_cache_free(hdr_full_crypt_cache, hdr);
3833-
}
3666+
kmem_cache_free(hdr_full_cache, hdr);
38343667
} else {
38353668
kmem_cache_free(hdr_l2only_cache, hdr);
38363669
}
@@ -6533,13 +6366,9 @@ arc_write_ready(zio_t *zio)
65336366
add_reference(hdr, hdr); /* For IO_IN_PROGRESS. */
65346367
}
65356368

6536-
if (BP_IS_PROTECTED(bp) != !!HDR_PROTECTED(hdr))
6537-
hdr = arc_hdr_realloc_crypt(hdr, BP_IS_PROTECTED(bp));
6538-
65396369
if (BP_IS_PROTECTED(bp)) {
65406370
/* ZIL blocks are written through zio_rewrite */
65416371
ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
6542-
ASSERT(HDR_PROTECTED(hdr));
65436372

65446373
if (BP_SHOULD_BYTESWAP(bp)) {
65456374
if (BP_GET_LEVEL(bp) > 0) {
@@ -6552,11 +6381,14 @@ arc_write_ready(zio_t *zio)
65526381
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
65536382
}
65546383

6384+
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
65556385
hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
65566386
hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
65576387
zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
65586388
hdr->b_crypt_hdr.b_iv);
65596389
zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
6390+
} else {
6391+
arc_hdr_clear_flags(hdr, ARC_FLAG_PROTECTED);
65606392
}
65616393

65626394
/*

0 commit comments

Comments
 (0)