Skip to content

Commit 75a2eb7

Browse files
amotinbehlendorf
authored andcommitted
ARC: Drop different size headers for crypto
To reduce memory usage ZFS crypto allocated bigger by 56 bytes ARC headers only when specific block was encrypted on disk. It was a nice optimization, except in some cases the code reallocated them on fly, that invalidated header pointers from the buffers. Since the buffers use different locking, it created number of races, that were originally covered (at least partially) by b_evict_lock, used also to protection evictions. But it has gone as part of #14340. As result, as was found in #15293, arc_hdr_realloc_crypt() ended up unprotected and causing use-after-free. Instead of introducing some even more elaborate locking, this patch just drops the difference between normal and protected headers. It cost us additional 56 bytes per header, but with couple patches saving 24 bytes, the net growth is only 32 bytes with total header size of 232 bytes on FreeBSD, that IMHO is acceptable price for simplicity. Additional locking would also end up consuming space, time or both. Reviewe-by: Brian Behlendorf <[email protected]> Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc. Closes #15293 Closes #15347
1 parent 96b9cf4 commit 75a2eb7

File tree

1 file changed

+8
-175
lines changed

1 file changed

+8
-175
lines changed

module/zfs/arc.c

Lines changed: 8 additions & 175 deletions
Original file line numberDiff line numberDiff line change
@@ -748,8 +748,7 @@ taskq_t *arc_prune_taskq;
748748
* Other sizes
749749
*/
750750

751-
#define HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
752-
#define HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr))
751+
#define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
753752
#define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
754753

755754
/*
@@ -1113,7 +1112,6 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
11131112
*/
11141113

11151114
static kmem_cache_t *hdr_full_cache;
1116-
static kmem_cache_t *hdr_full_crypt_cache;
11171115
static kmem_cache_t *hdr_l2only_cache;
11181116
static kmem_cache_t *buf_cache;
11191117

@@ -1134,7 +1132,6 @@ buf_fini(void)
11341132
for (int i = 0; i < BUF_LOCKS; i++)
11351133
mutex_destroy(BUF_HASH_LOCK(i));
11361134
kmem_cache_destroy(hdr_full_cache);
1137-
kmem_cache_destroy(hdr_full_crypt_cache);
11381135
kmem_cache_destroy(hdr_l2only_cache);
11391136
kmem_cache_destroy(buf_cache);
11401137
}
@@ -1162,19 +1159,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
11621159
return (0);
11631160
}
11641161

1165-
static int
1166-
hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag)
1167-
{
1168-
(void) unused;
1169-
arc_buf_hdr_t *hdr = vbuf;
1170-
1171-
hdr_full_cons(vbuf, unused, kmflag);
1172-
memset(&hdr->b_crypt_hdr, 0, sizeof (hdr->b_crypt_hdr));
1173-
arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
1174-
1175-
return (0);
1176-
}
1177-
11781162
static int
11791163
hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
11801164
{
@@ -1218,16 +1202,6 @@ hdr_full_dest(void *vbuf, void *unused)
12181202
arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS);
12191203
}
12201204

1221-
static void
1222-
hdr_full_crypt_dest(void *vbuf, void *unused)
1223-
{
1224-
(void) vbuf, (void) unused;
1225-
1226-
hdr_full_dest(vbuf, unused);
1227-
arc_space_return(sizeof (((arc_buf_hdr_t *)NULL)->b_crypt_hdr),
1228-
ARC_SPACE_HDRS);
1229-
}
1230-
12311205
static void
12321206
hdr_l2only_dest(void *vbuf, void *unused)
12331207
{
@@ -1283,9 +1257,6 @@ buf_init(void)
12831257

12841258
hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
12851259
0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0);
1286-
hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt",
1287-
HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest,
1288-
NULL, NULL, NULL, 0);
12891260
hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
12901261
HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL,
12911262
NULL, NULL, 0);
@@ -3273,11 +3244,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
32733244
arc_buf_hdr_t *hdr;
32743245

32753246
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
3276-
if (protected) {
3277-
hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE);
3278-
} else {
3279-
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
3280-
}
3247+
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
32813248

32823249
ASSERT(HDR_EMPTY(hdr));
32833250
#ifdef ZFS_DEBUG
@@ -3325,24 +3292,14 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
33253292
ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
33263293
(old == hdr_l2only_cache && new == hdr_full_cache));
33273294

3328-
/*
3329-
* if the caller wanted a new full header and the header is to be
3330-
* encrypted we will actually allocate the header from the full crypt
3331-
* cache instead. The same applies to freeing from the old cache.
3332-
*/
3333-
if (HDR_PROTECTED(hdr) && new == hdr_full_cache)
3334-
new = hdr_full_crypt_cache;
3335-
if (HDR_PROTECTED(hdr) && old == hdr_full_cache)
3336-
old = hdr_full_crypt_cache;
3337-
33383295
nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
33393296

33403297
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
33413298
buf_hash_remove(hdr);
33423299

33433300
memcpy(nhdr, hdr, HDR_L2ONLY_SIZE);
33443301

3345-
if (new == hdr_full_cache || new == hdr_full_crypt_cache) {
3302+
if (new == hdr_full_cache) {
33463303
arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR);
33473304
/*
33483305
* arc_access and arc_change_state need to be aware that a
@@ -3421,123 +3378,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
34213378
return (nhdr);
34223379
}
34233380

3424-
/*
3425-
* This function allows an L1 header to be reallocated as a crypt
3426-
* header and vice versa. If we are going to a crypt header, the
3427-
* new fields will be zeroed out.
3428-
*/
3429-
static arc_buf_hdr_t *
3430-
arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
3431-
{
3432-
arc_buf_hdr_t *nhdr;
3433-
arc_buf_t *buf;
3434-
kmem_cache_t *ncache, *ocache;
3435-
3436-
/*
3437-
* This function requires that hdr is in the arc_anon state.
3438-
* Therefore it won't have any L2ARC data for us to worry
3439-
* about copying.
3440-
*/
3441-
ASSERT(HDR_HAS_L1HDR(hdr));
3442-
ASSERT(!HDR_HAS_L2HDR(hdr));
3443-
ASSERT3U(!!HDR_PROTECTED(hdr), !=, need_crypt);
3444-
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
3445-
ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
3446-
ASSERT(!list_link_active(&hdr->b_l2hdr.b_l2node));
3447-
ASSERT3P(hdr->b_hash_next, ==, NULL);
3448-
3449-
if (need_crypt) {
3450-
ncache = hdr_full_crypt_cache;
3451-
ocache = hdr_full_cache;
3452-
} else {
3453-
ncache = hdr_full_cache;
3454-
ocache = hdr_full_crypt_cache;
3455-
}
3456-
3457-
nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE);
3458-
3459-
/*
3460-
* Copy all members that aren't locks or condvars to the new header.
3461-
* No lists are pointing to us (as we asserted above), so we don't
3462-
* need to worry about the list nodes.
3463-
*/
3464-
nhdr->b_dva = hdr->b_dva;
3465-
nhdr->b_birth = hdr->b_birth;
3466-
nhdr->b_type = hdr->b_type;
3467-
nhdr->b_flags = hdr->b_flags;
3468-
nhdr->b_psize = hdr->b_psize;
3469-
nhdr->b_lsize = hdr->b_lsize;
3470-
nhdr->b_spa = hdr->b_spa;
3471-
#ifdef ZFS_DEBUG
3472-
nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum;
3473-
#endif
3474-
nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap;
3475-
nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state;
3476-
nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access;
3477-
nhdr->b_l1hdr.b_mru_hits = hdr->b_l1hdr.b_mru_hits;
3478-
nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
3479-
nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
3480-
nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
3481-
nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
3482-
nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
3483-
3484-
/*
3485-
* This zfs_refcount_add() exists only to ensure that the individual
3486-
* arc buffers always point to a header that is referenced, avoiding
3487-
* a small race condition that could trigger ASSERTs.
3488-
*/
3489-
(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG);
3490-
nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf;
3491-
for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next)
3492-
buf->b_hdr = nhdr;
3493-
3494-
zfs_refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt);
3495-
(void) zfs_refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG);
3496-
ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
3497-
3498-
if (need_crypt) {
3499-
arc_hdr_set_flags(nhdr, ARC_FLAG_PROTECTED);
3500-
} else {
3501-
arc_hdr_clear_flags(nhdr, ARC_FLAG_PROTECTED);
3502-
}
3503-
3504-
/* unset all members of the original hdr */
3505-
memset(&hdr->b_dva, 0, sizeof (dva_t));
3506-
hdr->b_birth = 0;
3507-
hdr->b_type = 0;
3508-
hdr->b_flags = 0;
3509-
hdr->b_psize = 0;
3510-
hdr->b_lsize = 0;
3511-
hdr->b_spa = 0;
3512-
#ifdef ZFS_DEBUG
3513-
hdr->b_l1hdr.b_freeze_cksum = NULL;
3514-
#endif
3515-
hdr->b_l1hdr.b_buf = NULL;
3516-
hdr->b_l1hdr.b_byteswap = 0;
3517-
hdr->b_l1hdr.b_state = NULL;
3518-
hdr->b_l1hdr.b_arc_access = 0;
3519-
hdr->b_l1hdr.b_mru_hits = 0;
3520-
hdr->b_l1hdr.b_mru_ghost_hits = 0;
3521-
hdr->b_l1hdr.b_mfu_hits = 0;
3522-
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
3523-
hdr->b_l1hdr.b_acb = NULL;
3524-
hdr->b_l1hdr.b_pabd = NULL;
3525-
3526-
if (ocache == hdr_full_crypt_cache) {
3527-
ASSERT(!HDR_HAS_RABD(hdr));
3528-
hdr->b_crypt_hdr.b_ot = DMU_OT_NONE;
3529-
hdr->b_crypt_hdr.b_dsobj = 0;
3530-
memset(hdr->b_crypt_hdr.b_salt, 0, ZIO_DATA_SALT_LEN);
3531-
memset(hdr->b_crypt_hdr.b_iv, 0, ZIO_DATA_IV_LEN);
3532-
memset(hdr->b_crypt_hdr.b_mac, 0, ZIO_DATA_MAC_LEN);
3533-
}
3534-
3535-
buf_discard_identity(hdr);
3536-
kmem_cache_free(ocache, hdr);
3537-
3538-
return (nhdr);
3539-
}
3540-
35413381
/*
35423382
* This function is used by the send / receive code to convert a newly
35433383
* allocated arc_buf_t to one that is suitable for a raw encrypted write. It
@@ -3557,8 +3397,7 @@ arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
35573397
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
35583398

35593399
buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED);
3560-
if (!HDR_PROTECTED(hdr))
3561-
hdr = arc_hdr_realloc_crypt(hdr, B_TRUE);
3400+
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
35623401
hdr->b_crypt_hdr.b_dsobj = dsobj;
35633402
hdr->b_crypt_hdr.b_ot = ot;
35643403
hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ?
@@ -3822,12 +3661,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
38223661
#ifdef ZFS_DEBUG
38233662
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
38243663
#endif
3825-
3826-
if (!HDR_PROTECTED(hdr)) {
3827-
kmem_cache_free(hdr_full_cache, hdr);
3828-
} else {
3829-
kmem_cache_free(hdr_full_crypt_cache, hdr);
3830-
}
3664+
kmem_cache_free(hdr_full_cache, hdr);
38313665
} else {
38323666
kmem_cache_free(hdr_l2only_cache, hdr);
38333667
}
@@ -6525,13 +6359,9 @@ arc_write_ready(zio_t *zio)
65256359
add_reference(hdr, hdr); /* For IO_IN_PROGRESS. */
65266360
}
65276361

6528-
if (BP_IS_PROTECTED(bp) != !!HDR_PROTECTED(hdr))
6529-
hdr = arc_hdr_realloc_crypt(hdr, BP_IS_PROTECTED(bp));
6530-
65316362
if (BP_IS_PROTECTED(bp)) {
65326363
/* ZIL blocks are written through zio_rewrite */
65336364
ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
6534-
ASSERT(HDR_PROTECTED(hdr));
65356365

65366366
if (BP_SHOULD_BYTESWAP(bp)) {
65376367
if (BP_GET_LEVEL(bp) > 0) {
@@ -6544,11 +6374,14 @@ arc_write_ready(zio_t *zio)
65446374
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
65456375
}
65466376

6377+
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
65476378
hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
65486379
hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
65496380
zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
65506381
hdr->b_crypt_hdr.b_iv);
65516382
zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
6383+
} else {
6384+
arc_hdr_clear_flags(hdr, ARC_FLAG_PROTECTED);
65526385
}
65536386

65546387
/*

0 commit comments

Comments
 (0)