Skip to content

panic: corrupted memory in l2arc #15506

Closed as not planned
Closed as not planned
@glebius

Description

@glebius

System information

FreeBSD 15-CURRENT @ d6e457328d0e
OpenZFS @ 41e55b476bcf
zfs-2.2.99-184-FreeBSD_g41e55b476
zfs-kmod-2.2.99-184-FreeBSD_g41e55b476

Describe the problem you're observing

I got a kernel panic while running a kernel with INVARIANTS enabled. The panic happened during the nightly job run, which triggers a `find /`. The filesystem has zvols on it, and the pool configuration includes L2ARC.

#7  <signal handler called>
#8  0xffffffff8041b0a5 in buf_hash_find (spa=9365292975275091231, bp=0xfffffe025ed99650, lockp=0xfffffe025ed99598)
    at /usr/src/FreeBSD/sys/contrib/openzfs/module/zfs/arc.c:1025
#9  0xffffffff804185eb in arc_read (pio=0xfffffe02b3910600, spa=0xfffffe022ed8c000, bp=0xfffffe025ed99650, 
    done=0xffffffff80462850 <dbuf_read_done>, private=0xfffff8001a0ca7f8, priority=ZIO_PRIORITY_SYNC_READ, zio_flags=128, 
    arc_flags=0xfffffe025ed996dc, zb=0xfffffe025ed996e0) at /usr/src/FreeBSD/sys/contrib/openzfs/module/zfs/arc.c:5512
#10 0xffffffff804512d5 in dbuf_read_impl (db=0xfffff8001a0ca7f8, zio=0xfffffe02b3910600, flags=30, dblt=DLT_PARENT, tag=0xffffffff81177b83)
    at /usr/src/FreeBSD/sys/contrib/openzfs/module/zfs/dbuf.c:1658
#11 0xffffffff8044f9bd in dbuf_read (db=0xfffff8001a0ca7f8, zio=0xfffffe02b3910600, flags=30)
    at /usr/src/FreeBSD/sys/contrib/openzfs/module/zfs/dbuf.c:1817
#12 0xffffffff804731d8 in dmu_buf_hold_array_by_dnode (dn=0xfffff80a1cc287f0, offset=96128983040, length=32768, read=1, tag=0xffffffff8119becb, 
    numbufsp=0xfffffe025ed9990c, dbpp=0xfffffe025ed99910, flags=0) at /usr/src/FreeBSD/sys/contrib/openzfs/module/zfs/dmu.c:598
#13 0xffffffff804744bb in dmu_read_impl (dn=0xfffff80a1cc287f0, offset=96128983040, size=32768, buf=0xfffffe00d472c200, flags=0)
    at /usr/src/FreeBSD/sys/contrib/openzfs/module/zfs/dmu.c:1069
#14 0xffffffff80474369 in dmu_read (os=0xfffff80a1ff92000, object=1, offset=96128983040, size=32768, buf=0xfffffe00d472c200, flags=0)
    at /usr/src/FreeBSD/sys/contrib/openzfs/module/zfs/dmu.c:1106
#15 0xffffffff803eba1a in zvol_geom_bio_strategy (bp=0xfffff800021b62f0)
    at /usr/src/FreeBSD/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c:747
#16 0xffffffff803e907a in zvol_geom_bio_start (bp=0xfffff800021b62f0) at /usr/src/FreeBSD/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c:576
#17 0xffffffff80b61e5d in g_io_request (bp=0xfffff800021b62f0, cp=0xfffff8000336b880) at /usr/src/FreeBSD/sys/geom/geom_io.c:587
#18 0xffffffff80b5ba12 in g_dev_strategy (bp=0xfffff804639922f0) at /usr/src/FreeBSD/sys/geom/geom_dev.c:803
#19 0xffffffff80c1d576 in physio (dev=0xfffff8000311fc00, uio=0xfffffe025ed99da0, ioflag=0) at /usr/src/FreeBSD/sys/kern/kern_physio.c:175
#20 0xffffffff80a5fd71 in devfs_read_f (fp=0xfffff8002d5c4730, uio=0xfffffe025ed99da0, cred=0xfffff80a202bca00, flags=1, td=0xfffff808d6d9a740)
    at /usr/src/FreeBSD/sys/fs/devfs/devfs_vnops.c:1415
#21 0xffffffff80cf2cdb in fo_read (fp=0xfffff8002d5c4730, uio=0xfffffe025ed99da0, active_cred=0xfffff80a202bca00, flags=1, td=0xfffff808d6d9a740)
    at /usr/src/FreeBSD/sys/sys/file.h:342
#22 0xffffffff80cee2c9 in dofileread (td=0xfffff808d6d9a740, fd=7, fp=0xfffff8002d5c4730, auio=0xfffffe025ed99da0, offset=96128983040, flags=1)
    at /usr/src/FreeBSD/sys/kern/sys_generic.c:367
#23 0xffffffff80cee124 in kern_preadv (td=0xfffff808d6d9a740, fd=7, auio=0xfffffe025ed99da0, offset=96128983040)
    at /usr/src/FreeBSD/sys/kern/sys_generic.c:333
#24 0xffffffff80cee049 in kern_pread (td=0xfffff808d6d9a740, fd=7, buf=0x3660cbe00200, nbyte=32768, offset=96128983040)
    at /usr/src/FreeBSD/sys/kern/sys_generic.c:242
#25 0xffffffff80cedfb7 in sys_pread (td=0xfffff808d6d9a740, uap=0xfffff808d6d9ab40) at /usr/src/FreeBSD/sys/kern/sys_generic.c:224
#26 0xffffffff8109ca74 in syscallenter (td=0xfffff808d6d9a740) at /usr/src/FreeBSD/sys/amd64/amd64/../../kern/subr_syscall.c:188
#27 0xffffffff8109c215 in amd64_syscall (td=0xfffff808d6d9a740, traced=0) at /usr/src/FreeBSD/sys/amd64/amd64/trap.c:1194
(kgdb) frame 8
#8  0xffffffff8041b0a5 in buf_hash_find (spa=9365292975275091231, bp=0xfffffe025ed99650, lockp=0xfffffe025ed99598)
    at /usr/src/FreeBSD/sys/contrib/openzfs/module/zfs/arc.c:1025
1025                    if (HDR_EQUAL(spa, dva, birth, hdr)) {

The second hdr in the chain is corrupted:

(kgdb) p *buf_hash_table.ht_table[idx]->b_hash_next
$103 = {b_dva = {dva_word = {18439988536828035042, 18440551503961587687}}, b_birth = 18440270024689582052, 
  b_type = (ARC_BUFC_NUMTYPES | unknown: 0xffe5ffe0), b_complevel = 227 '\343', b_reserved1 = 255 '\377', b_reserved2 = 65511, 
  b_hash_next = 0xffeaffe5ffe9ffe2, 
  b_flags = (ARC_FLAG_NOWAIT | ARC_FLAG_UNCACHED | ARC_FLAG_PRESCIENT_PREFETCH | ARC_FLAG_IN_HASH_TABLE | ARC_FLAG_IO_IN_PROGRESS | ARC_FLAG_IO_ERROR | ARC_FLAG_INDIRECT | ARC_FLAG_PRIO_ASYNC_READ | ARC_FLAG_L2_WRITING | ARC_FLAG_L2_EVICTED | ARC_FLAG_L2_WRITE_HEAD | ARC_FLAG_PROTECTED | ARC_FLAG_HAS_L2HDR | ARC_FLAG_SHARED_DATA | ARC_FLAG_CACHED_ONLY | ARC_FLAG_NO_BUF | ARC_FLAG_COMPRESS_0 | ARC_FLAG_COMPRESS_1 | ARC_FLAG_COMPRESS_2 | ARC_FLAG_COMPRESS_3 | ARC_FLAG_COMPRESS_4 | ARC_FLAG_COMPRESS_5 | ARC_FLAG_COMPRESS_6 | unknown: 0x80000000), b_psize = 65503, b_lsize = 65507, 
  b_spa = 18438299686967377885, b_l2hdr = {b_dev = 0xffe5ffe1ffdfffdd, b_daddr = 18441395950366097377, b_hits = 4293722085, 
    b_arcs_state = 4293722088, b_l2node = {list_next = 0xffebffe7ffebffe6, list_prev = 0xffe8ffe3ffebffe6}}, b_l1hdr = {

The pointer points to a valid location in the hdr_l2only_cache zone, which is marked as allocated in the UMA slab metadata. It is the 3rd item in the slab. It appears that the first five entries in the slab are all trashed with a similar pattern. Starting with the sixth entry, the slab items aren't corrupted.

(kgdb) set $keg = hdr_l2only_cache->kc_zone->uz_keg
(kgdb) set $slabmem = (arc_buf_hdr_t *)((uintptr_t)buf_hash_table.ht_table[idx]->b_hash_next & ~4095)
(kgdb) p *(arc_buf_hdr_t *)((uintptr_t)$slabmem + $keg->uk_rsize * 4)
$111 = {b_dva = {dva_word = {18437455227677179862, 18438581149058924505}}, b_birth = 18440551508256030688, 
  b_type = (ARC_BUFC_NUMTYPES | unknown: 0xffebffe4), b_complevel = 230 '\346', b_reserved1 = 255 '\377', b_reserved2 = 65516, 
  b_hash_next = 0xfff0ffeafff2ffee, 
  b_flags = (ARC_FLAG_WAIT | ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH | ARC_FLAG_UNCACHED | ARC_FLAG_PRESCIENT_PREFETCH | ARC_FLAG_IN_HASH_TABLE | ARC_FLAG_IO_IN_PROGRESS | ARC_FLAG_IO_ERROR | ARC_FLAG_INDIRECT | ARC_FLAG_PRIO_ASYNC_READ | ARC_FLAG_L2_WRITING | ARC_FLAG_L2_EVICTED | ARC_FLAG_L2_WRITE_HEAD | ARC_FLAG_PROTECTED | ARC_FLAG_HAS_L1HDR | ARC_FLAG_HAS_L2HDR | ARC_FLAG_SHARED_DATA | ARC_FLAG_CACHED_ONLY | ARC_FLAG_NO_BUF | ARC_FLAG_COMPRESS_0 | ARC_FLAG_COMPRESS_1 | ARC_FLAG_COMPRESS_2 | ARC_FLAG_COMPRESS_3 | ARC_FLAG_COMPRESS_4 | ARC_FLAG_COMPRESS_5 | ARC_FLAG_COMPRESS_6 | unknown: 0x80000000), b_psize = 65511, b_lsize = 65514, b_spa = 18439144116193001445, b_l2hdr = {b_dev = 0xffe1ffdaffe4ffde, 
    b_daddr = 18439988528237772766, b_hits = 4293394399, b_arcs_state = 4293722086, b_l2node = {list_next = 0xffedffe9ffebffe7, 
      list_prev = 0xffefffe8ffedffe9}}, b_l1hdr = 
(kgdb) p *(arc_buf_hdr_t *)((uintptr_t)$slabmem + $keg->uk_rsize * 5)
$112 = {b_dva = {dva_word = {18441114466800369646, 18439707079031193570}}, b_birth = 18439144116192477152, 
  b_type = (ARC_BUFC_METADATA | ARC_BUFC_NUMTYPES | unknown: 0xffe4ffdc), b_complevel = 230 '\346', b_reserved1 = 255 '\377', 
  b_reserved2 = 65509, b_hash_next = 0x0, 
  b_flags = (ARC_FLAG_IN_HASH_TABLE | ARC_FLAG_HAS_L2HDR | ARC_FLAG_COMPRESSED_ARC | ARC_FLAG_COMPRESS_1), b_psize = 16, b_lsize = 16, 
  b_spa = 9365292975275091231, b_l2hdr = {b_dev = 0xfffffe022fd35000, b_daddr = 172890644480, b_hits = 0, b_arcs_state = ARC_STATE_MRU, 
    b_l2node = {list_next = 0xfffff800400c01d0, list_prev = 0xfffff800400c0290}}, b_l1hdr =

I have the core file saved and can provide more data.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type: Defect — Incorrect behavior (e.g. crash, hang)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions