Description
System information
Type | Version/Name |
---|---|
Distribution Name | ubuntu |
Distribution Version | 22.04 |
Kernel Version | ubuntu 5.15.0-1033-raspi |
Architecture | aarch64 aarch64 aarch64 GNU/Linux |
OpenZFS Version | zfs-2.2.99-1, zfs-kmod-2.1.5-1ubuntu6~22.04.1 |
Describe the problem you're observing
Background:
I have a 4-disk raidz2 pool in which one disk failed. I can no longer import the pool normally (it crashes on import on macOS and FreeBSD, and spews out errors on Linux). Importing it read-only works, but anything other than running zpool status will crash the machine (FreeBSD, macOS) or hang ZFS (Linux). So I am now in the process of getting my data back with zdb, which is when I encountered this bug.
root@ubuntu:/home/ubuntu# zpool status
pool: ocean
state: DEGRADED
status: One or more devices is currently being resilvered. The pool will
continue to function, possibly in a degraded state.
action: Wait for the resilver to complete.
scan: resilver in progress since Sat Jul 1 10:01:24 2023
0B / 11.6T scanned, 0B / 11.6T issued
0B resilvered, 0.00% done, no estimated completion time
config:
NAME STATE READ WRITE CKSUM
ocean DEGRADED 0 0 0
raidz2-0 DEGRADED 0 0 0
18276338438250889807 FAULTED 0 0 0 was /dev/sdd2
sdc1 ONLINE 0 0 0
sdb2 ONLINE 0 0 0
sdd2 ONLINE 0 0 0
errors: 3962 data errors, use '-v' for a list
root@ubuntu:/home/ubuntu# zdb -u -e ocean
Uberblock:
magic = 0000000000bab10c
version = 5000
txg = 18360509
guid_sum = 15765822683839940218
timestamp = 1688241822 UTC = Sat Jul 1 20:03:42 2023
mmp_magic = 00000000a11cea11
mmp_delay = 0
mmp_valid = 0
checkpoint_txg = 0
root@ubuntu:/home/ubuntu# zdb -dP -e ocean
Dataset mos [META], ID 0, cr_txg 4, 1092430080, 6633 objects
Dataset ocean/tmtinkerbell@2023-06-24T00:00:01 [ZPL], ID 5114, cr_txg 18302756, 1111802459136, 24930 objects
...
Dataset ocean/movies [ZPL], ID 772, cr_txg 1430, 4065675030144, 9979 objects
...
dmu_bonus_hold(os, object, dl, &dl->dl_dbuf) == 0 (0x34 == 0)
ASSERT at module/zfs/dsl_deadlist.c:308:dsl_deadlist_open()Aborted (core dumped)
root@ubuntu:/home/ubuntu# zdb -dd -e ocean/movies
Dataset ocean/movies [ZPL], ID 772, cr_txg 1430, 3.70T, 9979 objects
ZIL header: claim_txg 18360433, claim_blk_seq 1, claim_lr_seq 0 replay_seq 0, flags 0x2
Object lvl iblk dblk dsize dnsize lsize %full type
0 6 128K 16K 14.7M 512 96.9M 5.03 DMU dnode
-1 1 128K 512 11.5K 512 512 100.00 ZFS user/group/project used
-2 1 128K 512 11.5K 512 512 100.00 ZFS user/group/project used
1 1 128K 1K 11.5K 512 1K 100.00 ZFS master node
2 1 128K 512 11.5K 512 512 100.00 SA master node
3 1 128K 21.5K 11.5K 512 21.5K 100.00 ZFS delete queue
4 2 128K 16K 35K 512 32K 100.00 ZFS directory
5 1 128K 2K 11.5K 512 2K 100.00 SA attr registration
...
And looking at one file:
root@ubuntu:~# zdb -ddddd -K $pass -e ocean/movies 5268
Unlocked encryption root: ocean/movies
Dataset ocean/movies [ZPL], ID 772, cr_txg 1430, 3.70T, 9979 objects, rootbp DVA[0]=<0:2400014a000:3000> DVA[1]=<0:f20081e4000:3000> [L0 DMU objset] fletcher4 uncompressed authenticated LE contiguous unique double size=800L/800P birth=18360433L/18360433P fill=9979 cksum=00000021c040a809:000025b93e7be9da:00190442ec3d9de4:0c1fdfcfea494199
Object lvl iblk dblk dsize dnsize lsize %full type
5268 1 128K 105K 40.5K 512 105K 100.00 ZFS plain file
168 bonus System attributes
dnode flags: USED_BYTES USERUSED_ACCOUNTED
dnode maxblkid: 0
path /Pictures/Testbilder/NecPattern.png
uid 1000
gid 1000
atime Tue Dec 31 08:24:53 2019
mtime Thu Feb 11 09:05:10 2016
ctime Wed Jan 27 17:59:02 2021
crtime Tue Jan 26 20:40:27 2021
gen 4641518
mode 100644
size 107030
parent 5267
links 1
pflags 40800000004
Indirect blocks:
0 L0 0:10a1263d2000:15000 1a400L/9000P F=1 B=4641518/4641518 cksum=246684c5894cb36e:ecb92ef5e414d48a:13123f3d0f3e78df:9619bdcef80cd664
segment [0000000000000000, 000000000001a400) size 105K
The Issue
Using zdb -R on an unencrypted pool works and gives me back the data, but on encrypted pools it does not work: as shown here, there is no PNG header where there should be one (see the "Describe how to reproduce the problem" section below).
root@ubuntu:/home/ubuntu# zdb -R -K $pass $disks -e ocean/movies 0:10a1263d2000:1a400 |head
Found vdev type: raidz
0:10a1263d2000:1a400
0 1 2 3 4 5 6 7 8 9 a b c d e f 0123456789abcdef
000000: 9c042f5028cc72d6 1eb5378942ca849d .r.(P/.....B.7..
000010: 39d46c5bb5ef339d 9aeb9cbf22549f9a .3..[l.9..T"....
000020: c4bcb91876bfb295 9b297424b1886cdd ...v.....l..$t).
000030: dab8c9b2e13c8f06 f9517b44e365816a ..<.....j.e.D{Q.
000040: f63da5f46b496aa5 d2a3744d20577b5a .jIk..=.Z{W Mt..
...
Describe how to reproduce the problem
Create a pool with an encrypted dataset inside and copy an image.png into it.
mkfile 100m sda sdb sdc sdd
pass='testtest'
disks='-p /Users/jolly/Desktop/zfsdebug2/sda -p /Users/jolly/Desktop/zfsdebug2/sdb -p /Users/jolly/Desktop/zfsdebug2/sdc -p /Users/jolly/Desktop/zfsdebug2/sdd'
# zpool create test raidz2 $disks
# zfs create -o encryption=on -o keylocation=prompt -o keyformat=passphrase test/movies
# cp Image.png /Volumes/test/movies/
Look for the image size in the output of:
# zdb -dd $disks -e test/movies
physmem = 2097152 pages (32.00 GB)
looking up movies in obj34
Dataset test/movies [ZPL], ID 388, cr_txg 32, 2.08M, 104 objects
ZIL header: claim_txg 0, claim_blk_seq 0, claim_lr_seq 0 replay_seq 0, flags 0x0
Object lvl iblk dblk dsize dnsize lsize %full type
0 6 128K 16K 280K 512 288K 18.06 DMU dnode
-1 1 128K 512 1.50K 512 512 100.00 ZFS user/group/project used
-2 1 128K 512 1.50K 512 512 100.00 ZFS user/group/project used
-3 1 128K 512 1.50K 512 512 100.00 ZFS user/group/project used
1 1 128K 1K 1.50K 512 1K 100.00 ZFS master node
2 2 128K 128K 1.66M 512 1.75M 100.00 ZFS plain file
3 1 128K 512 1.50K 512 512 100.00 ZFS directory
4 1 128K 512 512 512 512 100.00 ZFS plain file
5 1 128K 6.50K 4.50K 512 6.50K 100.00 ZFS directory
6 1 128K 512 0 512 512 0.00 ZFS plain file
7 1 128K 512 1.50K 512 512 100.00 ZFS directory
9 1 128K 105K 33K 512 105K 100.00 ZFS plain file
10 1 128K 512 1.50K 512 512 100.00 ZFS directory
...
In my case the image is 105K bytes, so it is object number 9.
Then find out its blocks:
# zdb -ddddd $disks -e test/movies 9
physmem = 2097152 pages (32.00 GB)
looking up movies in obj34
Dataset test/movies [ZPL], ID 388, cr_txg 32, 2.08M, 104 objects, rootbp DVA[0]=<0:ce95800:2400> DVA[1]=<0:41f1400:2400> [L0 DMU objset] fletcher4 uncompressed authenticated LE contiguous unique double size=1000L/1000P birth=43L/43P fill=104 cksum=00000022e10e3971:000064b3a2f91165:009866b8a335c5af:9fae33cbc48176dc
Object lvl iblk dblk dsize dnsize lsize %full type
9 1 128K 105K 33K 512 105K 100.00 ZFS plain file
176 bonus System attributes
dnode flags: USED_BYTES USERUSED_ACCOUNTED USEROBJUSED_ACCOUNTED
dnode maxblkid: 0
(bonus encrypted)
(object encrypted)
Indirect blocks:
0 L0 0:c969600:10800 1a400L/8200P F=1 B=40/40 cksum=3243e353334e32fb:6d3c14cd8a3ce5d2:31f1bac457bf51ec:413b385e911ad9e2
segment [0000000000000000, 000000000001a400) size 105K
and now finally try to get the image data at 0:c969600:1a400
bash-3.2# zdb -R -K $pass $disks -e test/movies 0:c969600:1a400 |head
Found vdev type: raidz
physmem = 2097152 pages (32.00 GB)
0:c969600:1a400L
0 1 2 3 4 5 6 7 8 9 a b c d e f 0123456789abcdef
000000: 5727568bba70a4aa 13faac8aec9d2530 W'V..p........%0
000010: 4e83318bd6144700 c1711eed6c4bff06 N.1...G..q..lK..
000020: 73bc77792344f8c6 071924efb202fa5a s.wy#D....$....Z
000030: 48e5991759d8d5e9 5b647ef25695d182 H...Y...[d~.V...
000040: f87618227293d53f 958eb1815f8df46b .v."r..?...._..k
Those bytes do not contain the PNG header, unlike the output on an unencrypted pool:
bash-3.2# zdb -R -p /Users/jolly/Desktop/zfsdebug2/sda -p /Users/jolly/Desktop/zfsdebug2/sdb -p /Users/jolly/Desktop/zfsdebug2/sdc -e test 0:4604200:10800 |head
Found vdev type: raidz
physmem = 2097152 pages (32.00 GB)
0:4604200:10800
0 1 2 3 4 5 6 7 8 9 a b c d e f 0123456789abcdef
000000: 0000803ff0698950 4e470d0a1a0a0000 ...?.i.PNG......
000010: 000d494844520000 0780000004380802 ..IHDR.......8..
000020: 00000067b1561400 00002c7445587443 ...g.V....,tEXtC
000030: 72656174696f6e20 54696d6500536174 reation Time.Sat