Skip to content

Commit fb82306

Browse files
rincebraintonyhutter
authored andcommitted
Fix cross-endian interoperability of zstd
It turns out that layouts of union bitfields are a pain, and the current code results in an inconsistent layout between BE and LE systems, leading to zstd-active datasets on one erroring out on the other. Switch everyone over to the LE layout, and add compatibility code to read both. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Matthew Ahrens <[email protected]> Signed-off-by: Rich Ercolani <[email protected]> Closes openzfs#12008 Closes openzfs#12022
1 parent 4e8a639 commit fb82306

File tree

6 files changed

+165
-19
lines changed

6 files changed

+165
-19
lines changed

cmd/zdb/zdb.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2211,7 +2211,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
22112211
(void) snprintf(blkbuf + strlen(blkbuf),
22122212
buflen - strlen(blkbuf),
22132213
" ZSTD:size=%u:version=%u:level=%u:EMBEDDED",
2214-
zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);
2214+
zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr),
2215+
zfs_get_hdrlevel(&zstd_hdr));
22152216
return;
22162217
}
22172218

@@ -2235,7 +2236,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
22352236
(void) snprintf(blkbuf + strlen(blkbuf),
22362237
buflen - strlen(blkbuf),
22372238
" ZSTD:size=%u:version=%u:level=%u:NORMAL",
2238-
zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);
2239+
zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr),
2240+
zfs_get_hdrlevel(&zstd_hdr));
22392241

22402242
abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp));
22412243
}

include/sys/zstd/zstd.h

Lines changed: 137 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,21 +56,24 @@ typedef struct zfs_zstd_header {
5656

5757
/*
5858
* Version and compression level
59-
* We use a union to be able to big endian encode a single 32 bit
60-
* unsigned integer, but still access the individual bitmasked
61-
* components easily.
59+
* We used to use a union to reference compression level
60+
* and version easily, but as it turns out, relying on the
61+
* ordering of bitfields is not remotely portable.
62+
* So now we have get/set functions in zfs_zstd.c for
63+
* manipulating this in just the right way forever.
6264
*/
63-
union {
64-
uint32_t raw_version_level;
65-
struct {
66-
uint32_t version : 24;
67-
uint8_t level;
68-
};
69-
};
70-
65+
uint32_t raw_version_level;
7166
char data[];
7267
} zfs_zstdhdr_t;
7368

69+
/*
70+
* Simple struct to pass the data from raw_version_level around.
71+
*/
72+
typedef struct zfs_zstd_meta {
73+
uint8_t level;
74+
uint32_t version;
75+
} zfs_zstdmeta_t;
76+
7477
/*
7578
* kstat helper macros
7679
*/
@@ -94,6 +97,129 @@ int zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len,
9497
size_t d_len, int n);
9598
void zfs_zstd_cache_reap_now(void);
9699

100+
/*
101+
* So, the reason we have all these complicated set/get functions is that
102+
* originally, in the zstd "header" we wrote out to disk, we used a 32-bit
103+
* bitfield to store the "level" (8 bits) and "version" (24 bits).
104+
*
105+
* Unfortunately, bitfields make few promises about how they're arranged in
106+
* memory...
107+
*
108+
* By way of example, if we were using version 1.4.5 and level 3, it'd be
109+
* level = 0x03, version = 10405/0x0028A5, which gets broken into Vhigh = 0x00,
110+
* Vmid = 0x28, Vlow = 0xA5. We include these positions below to help follow
111+
* which data winds up where.
112+
*
113+
* As a consequence, we wound up with little endian platforms with a layout
114+
* like this in memory:
115+
*
116+
* 0 8 16 24 32
117+
* +-------+-------+-------+-------+
118+
* | Vlow | Vmid | Vhigh | level |
119+
* +-------+-------+-------+-------+
120+
* =A5 =28 =00 =03
121+
*
122+
* ...and then, after being run through BE_32(), serializing this out to
123+
* disk:
124+
*
125+
* 0 8 16 24 32
126+
* +-------+-------+-------+-------+
127+
* | level | Vhigh | Vmid | Vlow |
128+
* +-------+-------+-------+-------+
129+
* =03 =00 =28 =A5
130+
*
131+
* while on big-endian systems, since BE_32() is a noop there, both in
132+
* memory and on disk, we wind up with:
133+
*
134+
* 0 8 16 24 32
135+
* +-------+-------+-------+-------+
136+
* | Vhigh | Vmid | Vlow | level |
137+
* +-------+-------+-------+-------+
138+
* =00 =28 =A5 =03
139+
*
140+
* (Vhigh is always 0 until version exceeds 6.55.35. Vmid and Vlow are the
141+
* other two bytes of the "version" data.)
142+
*
143+
* So now we use the BF32_SET macros to get consistent behavior (the
144+
* ondisk LE encoding, since x86 currently rules the world) across
145+
* platforms, but the "get" behavior requires that we check each of the
146+
* bytes in the aforementioned former-bitfield for 0x00, and from there,
147+
* we can know which possible layout we're dealing with. (Only the two
148+
* that have been observed in the wild are illustrated above, but handlers
149+
* for all 4 positions of 0x00 are implemented.
150+
*/
151+
152+
static inline void
153+
zfs_get_hdrmeta(const zfs_zstdhdr_t *blob, zfs_zstdmeta_t *res)
154+
{
155+
uint32_t raw = blob->raw_version_level;
156+
uint8_t findme = 0xff;
157+
int shift;
158+
for (shift = 0; shift < 4; shift++) {
159+
findme = BF32_GET(raw, 8*shift, 8);
160+
if (findme == 0)
161+
break;
162+
}
163+
switch (shift) {
164+
case 0:
165+
res->level = BF32_GET(raw, 24, 8);
166+
res->version = BSWAP_32(raw);
167+
res->version = BF32_GET(res->version, 8, 24);
168+
break;
169+
case 1:
170+
res->level = BF32_GET(raw, 0, 8);
171+
res->version = BSWAP_32(raw);
172+
res->version = BF32_GET(res->version, 0, 24);
173+
break;
174+
case 2:
175+
res->level = BF32_GET(raw, 24, 8);
176+
res->version = BF32_GET(raw, 0, 24);
177+
break;
178+
case 3:
179+
res->level = BF32_GET(raw, 0, 8);
180+
res->version = BF32_GET(raw, 8, 24);
181+
break;
182+
default:
183+
res->level = 0;
184+
res->version = 0;
185+
break;
186+
}
187+
}
188+
189+
static inline uint8_t
190+
zfs_get_hdrlevel(const zfs_zstdhdr_t *blob)
191+
{
192+
uint8_t level = 0;
193+
zfs_zstdmeta_t res;
194+
zfs_get_hdrmeta(blob, &res);
195+
level = res.level;
196+
return (level);
197+
}
198+
199+
static inline uint32_t
200+
zfs_get_hdrversion(const zfs_zstdhdr_t *blob)
201+
{
202+
uint32_t version = 0;
203+
zfs_zstdmeta_t res;
204+
zfs_get_hdrmeta(blob, &res);
205+
version = res.version;
206+
return (version);
207+
208+
}
209+
210+
static inline void
211+
zfs_set_hdrversion(zfs_zstdhdr_t *blob, uint32_t version)
212+
{
213+
BF32_SET(blob->raw_version_level, 0, 24, version);
214+
}
215+
216+
static inline void
217+
zfs_set_hdrlevel(zfs_zstdhdr_t *blob, uint8_t level)
218+
{
219+
BF32_SET(blob->raw_version_level, 24, 8, level);
220+
}
221+
222+
97223
#ifdef __cplusplus
98224
}
99225
#endif

module/zstd/Makefile.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ $(obj)/zfs_zstd.o: c_flags += -include $(zstd_include)/zstd_compat_wrapper.h
3333

3434
$(MODULE)-objs += zfs_zstd.o
3535
$(MODULE)-objs += lib/zstd.o
36+
$(MODULE)-objs += zstd_sparc.o
3637

3738
all:
3839
mkdir -p lib

module/zstd/include/sparc_compat.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#if defined(__sparc)
2+
uint64_t __bswapdi2(uint64_t in);
3+
uint32_t __bswapsi2(uint32_t in);
4+
#endif

module/zstd/zfs_zstd.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
361361
return (1);
362362
}
363363

364+
364365
/* Compress block using zstd */
365366
size_t
366367
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
@@ -458,8 +459,8 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
458459
* As soon as such incompatibility occurs, handling code needs to be
459460
* added, differentiating between the versions.
460461
*/
461-
hdr->version = ZSTD_VERSION_NUMBER;
462-
hdr->level = level;
462+
zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER);
463+
zfs_set_hdrlevel(hdr, level);
463464
hdr->raw_version_level = BE_32(hdr->raw_version_level);
464465

465466
return (c_len + sizeof (*hdr));
@@ -485,6 +486,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
485486
* not modify the original data that may be used again later.
486487
*/
487488
hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);
489+
uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy);
488490

489491
/*
490492
* NOTE: We ignore the ZSTD version for now. As soon as any
@@ -497,13 +499,13 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
497499
* An invalid level is a strong indicator for data corruption! In such
498500
* case return an error so the upper layers can try to fix it.
499501
*/
500-
if (zstd_enum_to_level(hdr_copy.level, &zstd_level)) {
502+
if (zstd_enum_to_level(curlevel, &zstd_level)) {
501503
ZSTDSTAT_BUMP(zstd_stat_dec_inval);
502504
return (1);
503505
}
504506

505507
ASSERT3U(d_len, >=, s_len);
506-
ASSERT3U(hdr_copy.level, !=, ZIO_COMPLEVEL_INHERIT);
508+
ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT);
507509

508510
/* Invalid compressed buffer size encoded at start */
509511
if (c_len + sizeof (*hdr) > s_len) {
@@ -534,7 +536,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
534536
}
535537

536538
if (level) {
537-
*level = hdr_copy.level;
539+
*level = curlevel;
538540
}
539541

540542
return (0);
@@ -764,7 +766,7 @@ module_exit(zstd_fini);
764766

765767
ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
766768
ZFS_MODULE_LICENSE("Dual BSD/GPL");
767-
ZFS_MODULE_VERSION(ZSTD_VERSION_STRING);
769+
ZFS_MODULE_VERSION(ZSTD_VERSION_STRING "a");
768770

769771
EXPORT_SYMBOL(zfs_zstd_compress);
770772
EXPORT_SYMBOL(zfs_zstd_decompress_level);

module/zstd/zstd_sparc.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#ifdef __sparc__
2+
#include <stdint.h>
3+
#include <sys/byteorder.h>
4+
#include "include/sparc_compat.h"
5+
uint64_t __bswapdi2(uint64_t in) {
6+
return (BSWAP_64(in));
7+
}
8+
uint32_t __bswapsi2(uint32_t in) {
9+
return (BSWAP_32(in));
10+
}
11+
#endif

0 commit comments

Comments
 (0)