Skip to content

Commit a3bb1aa

Browse files
ahrens authored and jsai20 committed
Add zstream redup command to convert deduplicated send streams
Deduplicated send and receive is deprecated. To ease migration to the new dedup-send-less world, this commit adds a `zstream redup` utility to convert deduplicated send streams to normal streams, so that they can continue to be received indefinitely. The new `zstream` command also replaces the functionality of `zstreamdump`, by way of the `zstream dump` subcommand. The `zstreamdump` command is replaced by a shell script which invokes `zstream dump`. The way that `zstream redup` works under the hood is that as we read the send stream, we build up a hash table which maps from `<GUID, object, offset> -> <file_offset>`. Whenever we see a WRITE record, we add a new entry to the hash table, which indicates where in the stream file to find the WRITE record for this block. (The key is `drr_toguid, drr_object, drr_offset`.) For entries other than WRITE_BYREF, we pass them through unchanged (except for the running checksum, which is recalculated). For WRITE_BYREF records, we change them to WRITE records. We find the referenced WRITE record by looking in the hash table (for the record with key `drr_refguid, drr_refobject, drr_refoffset`), and then reading the record header and payload from the specified offset in the stream file. This is why the stream cannot be a pipe. The found WRITE record replaces the WRITE_BYREF record, with its `drr_toguid`, `drr_object`, and `drr_offset` fields changed to be the same as the WRITE_BYREF's (i.e. we are writing the same logical block, but with the data supplied by the previous WRITE record). This algorithm requires memory proportional to the number of WRITE records (same as `zfs send -D`), but the size per WRITE record is relatively low (40 bytes, vs. 72 for `zfs send -D`). A 1TB send stream with 8KB blocks (`recordsize=8k`) would use around 5GB of RAM to "redup".
Reviewed-by: Jorgen Lundman <[email protected]> Reviewed-by: Paul Dagnelie <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Matthew Ahrens <[email protected]> Closes openzfs#10124 Closes openzfs#10156
1 parent eb3ffe4 commit a3bb1aa

File tree

16 files changed

+728
-36
lines changed

16 files changed

+728
-36
lines changed

cmd/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest
1+
SUBDIRS = zfs zpool zdb zhack zinject zstream zstreamdump ztest
22
SUBDIRS += fsck_zfs vdev_id raidz_test zgenhostid
33

44
if USING_PYTHON

cmd/zstream/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
zstream

cmd/zstream/Makefile.am

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
include $(top_srcdir)/config/Rules.am
2+
3+
sbin_PROGRAMS = zstream
4+
5+
zstream_SOURCES = \
6+
zstream.c \
7+
zstream.h \
8+
zstream_dump.c \
9+
zstream_redup.c
10+
11+
zstream_LDADD = \
12+
$(top_builddir)/lib/libnvpair/libnvpair.la \
13+
$(top_builddir)/lib/libzfs/libzfs.la

cmd/zstream/zstream.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* This file and its contents are supplied under the terms of the
5+
* Common Development and Distribution License ("CDDL"), version 1.0.
6+
* You may only use this file in accordance with the terms of version
7+
* 1.0 of the CDDL.
8+
*
9+
* A full copy of the text of the CDDL should have accompanied this
10+
* source. A copy of the CDDL is also available via the Internet at
11+
* http://www.illumos.org/license/CDDL.
12+
*
13+
* CDDL HEADER END
14+
*/
15+
16+
/*
17+
* Copyright (c) 2020 by Delphix. All rights reserved.
18+
*/
19+
#include <sys/types.h>
20+
#include <sys/stat.h>
21+
#include <fcntl.h>
22+
#include <ctype.h>
23+
#include <stdio.h>
24+
#include <stdlib.h>
25+
#include <strings.h>
26+
#include <unistd.h>
27+
#include <libintl.h>
28+
#include <stddef.h>
29+
#include <libzfs.h>
30+
#include "zstream.h"
31+
32+
/*
 * Print the top-level zstream usage summary to stderr and terminate the
 * process with exit status 1.  This function does not return.
 */
void
zstream_usage(void)
{
	/* Full help text, emitted in a single write to stderr. */
	static const char usage_text[] =
	    "usage: zstream command args ...\n"
	    "Available commands are:\n"
	    "\n"
	    "\tzstream dump [-vCd] FILE\n"
	    "\t... | zstream dump [-vCd]\n"
	    "\n"
	    "\tzstream redup [-v] FILE | ...\n";

	(void) fputs(usage_text, stderr);
	exit(1);
}
45+
46+
/*
 * Entry point for the zstream utility.
 *
 * argv[1] selects the subcommand ("dump" or "redup").  The selected handler
 * receives the argument vector shifted by one, so that its own argv[0] is
 * the subcommand name, and its return value becomes the return value of
 * main().  A missing or unrecognized subcommand is routed to
 * zstream_usage(), which prints the usage text and exits with status 1.
 */
int
main(int argc, char *argv[])
{
	if (argc < 2)
		zstream_usage();

	const char *cmd = argv[1];

	/* Argument vector as seen by the subcommand handlers. */
	int sub_argc = argc - 1;
	char **sub_argv = argv + 1;

	if (strcmp(cmd, "dump") == 0)
		return (zstream_do_dump(sub_argc, sub_argv));

	if (strcmp(cmd, "redup") == 0)
		return (zstream_do_redup(sub_argc, sub_argv));

	/* Unknown subcommand: zstream_usage() exits and does not return. */
	zstream_usage();
}

cmd/zstream/zstream.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* This file and its contents are supplied under the terms of the
5+
* Common Development and Distribution License ("CDDL"), version 1.0.
6+
* You may only use this file in accordance with the terms of version
7+
* 1.0 of the CDDL.
8+
*
9+
* A full copy of the text of the CDDL should have accompanied this
10+
* source. A copy of the CDDL is also available via the Internet at
11+
* http://www.illumos.org/license/CDDL.
12+
*
13+
* CDDL HEADER END
14+
*/
15+
16+
/*
17+
* Copyright (c) 2020 by Delphix. All rights reserved.
18+
*/
19+
20+
#ifndef _ZSTREAM_H
21+
#define _ZSTREAM_H
22+
23+
#ifdef __cplusplus
24+
extern "C" {
25+
#endif
26+
27+
extern int zstream_do_redup(int, char *[]);
28+
extern int zstream_do_dump(int, char *[]);
29+
extern void zstream_usage(void);
30+
31+
#ifdef __cplusplus
32+
}
33+
#endif
34+
35+
#endif /* _ZSTREAM_H */

cmd/zstreamdump/zstreamdump.c renamed to cmd/zstream/zstream_dump.c

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <sys/zfs_ioctl.h>
4343
#include <sys/zio.h>
4444
#include <zfs_fletcher.h>
45+
#include "zstream.h"
4546

4647
/*
4748
* If dump mode is enabled, the number of bytes to print per line
@@ -58,17 +59,6 @@ FILE *send_stream = 0;
5859
boolean_t do_byteswap = B_FALSE;
5960
boolean_t do_cksum = B_TRUE;
6061

61-
static void
62-
usage(void)
63-
{
64-
(void) fprintf(stderr, "usage: zstreamdump [-v] [-C] [-d] < file\n");
65-
(void) fprintf(stderr, "\t -v -- verbose\n");
66-
(void) fprintf(stderr, "\t -C -- suppress checksum verification\n");
67-
(void) fprintf(stderr, "\t -d -- dump contents of blocks modified, "
68-
"implies verbose\n");
69-
exit(1);
70-
}
71-
7262
static void *
7363
safe_malloc(size_t size)
7464
{
@@ -215,7 +205,7 @@ sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len)
215205
}
216206

217207
int
218-
main(int argc, char *argv[])
208+
zstream_do_dump(int argc, char *argv[])
219209
{
220210
char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
221211
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
@@ -273,26 +263,39 @@ main(int argc, char *argv[])
273263
case ':':
274264
(void) fprintf(stderr,
275265
"missing argument for '%c' option\n", optopt);
276-
usage();
266+
zstream_usage();
277267
break;
278268
case '?':
279269
(void) fprintf(stderr, "invalid option '%c'\n",
280270
optopt);
281-
usage();
271+
zstream_usage();
282272
break;
283273
}
284274
}
285275

286-
if (isatty(STDIN_FILENO)) {
287-
(void) fprintf(stderr,
288-
"Error: Backup stream can not be read "
289-
"from a terminal.\n"
290-
"You must redirect standard input.\n");
291-
exit(1);
276+
if (argc > optind) {
277+
const char *filename = argv[optind];
278+
send_stream = fopen(filename, "r");
279+
if (send_stream == NULL) {
280+
(void) fprintf(stderr,
281+
"Error while opening file '%s': %s\n",
282+
filename, strerror(errno));
283+
exit(1);
284+
}
285+
} else {
286+
if (isatty(STDIN_FILENO)) {
287+
(void) fprintf(stderr,
288+
"Error: The send stream is a binary format "
289+
"and can not be read from a\n"
290+
"terminal. Standard input must be redirected, "
291+
"or a file must be\n"
292+
"specified as a command-line argument.\n");
293+
exit(1);
294+
}
295+
send_stream = stdin;
292296
}
293297

294298
fletcher_4_init();
295-
send_stream = stdin;
296299
while (read_hdr(drr, &zc)) {
297300

298301
/*

0 commit comments

Comments
 (0)