Skip to content

Commit cda7b99

Browse files
committed
Add zstream redup command to convert deduplicated send streams
Deduplicated send and receive is deprecated. To ease migration to the new dedup-send-less world, this commit adds a `zstream redup` utility to convert deduplicated send streams to normal streams, so that they can continue to be received indefinitely. The new `zstream` command also replaces the functionality of `zstreamdump`, by way of the `zstream dump` subcommand. The `zstreamdump` command is replaced by a shell script which invokes `zstream dump`. The way that `zstream redup` works under the hood is that as we read the send stream, we build up a hash table which maps from `<GUID, object, offset> -> <file_offset>`. Whenever we see a WRITE record, we add a new entry to the hash table, which indicates where in the stream file to find the WRITE record for this block. (The key is `drr_toguid, drr_object, drr_offset`.) For entries other than WRITE_BYREF, we pass them through unchanged (except for the running checksum, which is recalculated). For WRITE_BYREF records, we change them to WRITE records. We find the referenced WRITE record by looking in the hash table (for the record with key `drr_refguid, drr_refobject, drr_refoffset`), and then reading the record header and payload from the specified offset in the stream file. This is why the stream cannot be a pipe. The found WRITE record replaces the WRITE_BYREF record, with its `drr_toguid`, `drr_object`, and `drr_offset` fields changed to be the same as the WRITE_BYREF's (i.e. we are writing the same logical block, but with the data supplied by the previous WRITE record). This algorithm requires memory proportional to the number of WRITE records (same as `zfs send -D`), but the size per WRITE record is relatively low (40 bytes, vs. 72 for `zfs send -D`). A 1TB send stream with 8KB blocks (`recordsize=8k`) would use around 5GB of RAM to "redup". Signed-off-by: Matthew Ahrens <[email protected]>
1 parent 7e3df9d commit cda7b99

File tree

14 files changed

+720
-33
lines changed

14 files changed

+720
-33
lines changed

cmd/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest
1+
SUBDIRS = zfs zpool zdb zhack zinject zstream zstreamdump ztest
22
SUBDIRS += fsck_zfs vdev_id raidz_test zgenhostid
33

44
if USING_PYTHON

cmd/zstream/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
zstream

cmd/zstream/Makefile.am

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
include $(top_srcdir)/config/Rules.am
2+
3+
sbin_PROGRAMS = zstream
4+
5+
zstream_SOURCES = \
6+
zstream.c \
7+
zstream.h \
8+
zstream_dump.c \
9+
zstream_redup.c
10+
11+
zstream_LDADD = \
12+
$(top_builddir)/lib/libnvpair/libnvpair.la \
13+
$(top_builddir)/lib/libzfs/libzfs.la

cmd/zstream/zstream.c

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* This file and its contents are supplied under the terms of the
5+
* Common Development and Distribution License ("CDDL"), version 1.0.
6+
* You may only use this file in accordance with the terms of version
7+
* 1.0 of the CDDL.
8+
*
9+
* A full copy of the text of the CDDL should have accompanied this
10+
* source. A copy of the CDDL is also available via the Internet at
11+
* http://www.illumos.org/license/CDDL.
12+
*
13+
* CDDL HEADER END
14+
*/
15+
16+
/*
17+
* Copyright (c) 2020 by Delphix. All rights reserved.
18+
*/
19+
#include <sys/types.h>
20+
#include <sys/stat.h>
21+
#include <fcntl.h>
22+
#include <ctype.h>
23+
#include <stdio.h>
24+
#include <stdlib.h>
25+
#include <strings.h>
26+
#include <unistd.h>
27+
#include <libintl.h>
28+
#include <stddef.h>
29+
#include <libzfs.h>
30+
#include "zstream.h"
31+
32+
33+
/*
 * Print the zstream command synopsis (the "dump" and "redup" subcommands)
 * to stderr, then terminate the process with exit status 1.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: zstream command args ...\n"
	    "Available commands are:\n"
	    "\n"
	    "\tzstream dump [-vCd] FILE\n"
	    "\t... | zstream dump [-vCd]\n"
	    "\n"
	    "\tzstream redup [-v] FILE | ...\n";

	/* The text contains no conversion specifiers, so emit it verbatim. */
	(void) fputs(synopsis, stderr);
	exit(1);
}
46+
47+
/*
 * Entry point for the zstream utility.
 *
 * argv[1] selects the subcommand ("dump" or "redup").  The matching handler
 * is called with the argument vector shifted by one, so the handler sees the
 * subcommand name as its own argv[0].  The handler's return value becomes
 * the process exit status.
 *
 * With no subcommand, the usage text is printed and the process exits.  With
 * an unrecognized subcommand, a diagnostic naming it is printed first.
 */
int
main(int argc, char *argv[])
{
	if (argc < 2)
		usage();

	const char *subcommand = argv[1];

	if (strcmp(subcommand, "dump") == 0)
		return (zstream_do_dump(argc - 1, argv + 1));

	if (strcmp(subcommand, "redup") == 0)
		return (zstream_do_redup(argc - 1, argv + 1));

	/* Tell the user what was rejected before showing the synopsis. */
	(void) fprintf(stderr, "zstream: invalid subcommand '%s'\n",
	    subcommand);
	usage();

	/*
	 * usage() is expected to exit; return failure explicitly so control
	 * never falls off the end of main() with an implicit success status.
	 */
	return (1);
}

cmd/zstream/zstream.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* This file and its contents are supplied under the terms of the
5+
* Common Development and Distribution License ("CDDL"), version 1.0.
6+
* You may only use this file in accordance with the terms of version
7+
* 1.0 of the CDDL.
8+
*
9+
* A full copy of the text of the CDDL should have accompanied this
10+
* source. A copy of the CDDL is also available via the Internet at
11+
* http://www.illumos.org/license/CDDL.
12+
*
13+
* CDDL HEADER END
14+
*/
15+
16+
/*
17+
* Copyright (c) 2020 by Delphix. All rights reserved.
18+
*/
19+
20+
#ifndef _ZSTREAM_H
21+
#define _ZSTREAM_H
22+
23+
#ifdef __cplusplus
24+
extern "C" {
25+
#endif
26+
27+
extern int zstream_do_redup(int, char *[]);
28+
extern int zstream_do_dump(int, char *[]);
29+
extern void usage(void);
30+
31+
#ifdef __cplusplus
32+
}
33+
#endif
34+
35+
#endif /* _ZSTREAM_H */

cmd/zstreamdump/zstreamdump.c renamed to cmd/zstream/zstream_dump.c

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <sys/zfs_ioctl.h>
4343
#include <sys/zio.h>
4444
#include <zfs_fletcher.h>
45+
#include "zstream.h"
4546

4647
/*
4748
* If dump mode is enabled, the number of bytes to print per line
@@ -58,17 +59,6 @@ FILE *send_stream = 0;
5859
boolean_t do_byteswap = B_FALSE;
5960
boolean_t do_cksum = B_TRUE;
6061

61-
static void
62-
usage(void)
63-
{
64-
(void) fprintf(stderr, "usage: zstreamdump [-v] [-C] [-d] < file\n");
65-
(void) fprintf(stderr, "\t -v -- verbose\n");
66-
(void) fprintf(stderr, "\t -C -- suppress checksum verification\n");
67-
(void) fprintf(stderr, "\t -d -- dump contents of blocks modified, "
68-
"implies verbose\n");
69-
exit(1);
70-
}
71-
7262
static void *
7363
safe_malloc(size_t size)
7464
{
@@ -215,7 +205,7 @@ sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len)
215205
}
216206

217207
int
218-
main(int argc, char *argv[])
208+
zstream_do_dump(int argc, char *argv[])
219209
{
220210
char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
221211
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
@@ -283,16 +273,29 @@ main(int argc, char *argv[])
283273
}
284274
}
285275

286-
if (isatty(STDIN_FILENO)) {
287-
(void) fprintf(stderr,
288-
"Error: Backup stream can not be read "
289-
"from a terminal.\n"
290-
"You must redirect standard input.\n");
291-
exit(1);
276+
if (argc > optind) {
277+
const char *filename = argv[optind];
278+
send_stream = fopen(filename, "r");
279+
if (send_stream == NULL) {
280+
(void) fprintf(stderr,
281+
"Error while opening file '%s': %s\n",
282+
filename, strerror(errno));
283+
exit(1);
284+
}
285+
} else {
286+
if (isatty(STDIN_FILENO)) {
287+
(void) fprintf(stderr,
288+
"Error: The send stream is a binary format "
289+
"and can not be read from a\n"
290+
"terminal. Standard input must be redirected, "
291+
"or a file must be\n"
292+
"specified as a command-line argument.\n");
293+
exit(1);
294+
}
295+
send_stream = stdin;
292296
}
293297

294298
fletcher_4_init();
295-
send_stream = stdin;
296299
while (read_hdr(drr, &zc)) {
297300

298301
/*

0 commit comments

Comments
 (0)