Skip to content

Commit 71fc408

Browse files
libzfs, libzfs_core: send: always write to pipe
By introducing lzc_send_wrapper() and routing all ZFS_IOC_SEND* users through it, we fix a bug introduced in Linux 5.10 (see comment). This is all /transparent/ to the users, API-, ABI-, and usage-wise; it is disabled on FreeBSD and if the output is already a pipe, and it is transparently nestable (i.e. zfs_send_one() is wrapped, but so is the lzc_send_redacted() it calls ‒ this wouldn't be strictly necessary if ZFS_IOC_SEND_PROGRESS weren't strictly denominational w.r.t. the descriptor the send is happening on). Supersedes openzfs#11992. Closes openzfs#11445. Co-authored-by: Rich Ercolani <[email protected]> Signed-off-by: Ahelenia Ziemiańska <[email protected]>
1 parent 6ccd507 commit 71fc408

File tree

3 files changed

+261
-11
lines changed

3 files changed

+261
-11
lines changed

include/libzfs_core.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ enum lzc_send_flags {
8686
LZC_SEND_FLAG_SAVED = 1 << 4,
8787
};
8888

89+
/*
 * Invoke the callback (first argument) with an output descriptor and the
 * opaque pointer (third argument), on behalf of a send to the descriptor
 * given as the second argument.  The descriptor the callback receives is
 * either that descriptor itself or the write end of a temporary pipe;
 * see the definition in lib/libzfs_core/libzfs_core.c for the conditions.
 */
_LIBZFS_CORE_H int lzc_send_wrapper(int (*)(int, void *), int, void *);
8990
_LIBZFS_CORE_H int lzc_send(const char *, const char *, int,
9091
enum lzc_send_flags);
9192
_LIBZFS_CORE_H int lzc_send_resume(const char *, const char *, int,

lib/libzfs/libzfs_sendrecv.c

Lines changed: 101 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,8 +1682,8 @@ lzc_flags_from_resume_nvl(nvlist_t *resume_nvl)
16821682
}
16831683

16841684
static int
1685-
zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1686-
nvlist_t *resume_nvl)
1685+
zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
1686+
int outfd, nvlist_t *resume_nvl)
16871687
{
16881688
char errbuf[1024];
16891689
char *toname;
@@ -1893,6 +1893,32 @@ zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
18931893
return (error);
18941894
}
18951895

1896+
struct zfs_send_resume_impl {
1897+
libzfs_handle_t *hdl;
1898+
sendflags_t *flags;
1899+
nvlist_t *resume_nvl;
1900+
};
1901+
1902+
static int
1903+
zfs_send_resume_impl_cb(int outfd, void *arg)
1904+
{
1905+
struct zfs_send_resume_impl *zsri = arg;
1906+
return (zfs_send_resume_impl_cb_impl(zsri->hdl, zsri->flags, outfd,
1907+
zsri->resume_nvl));
1908+
}
1909+
1910+
static int
1911+
zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1912+
nvlist_t *resume_nvl)
1913+
{
1914+
struct zfs_send_resume_impl zsri = {
1915+
.hdl = hdl,
1916+
.flags = flags,
1917+
.resume_nvl = resume_nvl,
1918+
};
1919+
return (lzc_send_wrapper(zfs_send_resume_impl_cb, outfd, &zsri));
1920+
}
1921+
18961922
int
18971923
zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
18981924
const char *resume_token)
@@ -2170,9 +2196,11 @@ send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
21702196
* if "replicate" is set. If "doall" is set, dump all the intermediate
21712197
* snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
21722198
* case too. If "props" is set, send properties.
2199+
*
2200+
* Pre-wrapped (cf. lzc_send_wrapper()).
21732201
*/
2174-
int
2175-
zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
2202+
static int
2203+
zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
21762204
sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
21772205
void *cb_arg, nvlist_t **debugnvp)
21782206
{
@@ -2374,6 +2402,42 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
23742402
return (err);
23752403
}
23762404

2405+
struct zfs_send {
2406+
zfs_handle_t *zhp;
2407+
const char *fromsnap;
2408+
const char *tosnap;
2409+
sendflags_t *flags;
2410+
snapfilter_cb_t *filter_func;
2411+
void *cb_arg;
2412+
nvlist_t **debugnvp;
2413+
};
2414+
2415+
static int
2416+
zfs_send_cb(int outfd, void *arg)
2417+
{
2418+
struct zfs_send *zs = arg;
2419+
return (zfs_send_cb_impl(zs->zhp, zs->fromsnap, zs->tosnap, zs->flags,
2420+
outfd, zs->filter_func, zs->cb_arg, zs->debugnvp));
2421+
}
2422+
2423+
int
2424+
zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
2425+
sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
2426+
void *cb_arg, nvlist_t **debugnvp)
2427+
{
2428+
struct zfs_send arg = {
2429+
.zhp = zhp,
2430+
.fromsnap = fromsnap,
2431+
.tosnap = tosnap,
2432+
.flags = flags,
2433+
.filter_func = filter_func,
2434+
.cb_arg = cb_arg,
2435+
.debugnvp = debugnvp,
2436+
};
2437+
return (lzc_send_wrapper(zfs_send_cb, outfd, &arg));
2438+
}
2439+
2440+
23772441
static zfs_handle_t *
23782442
name_to_dir_handle(libzfs_handle_t *hdl, const char *snapname)
23792443
{
@@ -2450,10 +2514,12 @@ snapshot_is_before(zfs_handle_t *earlier, zfs_handle_t *later)
24502514
* The "zhp" argument is the handle of the dataset to send (typically a
24512515
* snapshot). The "from" argument is the full name of the snapshot or
24522516
* bookmark that is the incremental source.
2517+
*
2518+
* Pre-wrapped (cf. lzc_send_wrapper()).
24532519
*/
2454-
int
2455-
zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
2456-
const char *redactbook)
2520+
static int
2521+
zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
2522+
sendflags_t *flags, const char *redactbook)
24572523
{
24582524
int err;
24592525
libzfs_handle_t *hdl = zhp->zfs_hdl;
@@ -2642,6 +2708,34 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
26422708
return (err != 0);
26432709
}
26442710

2711+
struct zfs_send_one {
2712+
zfs_handle_t *zhp;
2713+
const char *from;
2714+
sendflags_t *flags;
2715+
const char *redactbook;
2716+
};
2717+
2718+
static int
2719+
zfs_send_one_cb(int fd, void *arg)
2720+
{
2721+
struct zfs_send_one *zso = arg;
2722+
return (zfs_send_one_cb_impl(zso->zhp, zso->from, fd, zso->flags,
2723+
zso->redactbook));
2724+
}
2725+
2726+
int
2727+
zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
2728+
const char *redactbook)
2729+
{
2730+
struct zfs_send_one zso = {
2731+
.zhp = zhp,
2732+
.from = from,
2733+
.flags = flags,
2734+
.redactbook = redactbook,
2735+
};
2736+
return (lzc_send_wrapper(zfs_send_one_cb, fd, &zso));
2737+
}
2738+
26452739
/*
26462740
* Routines specific to "zfs recv"
26472741
*/

lib/libzfs_core/libzfs_core.c

Lines changed: 159 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,84 @@ max_pipe_buffer(int infd)
617617
#endif
618618
}
619619

620+
#if __linux__
621+
struct send_worker_ctx {
622+
int from; /* read end of pipe, with send data; closed on exit */
623+
int to; /* original arbitrary output fd; mustn't be a pipe */
624+
};
625+
626+
static void *
627+
send_worker(void *arg)
628+
{
629+
struct send_worker_ctx *ctx = arg;
630+
unsigned int bufsiz = max_pipe_buffer(ctx->from);
631+
ssize_t rd;
632+
633+
while ((rd = splice(ctx->from, NULL, ctx->to, NULL, bufsiz,
634+
SPLICE_F_MOVE | SPLICE_F_MORE)) > 0)
635+
;
636+
637+
int err = (rd == -1) ? errno : 0;
638+
close(ctx->from);
639+
return ((void *)(uintptr_t)err);
640+
}
641+
#endif
642+
643+
/*
644+
* Since Linux 5.10, 4d03e3cc59828c82ee89ea6e27a2f3cdf95aaadf
645+
* ("fs: don't allow kernel reads and writes without iter ops"),
646+
* ZFS_IOC_SEND* will EINVAL when writing to /dev/null, /dev/zero, &c.
647+
*
648+
* This wrapper transparently executes func() with a pipe
649+
* by spawning a thread to copy from that pipe to the original output
650+
* in the background.
651+
*
652+
* Returns the error from func(), if nonzero,
653+
* otherwise the error from the thread.
654+
*
655+
* No-op if orig_fd is -1, already a pipe, and on not-Linux;
656+
* as such, it is safe to wrap/call wrapped functions in a wrapped context.
657+
*/
658+
int
659+
lzc_send_wrapper(int (*func)(int, void *), int orig_fd, void *data)
660+
{
661+
#if __linux__
662+
struct stat sb;
663+
if (orig_fd != -1 && fstat(orig_fd, &sb) == -1)
664+
return (errno);
665+
if (orig_fd == -1 || S_ISFIFO(sb.st_mode))
666+
return (func(orig_fd, data));
667+
if ((fcntl(orig_fd, F_GETFL) & O_ACCMODE) == O_RDONLY)
668+
return (errno = EBADF);
669+
670+
int rw[2];
671+
if (pipe2(rw, O_CLOEXEC) == -1)
672+
return (errno);
673+
674+
int err;
675+
pthread_t send_thread;
676+
struct send_worker_ctx ctx = {.from = rw[0], .to = orig_fd};
677+
if ((err = pthread_create(&send_thread, NULL, send_worker, &ctx))
678+
!= 0) {
679+
close(rw[0]);
680+
close(rw[1]);
681+
return (errno = err);
682+
}
683+
684+
err = func(rw[1], data);
685+
686+
void *send_err;
687+
close(rw[1]);
688+
pthread_join(send_thread, &send_err);
689+
if (err == 0 && send_err != 0)
690+
errno = err = (uintptr_t)send_err;
691+
692+
return (err);
693+
#else
694+
return (func(orig_fd, data));
695+
#endif
696+
}
697+
620698
/*
621699
* Generate a zfs send stream for the specified snapshot and write it to
622700
* the specified file descriptor.
@@ -687,9 +765,11 @@ lzc_send_resume(const char *snapname, const char *from, int fd,
687765
* redactnv: nvlist of string -> boolean(ignored) containing the names of all
688766
* the snapshots that we should redact with respect to.
689767
* redactbook: Name of the redaction bookmark to create.
768+
*
769+
* Pre-wrapped.
690770
*/
691-
int
692-
lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
771+
static int
772+
lzc_send_resume_redacted_cb_impl(const char *snapname, const char *from, int fd,
693773
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
694774
const char *redactbook)
695775
{
@@ -722,6 +802,40 @@ lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
722802
return (err);
723803
}
724804

805+
struct lzc_send_resume_redacted {
806+
const char *snapname;
807+
const char *from;
808+
enum lzc_send_flags flags;
809+
uint64_t resumeobj;
810+
uint64_t resumeoff;
811+
const char *redactbook;
812+
};
813+
814+
static int
815+
lzc_send_resume_redacted_cb(int fd, void *arg)
816+
{
817+
struct lzc_send_resume_redacted *zsrr = arg;
818+
return (lzc_send_resume_redacted_cb_impl(zsrr->snapname, zsrr->from,
819+
fd, zsrr->flags, zsrr->resumeobj, zsrr->resumeoff,
820+
zsrr->redactbook));
821+
}
822+
823+
int
824+
lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
825+
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
826+
const char *redactbook)
827+
{
828+
struct lzc_send_resume_redacted zsrr = {
829+
.snapname = snapname,
830+
.from = from,
831+
.flags = flags,
832+
.resumeobj = resumeobj,
833+
.resumeoff = resumeoff,
834+
.redactbook = redactbook,
835+
};
836+
return (lzc_send_wrapper(lzc_send_resume_redacted_cb, fd, &zsrr));
837+
}
838+
725839
/*
726840
* "from" can be NULL, a snapshot, or a bookmark.
727841
*
@@ -737,9 +851,11 @@ lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
737851
* significantly more I/O and be less efficient than a send space estimation on
738852
* an equivalent snapshot. This process is also used if redact_snaps is
739853
* non-null.
854+
*
855+
* Pre-wrapped.
740856
*/
741-
int
742-
lzc_send_space_resume_redacted(const char *snapname, const char *from,
857+
static int
858+
lzc_send_space_resume_redacted_cb_impl(const char *snapname, const char *from,
743859
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
744860
uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
745861
{
@@ -776,6 +892,45 @@ lzc_send_space_resume_redacted(const char *snapname, const char *from,
776892
return (err);
777893
}
778894

895+
struct lzc_send_space_resume_redacted {
896+
const char *snapname;
897+
const char *from;
898+
enum lzc_send_flags flags;
899+
uint64_t resumeobj;
900+
uint64_t resumeoff;
901+
uint64_t resume_bytes;
902+
const char *redactbook;
903+
uint64_t *spacep;
904+
};
905+
906+
static int
907+
lzc_send_space_resume_redacted_cb(int fd, void *arg)
908+
{
909+
struct lzc_send_space_resume_redacted *zssrr = arg;
910+
return (lzc_send_space_resume_redacted_cb_impl(zssrr->snapname,
911+
zssrr->from, zssrr->flags, zssrr->resumeobj, zssrr->resumeoff,
912+
zssrr->resume_bytes, zssrr->redactbook, fd, zssrr->spacep));
913+
}
914+
915+
int
916+
lzc_send_space_resume_redacted(const char *snapname, const char *from,
917+
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
918+
uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
919+
{
920+
struct lzc_send_space_resume_redacted zssrr = {
921+
.snapname = snapname,
922+
.from = from,
923+
.flags = flags,
924+
.resumeobj = resumeobj,
925+
.resumeoff = resumeoff,
926+
.resume_bytes = resume_bytes,
927+
.redactbook = redactbook,
928+
.spacep = spacep,
929+
};
930+
return (lzc_send_wrapper(lzc_send_space_resume_redacted_cb,
931+
fd, &zssrr));
932+
}
933+
779934
int
780935
lzc_send_space(const char *snapname, const char *from,
781936
enum lzc_send_flags flags, uint64_t *spacep)

0 commit comments

Comments
 (0)