Skip to content

Commit bc77ba7

Browse files
pcd1193182 authored and behlendorf committed
OpenZFS 6513 - partially filled holes lose birth time
Reviewed by: Matthew Ahrens <[email protected]> Reviewed by: George Wilson <[email protected]> Reviewed by: Boris Protopopov <[email protected]> Approved by: Richard Lowe <[email protected]> Ported by: Boris Protopopov <[email protected]> Signed-off-by: Boris Protopopov <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> OpenZFS-issue: https://www.illumos.org/issues/6513 OpenZFS-commit: openzfs/openzfs@8df0bcf0 If a ZFS object contains a hole at level one, and then a data block is created at level 0 underneath that l1 block, l0 holes will be created. However, these l0 holes do not have the birth time property set; as a result, incremental sends will not send those holes. The fix is to modify the dbuf_read code to fill in birth time data.
1 parent 100a91a commit bc77ba7

File tree

9 files changed

+192
-54
lines changed

9 files changed

+192
-54
lines changed

include/sys/arc.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23-
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
23+
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
2424
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
2525
*/
2626

@@ -193,9 +193,11 @@ int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
193193
arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
194194
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
195195
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
196-
const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone,
197-
arc_done_func_t *done, void *private, zio_priority_t priority,
198-
int zio_flags, const zbookmark_phys_t *zb);
196+
const zio_prop_t *zp,
197+
arc_done_func_t *ready, arc_done_func_t *child_ready,
198+
arc_done_func_t *physdone, arc_done_func_t *done,
199+
void *private, zio_priority_t priority, int zio_flags,
200+
const zbookmark_phys_t *zb);
199201

200202
arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *private);
201203
void arc_remove_prune_callback(arc_prune_t *p);

include/sys/arc_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ typedef struct arc_write_callback arc_write_callback_t;
101101
struct arc_write_callback {
102102
void *awcb_private;
103103
arc_done_func_t *awcb_ready;
104+
arc_done_func_t *awcb_children_ready;
104105
arc_done_func_t *awcb_physdone;
105106
arc_done_func_t *awcb_done;
106107
arc_buf_t *awcb_buf;

include/sys/zio.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2424
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25-
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
25+
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
2626
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
2727
*/
2828

@@ -400,7 +400,8 @@ struct zio {
400400
zio_transform_t *io_transform_stack;
401401

402402
/* Callback info */
403-
zio_done_func_t *io_ready;
403+
zio_done_func_t *io_ready;
404+
zio_done_func_t *io_children_ready;
404405
zio_done_func_t *io_physdone;
405406
zio_done_func_t *io_done;
406407
void *io_private;
@@ -468,9 +469,10 @@ extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
468469

469470
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
470471
void *data, uint64_t size, const zio_prop_t *zp,
471-
zio_done_func_t *ready, zio_done_func_t *physdone, zio_done_func_t *done,
472-
void *private,
473-
zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
472+
zio_done_func_t *ready, zio_done_func_t *children_ready,
473+
zio_done_func_t *physdone, zio_done_func_t *done,
474+
void *private, zio_priority_t priority, enum zio_flag flags,
475+
const zbookmark_phys_t *zb);
474476

475477
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
476478
void *data, uint64_t size, zio_done_func_t *done, void *private,

module/zfs/arc.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2323
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
24-
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
24+
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
2525
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
2626
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
2727
*/
@@ -4981,6 +4981,15 @@ arc_write_ready(zio_t *zio)
49814981
hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
49824982
}
49834983

4984+
/*
 * zio-level "children ready" hook used by arc_write().  It unpacks the
 * arc_write_callback_t stored in zio->io_private and forwards to the
 * caller-supplied awcb_children_ready callback, passing the ARC buffer
 * and the caller's private pointer.  arc_write() hands this hook to
 * zio_write() only when the caller's children_ready is non-NULL, so the
 * function pointer is not re-checked here.
 */
static void
4985+
arc_write_children_ready(zio_t *zio)
4986+
{
4987+
arc_write_callback_t *callback = zio->io_private;
4988+
arc_buf_t *buf = callback->awcb_buf;
4989+
4990+
callback->awcb_children_ready(zio, buf, callback->awcb_private);
4991+
}
4992+
49844993
/*
49854994
* The SPA calls this callback for each physical write that happens on behalf
49864995
* of a logical write. See the comment in dbuf_write_physdone() for details.
@@ -5077,7 +5086,8 @@ arc_write_done(zio_t *zio)
50775086
zio_t *
50785087
arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
50795088
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
5080-
const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone,
5089+
const zio_prop_t *zp, arc_done_func_t *ready,
5090+
arc_done_func_t *children_ready, arc_done_func_t *physdone,
50815091
arc_done_func_t *done, void *private, zio_priority_t priority,
50825092
int zio_flags, const zbookmark_phys_t *zb)
50835093
{
@@ -5097,13 +5107,16 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
50975107
hdr->b_flags |= ARC_FLAG_L2COMPRESS;
50985108
callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
50995109
callback->awcb_ready = ready;
5110+
callback->awcb_children_ready = children_ready;
51005111
callback->awcb_physdone = physdone;
51015112
callback->awcb_done = done;
51025113
callback->awcb_private = private;
51035114
callback->awcb_buf = buf;
51045115

51055116
zio = zio_write(pio, spa, txg, bp, buf->b_data, hdr->b_size, zp,
5106-
arc_write_ready, arc_write_physdone, arc_write_done, callback,
5117+
arc_write_ready,
5118+
(children_ready != NULL) ? arc_write_children_ready : NULL,
5119+
arc_write_physdone, arc_write_done, callback,
51075120
priority, zio_flags, zb);
51085121

51095122
return (zio);

module/zfs/dbuf.c

Lines changed: 118 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -543,13 +543,50 @@ dbuf_verify(dmu_buf_impl_t *db)
543543
* If the blkptr isn't set but they have nonzero data,
544544
* it had better be dirty, otherwise we'll lose that
545545
* data when we evict this buffer.
546+
*
547+
* There is an exception to this rule for indirect blocks; in
548+
* this case, if the indirect block is a hole, we fill in a few
549+
* fields on each of the child blocks (importantly, birth time)
550+
* to prevent hole birth times from being lost when you
551+
* partially fill in a hole.
546552
*/
547553
if (db->db_dirtycnt == 0) {
548-
ASSERTV(uint64_t *buf = db->db.db_data);
549-
int i;
554+
if (db->db_level == 0) {
555+
uint64_t *buf = db->db.db_data;
556+
int i;
550557

551-
for (i = 0; i < db->db.db_size >> 3; i++) {
552-
ASSERT(buf[i] == 0);
558+
for (i = 0; i < db->db.db_size >> 3; i++) {
559+
ASSERT(buf[i] == 0);
560+
}
561+
} else {
562+
int i;
563+
blkptr_t *bps = db->db.db_data;
564+
ASSERT3U(1 << DB_DNODE(db)->dn_indblkshift, ==,
565+
db->db.db_size);
566+
/*
567+
* We want to verify that all the blkptrs in the
568+
* indirect block are holes, but we may have
569+
* automatically set up a few fields for them.
570+
* We iterate through each blkptr and verify
571+
* they only have those fields set.
572+
*/
573+
for (i = 0;
574+
i < db->db.db_size / sizeof (blkptr_t);
575+
i++) {
576+
blkptr_t *bp = &bps[i];
577+
ASSERT(ZIO_CHECKSUM_IS_ZERO(
578+
&bp->blk_cksum));
579+
ASSERT(
580+
DVA_IS_EMPTY(&bp->blk_dva[0]) &&
581+
DVA_IS_EMPTY(&bp->blk_dva[1]) &&
582+
DVA_IS_EMPTY(&bp->blk_dva[2]));
583+
ASSERT0(bp->blk_fill);
584+
ASSERT0(bp->blk_pad[0]);
585+
ASSERT0(bp->blk_pad[1]);
586+
ASSERT(!BP_IS_EMBEDDED(bp));
587+
ASSERT(BP_IS_HOLE(bp));
588+
ASSERT0(bp->blk_phys_birth);
589+
}
553590
}
554591
}
555592
}
@@ -718,10 +755,32 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
718755
BP_IS_HOLE(db->db_blkptr)))) {
719756
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
720757

721-
DB_DNODE_EXIT(db);
722758
dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
723759
db->db.db_size, db, type));
724760
bzero(db->db.db_data, db->db.db_size);
761+
762+
if (db->db_blkptr != NULL && db->db_level > 0 &&
763+
BP_IS_HOLE(db->db_blkptr) &&
764+
db->db_blkptr->blk_birth != 0) {
765+
blkptr_t *bps = db->db.db_data;
766+
int i;
767+
for (i = 0; i < ((1 <<
768+
DB_DNODE(db)->dn_indblkshift) / sizeof (blkptr_t));
769+
i++) {
770+
blkptr_t *bp = &bps[i];
771+
ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
772+
1 << dn->dn_indblkshift);
773+
BP_SET_LSIZE(bp,
774+
BP_GET_LEVEL(db->db_blkptr) == 1 ?
775+
dn->dn_datablksz :
776+
BP_GET_LSIZE(db->db_blkptr));
777+
BP_SET_TYPE(bp, BP_GET_TYPE(db->db_blkptr));
778+
BP_SET_LEVEL(bp,
779+
BP_GET_LEVEL(db->db_blkptr) - 1);
780+
BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0);
781+
}
782+
}
783+
DB_DNODE_EXIT(db);
725784
db->db_state = DB_CACHED;
726785
mutex_exit(&db->db_mtx);
727786
return (0);
@@ -3094,6 +3153,45 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
30943153
rw_exit(&dn->dn_struct_rwlock);
30953154
}
30963155

3156+
/* ARGSUSED */
3157+
/*
3158+
* This function gets called just prior to running through the compression
3159+
* stage of the zio pipeline. If we're an indirect block comprised of only
3160+
* holes, then we want this indirect to be compressed away to a hole. In
3161+
* order to do that we must zero out any information about the holes that
3162+
* this indirect points to before we try to compress it.
3163+
*/
3164+
static void
3165+
dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
3166+
{
3167+
dmu_buf_impl_t *db = vdb;
3168+
dnode_t *dn;
3169+
blkptr_t *bp;
3170+
uint64_t i;
3171+
int epbs;
3172+
3173+
ASSERT3U(db->db_level, >, 0);
3174+
DB_DNODE_ENTER(db);
3175+
dn = DB_DNODE(db);
3176+
epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
3177+
3178+
/* Determine if all our children are holes; stop at the first non-hole */
3179+
for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
3180+
if (!BP_IS_HOLE(bp))
3181+
break;
3182+
}
3183+
3184+
/*
3185+
* If all the children are holes, then zero them all out so that
3186+
* we may get compressed away.
3187+
*/
3188+
if (i == 1 << epbs) {
3189+
/* the scan ran to completion, so it didn't find any non-holes */
3190+
bzero(db->db.db_data, db->db.db_size);
3191+
}
3192+
DB_DNODE_EXIT(db);
3193+
}
3194+
30973195
/*
30983196
* The SPA will call this callback several times for each zio - once
30993197
* for every physical child i/o (zio->io_phys_children times). This
@@ -3348,7 +3446,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
33483446

33493447
dr->dr_zio = zio_write(zio, os->os_spa, txg,
33503448
&dr->dr_bp_copy, contents, db->db.db_size, &zp,
3351-
dbuf_write_override_ready, NULL, dbuf_write_override_done,
3449+
dbuf_write_override_ready, NULL, NULL,
3450+
dbuf_write_override_done,
33523451
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
33533452
mutex_enter(&db->db_mtx);
33543453
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
@@ -3359,14 +3458,26 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
33593458
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF);
33603459
dr->dr_zio = zio_write(zio, os->os_spa, txg,
33613460
&dr->dr_bp_copy, NULL, db->db.db_size, &zp,
3362-
dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
3461+
dbuf_write_nofill_ready, NULL, NULL,
3462+
dbuf_write_nofill_done, db,
33633463
ZIO_PRIORITY_ASYNC_WRITE,
33643464
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
33653465
} else {
3466+
arc_done_func_t *children_ready_cb = NULL;
33663467
ASSERT(arc_released(data));
3468+
3469+
/*
3470+
* For indirect blocks, we want to setup the children
3471+
* ready callback so that we can properly handle an indirect
3472+
* block that only contains holes.
3473+
*/
3474+
if (db->db_level != 0)
3475+
children_ready_cb = dbuf_write_children_ready;
3476+
33673477
dr->dr_zio = arc_write(zio, os->os_spa, txg,
33683478
&dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
33693479
DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
3480+
children_ready_cb,
33703481
dbuf_write_physdone, dbuf_write_done, db,
33713482
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
33723483
}

module/zfs/dmu.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23-
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
23+
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
2424
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25+
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
2526
* Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
2627
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
2728
*/
@@ -1480,10 +1481,11 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
14801481
dsa->dsa_zgd = zgd;
14811482
dsa->dsa_tx = tx;
14821483

1483-
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
1484-
zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp,
1485-
dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done, dsa,
1486-
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL|ZIO_FLAG_FASTWRITE, zb));
1484+
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx),
1485+
zgd->zgd_bp, zgd->zgd_db->db_data, zgd->zgd_db->db_size,
1486+
zp, dmu_sync_late_arrival_ready, NULL,
1487+
NULL, dmu_sync_late_arrival_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
1488+
ZIO_FLAG_CANFAIL, zb));
14871489

14881490
return (0);
14891491
}
@@ -1636,8 +1638,8 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
16361638
zio_nowait(arc_write(pio, os->os_spa, txg,
16371639
bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
16381640
DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready,
1639-
NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
1640-
ZIO_FLAG_CANFAIL, &zb));
1641+
NULL, NULL, dmu_sync_done, dsa,
1642+
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
16411643

16421644
return (0);
16431645
}

module/zfs/dmu_objset.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23-
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
23+
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
2424
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
2525
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
2626
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -1110,9 +1110,9 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
11101110

11111111
zio = arc_write(pio, os->os_spa, tx->tx_txg,
11121112
os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
1113-
DMU_OS_IS_L2COMPRESSIBLE(os), &zp, dmu_objset_write_ready,
1114-
NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE,
1115-
ZIO_FLAG_MUSTSUCCEED, &zb);
1113+
DMU_OS_IS_L2COMPRESSIBLE(os),
1114+
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
1115+
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
11161116

11171117
/*
11181118
* Sync special dnodes - the parent IO for the sync is the root block

module/zfs/dnode_sync.c

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24-
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24+
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
2525
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
2626
*/
2727

@@ -60,20 +60,14 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
6060
dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
6161
dn->dn_object, dn->dn_phys->dn_nlevels);
6262

63-
/* check for existing blkptrs in the dnode */
64-
for (i = 0; i < nblkptr; i++)
65-
if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
66-
break;
67-
if (i != nblkptr) {
68-
/* transfer dnode's block pointers to new indirect block */
69-
(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
70-
ASSERT(db->db.db_data);
71-
ASSERT(arc_released(db->db_buf));
72-
ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
73-
bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
74-
sizeof (blkptr_t) * nblkptr);
75-
arc_buf_freeze(db->db_buf);
76-
}
63+
/* transfer dnode's block pointers to new indirect block */
64+
(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
65+
ASSERT(db->db.db_data);
66+
ASSERT(arc_released(db->db_buf));
67+
ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
68+
bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
69+
sizeof (blkptr_t) * nblkptr);
70+
arc_buf_freeze(db->db_buf);
7771

7872
/* set dbuf's parent pointers to new indirect buf */
7973
for (i = 0; i < nblkptr; i++) {

0 commit comments

Comments
 (0)