Skip to content

Commit b62f7d3

Browse files
committed
Improve block cloning transactions accounting
Previous dmu_tx_count_clone() was broken, stating that cloning is similar to free. While they might be from some points, cloning is not net-free. It will likely consume space and memory, and unlike free it will do it no matter whether the destination has the blocks or not (usually not, so previous code did nothing). Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc.
1 parent b96f1a4 commit b62f7d3

File tree

5 files changed

+45
-13
lines changed

5 files changed

+45
-13
lines changed

include/sys/dmu.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -856,7 +856,7 @@ void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
856856
void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
857857
int len);
858858
void dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
859-
int len);
859+
uint64_t len, uint_t blksz);
860860
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
861861
uint64_t len);
862862
void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,

module/zfs/dbuf.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3912,7 +3912,8 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
39123912

39133913
ASSERT(blkid != DMU_BONUS_BLKID);
39143914
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
3915-
ASSERT3U(dn->dn_nlevels, >, level);
3915+
if (!fail_sparse)
3916+
ASSERT3U(dn->dn_nlevels, >, level);
39163917

39173918
*dbp = NULL;
39183919

module/zfs/dmu_tx.c

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include <sys/dsl_pool.h>
3737
#include <sys/zap_impl.h>
3838
#include <sys/spa.h>
39+
#include <sys/brt_impl.h>
3940
#include <sys/sa.h>
4041
#include <sys/sa_impl.h>
4142
#include <sys/zfs_context.h>
@@ -547,17 +548,45 @@ dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
547548
}
548549

549550
static void
550-
dmu_tx_count_clone(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
551+
dmu_tx_count_clone(dmu_tx_hold_t *txh, uint64_t off, uint64_t len,
552+
uint_t blksz)
551553
{
554+
dmu_tx_t *tx = txh->txh_tx;
555+
dnode_t *dn = txh->txh_dnode;
556+
int err;
552557

553-
/*
554-
* Reuse dmu_tx_count_free(), it does exactly what we need for clone.
555-
*/
556-
dmu_tx_count_free(txh, off, len);
558+
ASSERT0(tx->tx_txg);
559+
ASSERT(dn->dn_indblkshift != 0);
560+
ASSERT(blksz != 0);
561+
ASSERT0(off % blksz);
562+
563+
(void) zfs_refcount_add_many(&txh->txh_memory_tohold,
564+
len / blksz * sizeof (brt_entry_t), FTAG);
565+
566+
int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
567+
uint64_t start = off / blksz >> shift;
568+
uint64_t end = (off + len) / blksz >> shift;
569+
570+
(void) zfs_refcount_add_many(&txh->txh_space_towrite,
571+
(end - start + 1) << dn->dn_indblkshift, FTAG);
572+
573+
zio_t *zio = zio_root(tx->tx_pool->dp_spa,
574+
NULL, NULL, ZIO_FLAG_CANFAIL);
575+
for (uint64_t i = start; i <= end; i++) {
576+
err = dmu_tx_check_ioerr(zio, dn, 1, i);
577+
if (err != 0) {
578+
tx->tx_err = err;
579+
break;
580+
}
581+
}
582+
err = zio_wait(zio);
583+
if (err != 0)
584+
tx->tx_err = err;
557585
}
558586

559587
void
560-
dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
588+
dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
589+
uint64_t len, uint_t blksz)
561590
{
562591
dmu_tx_hold_t *txh;
563592

@@ -567,7 +596,7 @@ dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
567596
txh = dmu_tx_hold_dnode_impl(tx, dn, THT_CLONE, off, len);
568597
if (txh != NULL) {
569598
dmu_tx_count_dnode(txh);
570-
dmu_tx_count_clone(txh, off, len);
599+
dmu_tx_count_clone(txh, off, len, blksz);
571600
}
572601
}
573602

module/zfs/zfs_vnops.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1828,7 +1828,8 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
18281828
dmu_tx_hold_sa(tx, outzp->z_sa_hdl, B_FALSE);
18291829
db = (dmu_buf_impl_t *)sa_get_db(outzp->z_sa_hdl);
18301830
DB_DNODE_ENTER(db);
1831-
dmu_tx_hold_clone_by_dnode(tx, DB_DNODE(db), outoff, size);
1831+
dmu_tx_hold_clone_by_dnode(tx, DB_DNODE(db), outoff, size,
1832+
inblksz);
18321833
DB_DNODE_EXIT(db);
18331834
zfs_sa_upgrade_txholds(tx, outzp);
18341835
error = dmu_tx_assign(tx, DMU_TX_WAIT);
@@ -1995,7 +1996,7 @@ zfs_clone_range_replay(znode_t *zp, uint64_t off, uint64_t len, uint64_t blksz,
19951996
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
19961997
db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
19971998
DB_DNODE_ENTER(db);
1998-
dmu_tx_hold_clone_by_dnode(tx, DB_DNODE(db), off, len);
1999+
dmu_tx_hold_clone_by_dnode(tx, DB_DNODE(db), off, len, blksz);
19992000
DB_DNODE_EXIT(db);
20002001
zfs_sa_upgrade_txholds(tx, zp);
20012002
error = dmu_tx_assign(tx, DMU_TX_WAIT);

module/zfs/zvol.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,7 @@ zvol_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap)
577577
if (error != 0 || !zv->zv_dn)
578578
return (error);
579579
tx = dmu_tx_create(os);
580-
dmu_tx_hold_clone_by_dnode(tx, zv->zv_dn, off, len);
580+
dmu_tx_hold_clone_by_dnode(tx, zv->zv_dn, off, len, blksz);
581581
error = dmu_tx_assign(tx, DMU_TX_WAIT);
582582
if (error != 0) {
583583
dmu_tx_abort(tx);
@@ -742,7 +742,8 @@ zvol_clone_range(zvol_state_t *zv_src, uint64_t inoff, zvol_state_t *zv_dst,
742742
}
743743

744744
tx = dmu_tx_create(zv_dst->zv_objset);
745-
dmu_tx_hold_clone_by_dnode(tx, zv_dst->zv_dn, outoff, size);
745+
dmu_tx_hold_clone_by_dnode(tx, zv_dst->zv_dn, outoff, size,
746+
zv_src->zv_volblocksize);
746747
error = dmu_tx_assign(tx, DMU_TX_WAIT);
747748
if (error != 0) {
748749
dmu_tx_abort(tx);

0 commit comments

Comments
 (0)