@@ -20,11 +20,10 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright 2014 HybridCluster. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  */
 
@@ -173,6 +172,14 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
 	return (0);
 }
 
+/*
+ * Fill in the drr_free struct, or perform aggregation if the previous record is
+ * also a free record, and the two are adjacent.
+ *
+ * Note that we send free records even for a full send, because we want to be
+ * able to receive a full send as a clone, which requires a list of all the free
+ * and freeobject records that were generated on the source.
+ */
 static int
 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
     uint64_t length)
@@ -196,15 +203,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
 	    (object == dsp->dsa_last_data_object &&
 	    offset > dsp->dsa_last_data_offset));
 
-	/*
-	 * If we are doing a non-incremental send, then there can't
-	 * be any data in the dataset we're receiving into. Therefore
-	 * a free record would simply be a no-op. Save space by not
-	 * sending it to begin with.
-	 */
-	if (!dsp->dsa_incremental)
-		return (0);
-
 	if (length != -1ULL && offset + length < offset)
 		length = -1ULL;
 
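With the early return for non-incremental sends removed, dump_free() now emits free records for full sends too, coalescing adjacent ranges as described in the comment added above. The following stand-alone sketch shows that aggregation idea in isolation; the names and structures are illustrative only, not the ones used by dmu_send.c: a pending range grows while incoming frees are contiguous and is flushed as a single record when a gap appears.

#include <stdint.h>
#include <stdio.h>

struct pending_free {
	int		valid;		/* is a record currently being aggregated? */
	uint64_t	offset;		/* start of the pending free range */
	uint64_t	length;		/* length of the pending free range */
};

static void
flush_free(struct pending_free *pf)
{
	if (pf->valid)
		printf("FREE record: offset=%llu length=%llu\n",
		    (unsigned long long)pf->offset,
		    (unsigned long long)pf->length);
	pf->valid = 0;
}

static void
note_free(struct pending_free *pf, uint64_t offset, uint64_t length)
{
	if (pf->valid && pf->offset + pf->length == offset) {
		/* Adjacent to the pending record: extend it. */
		pf->length += length;
	} else {
		/* Not adjacent: emit the pending record, start a new one. */
		flush_free(pf);
		pf->valid = 1;
		pf->offset = offset;
		pf->length = length;
	}
}

int
main(void)
{
	struct pending_free pf = { 0 };

	note_free(&pf, 0, 4096);
	note_free(&pf, 4096, 4096);	/* adjacent: merged into one record */
	note_free(&pf, 16384, 4096);	/* gap: previous record is flushed */
	flush_free(&pf);
	return (0);
}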
@@ -382,10 +380,6 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
 {
 	struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
 
-	/* See comment in dump_free(). */
-	if (!dsp->dsa_incremental)
-		return (0);
-
 	/*
 	 * If there is a pending op, but it's not PENDING_FREEOBJECTS,
 	 * push it out, since free block aggregation can only be done for
@@ -796,6 +790,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 	drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid;
 	if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET)
 		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
+	drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
 
 	if (ancestor_zb != NULL) {
 		drr->drr_u.drr_begin.drr_fromguid =
@@ -818,7 +813,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 	dsp->dsa_off = off;
 	dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
 	dsp->dsa_pending_op = PENDING_NONE;
-	dsp->dsa_incremental = (ancestor_zb != NULL);
 	dsp->dsa_featureflags = featureflags;
 	dsp->dsa_resume_object = resumeobj;
 	dsp->dsa_resume_offset = resumeoff;
@@ -1336,7 +1330,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 		/* target fs already exists; recv into temp clone */
 
 		/* Can't recv a clone into an existing fs */
-		if (flags & DRR_FLAG_CLONE) {
+		if (flags & DRR_FLAG_CLONE || drba->drba_origin) {
 			dsl_dataset_rele(ds, FTAG);
 			return (SET_ERROR(EINVAL));
 		}
@@ -1355,6 +1349,15 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 		    drba->drba_origin))
 			return (SET_ERROR(ENOENT));
 
+		/*
+		 * If we're receiving a full send as a clone, and it doesn't
+		 * contain all the necessary free records and freeobject
+		 * records, reject it.
+		 */
+		if (fromguid == 0 && drba->drba_origin &&
+		    !(flags & DRR_FLAG_FREERECORDS))
+			return (SET_ERROR(EINVAL));
+
 		/* Open the parent of tofs */
 		ASSERT3U(strlen(tofs), <, MAXNAMELEN);
 		(void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
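Together with DRR_FLAG_FREERECORDS being set unconditionally in dmu_send_impl() above, this new check is what lets a full stream be received as a clone while rejecting older streams that lack the complete free/freeobject record list. Below is a minimal sketch of that acceptance rule; the helper name and the locally defined flag value are illustrative assumptions, not the definitions from the ZFS headers.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative placeholder; the real DRR_FLAG_FREERECORDS lives in the ZFS headers. */
#define	EXAMPLE_DRR_FLAG_FREERECORDS	(1 << 2)

/*
 * Decide whether a begin record may be received on top of an origin snapshot.
 * fromguid == 0 means the stream is a full (non-incremental) send.
 */
static bool
recv_into_clone_allowed(uint64_t fromguid, bool have_origin, uint64_t drr_flags)
{
	if (fromguid != 0)
		return (true);	/* incremental streams are unaffected by this check */
	if (!have_origin)
		return (true);	/* a plain full receive needs no free records */
	/* Full stream into a clone: the stream must carry every free record. */
	return ((drr_flags & EXAMPLE_DRR_FLAG_FREERECORDS) != 0);
}

int
main(void)
{
	/* Full stream (fromguid == 0) into a clone without the flag: rejected. */
	printf("%d\n", recv_into_clone_allowed(0, true, 0));			/* 0 */
	/* Same stream, but the sender set the free-records flag: accepted. */
	printf("%d\n", recv_into_clone_allowed(0, true,
	    EXAMPLE_DRR_FLAG_FREERECORDS));					/* 1 */
	return (0);
}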
@@ -1394,7 +1397,8 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 				dsl_dataset_rele(ds, FTAG);
 				return (SET_ERROR(EINVAL));
 			}
-			if (dsl_dataset_phys(origin)->ds_guid != fromguid) {
+			if (dsl_dataset_phys(origin)->ds_guid != fromguid &&
+			    fromguid != 0) {
 				dsl_dataset_rele(origin, FTAG);
 				dsl_dataset_rele(ds, FTAG);
 				return (SET_ERROR(ENODEV));
@@ -1724,6 +1728,20 @@ struct receive_writer_arg {
 	uint64_t bytes_read; /* bytes read when current record created */
 };
 
+struct objlist {
+	list_t list; /* List of struct receive_objnode. */
+	/*
+	 * Last object looked up. Used to assert that objects are being looked
+	 * up in ascending order.
+	 */
+	uint64_t last_lookup;
+};
+
+struct receive_objnode {
+	list_node_t node;
+	uint64_t object;
+};
+
 struct receive_arg {
 	objset_t *os;
 	vnode_t *vp; /* The vnode to read the stream from */
@@ -1741,12 +1759,7 @@ struct receive_arg {
 	int err;
 	boolean_t byteswap;
 	/* Sorted list of objects not to issue prefetches for. */
-	list_t ignore_obj_list;
-};
-
-struct receive_ign_obj_node {
-	list_node_t node;
-	uint64_t object;
+	struct objlist ignore_objlist;
 };
 
 typedef struct guid_map_entry {
@@ -2063,13 +2076,14 @@ receive_freeobjects(struct receive_writer_arg *rwa,
     struct drr_freeobjects *drrfo)
 {
 	uint64_t obj;
+	int next_err = 0;
 
 	if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
 		return (SET_ERROR(EINVAL));
 
 	for (obj = drrfo->drr_firstobj == 0 ? 1 : drrfo->drr_firstobj;
-	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
-	    (void) dmu_object_next(rwa->os, &obj, FALSE, 0)) {
+	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0;
+	    next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) {
 		dmu_object_info_t doi;
 		int err;
 
@@ -2085,7 +2099,8 @@ receive_freeobjects(struct receive_writer_arg *rwa,
 		if (err != 0)
 			return (err);
 	}
-
+	if (next_err != ESRCH)
+		return (next_err);
 	return (0);
 }
 
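The rewritten loop in receive_freeobjects() no longer discards the return value of dmu_object_next(): iteration stops on any error, and ESRCH (no further objects) is treated as normal termination afterward. A self-contained sketch of that pattern, with a toy iterator standing in for dmu_object_next(), might look like this:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define	EXAMPLE_MAX_OBJECT	10	/* pretend the objset has objects 1..10 */

/*
 * Hypothetical stand-in for dmu_object_next(): advance to the next allocated
 * object, or return ESRCH when there are none left.
 */
static int
next_object(uint64_t *objp)
{
	if (*objp >= EXAMPLE_MAX_OBJECT)
		return (ESRCH);
	*objp += 1;
	return (0);
}

static int
free_one_object(uint64_t obj)
{
	printf("freeing object %llu\n", (unsigned long long)obj);
	return (0);
}

static int
free_object_range(uint64_t firstobj, uint64_t numobjs)
{
	uint64_t obj;
	int next_err = 0;

	/*
	 * Stop when the range is covered or when the object walk reports an
	 * error; capture that error instead of discarding it.
	 */
	for (obj = firstobj; obj < firstobj + numobjs && next_err == 0;
	    next_err = next_object(&obj)) {
		int err = free_one_object(obj);
		if (err != 0)
			return (err);
	}
	/* ESRCH only means the walk ran past the last object: not a failure. */
	if (next_err != ESRCH)
		return (next_err);
	return (0);
}

int
main(void)
{
	return (free_object_range(1, 64));
}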
@@ -2415,6 +2430,70 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
 	return (0);
 }
 
+static void
+objlist_create(struct objlist *list)
+{
+	list_create(&list->list, sizeof (struct receive_objnode),
+	    offsetof(struct receive_objnode, node));
+	list->last_lookup = 0;
+}
+
+static void
+objlist_destroy(struct objlist *list)
+{
+	struct receive_objnode *n;
+
+	for (n = list_remove_head(&list->list);
+	    n != NULL; n = list_remove_head(&list->list)) {
+		kmem_free(n, sizeof (*n));
+	}
+	list_destroy(&list->list);
+}
+
+/*
+ * This function looks through the objlist to see if the specified object number
+ * is contained in the objlist. In the process, it will remove all object
+ * numbers in the list that are smaller than the specified object number. Thus,
+ * any lookup of an object number smaller than a previously looked up object
+ * number will always return false; therefore, all lookups should be done in
+ * ascending order.
+ */
+static boolean_t
+objlist_exists(struct objlist *list, uint64_t object)
+{
+	struct receive_objnode *node = list_head(&list->list);
+	ASSERT3U(object, >=, list->last_lookup);
+	list->last_lookup = object;
+	while (node != NULL && node->object < object) {
+		VERIFY3P(node, ==, list_remove_head(&list->list));
+		kmem_free(node, sizeof (*node));
+		node = list_head(&list->list);
+	}
+	return (node != NULL && node->object == object);
+}
+
+/*
+ * The objlist is a list of object numbers stored in ascending order. However,
+ * the insertion of new object numbers does not seek out the correct location to
+ * store a new object number; instead, it appends it to the list for simplicity.
+ * Thus, any users must take care to only insert new object numbers in ascending
+ * order.
+ */
+static void
+objlist_insert(struct objlist *list, uint64_t object)
+{
+	struct receive_objnode *node = kmem_zalloc(sizeof (*node), KM_SLEEP);
+	node->object = object;
+#ifdef ZFS_DEBUG
+	{
+		struct receive_objnode *last_object = list_tail(&list->list);
+		uint64_t last_objnum = (last_object != NULL ? last_object->object : 0);
+		ASSERT3U(node->object, >, last_objnum);
+	}
+#endif
+	list_insert_tail(&list->list, node);
+}
+
 /*
  * Issue the prefetch reads for any necessary indirect blocks.
  *
@@ -2437,13 +2516,7 @@ static void
 receive_read_prefetch(struct receive_arg *ra,
     uint64_t object, uint64_t offset, uint64_t length)
 {
-	struct receive_ign_obj_node *node = list_head(&ra->ignore_obj_list);
-	while (node != NULL && node->object < object) {
-		VERIFY3P(node, ==, list_remove_head(&ra->ignore_obj_list));
-		kmem_free(node, sizeof (*node));
-		node = list_head(&ra->ignore_obj_list);
-	}
-	if (node == NULL || node->object > object) {
+	if (!objlist_exists(&ra->ignore_objlist, object)) {
 		dmu_prefetch(ra->os, object, 1, offset, length,
 		    ZIO_PRIORITY_SYNC_READ);
 	}
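The objlist helpers introduced earlier replace the previously open-coded ignore list, and receive_read_prefetch() above now consults it with a single objlist_exists() call. The fragment below sketches the intended calling pattern and its ascending-order contract; it relies on the same kernel list and kmem facilities as dmu_send.c, so it is an illustration rather than a stand-alone program, and the object numbers are arbitrary.

	struct objlist ignore;

	objlist_create(&ignore);

	/* Insertions must be made in ascending object order. */
	objlist_insert(&ignore, 5);
	objlist_insert(&ignore, 9);

	/*
	 * Lookups must also be ascending; entries smaller than the looked-up
	 * object are pruned as the cursor moves forward.
	 */
	(void) objlist_exists(&ignore, 3);	/* B_FALSE, list unchanged */
	(void) objlist_exists(&ignore, 5);	/* B_TRUE, 5 is kept */
	(void) objlist_exists(&ignore, 9);	/* B_TRUE, and 5 is pruned here */

	objlist_destroy(&ignore);		/* frees any remaining nodes */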
@@ -2476,20 +2549,7 @@ receive_read_record(struct receive_arg *ra)
 	 */
 	if (err == ENOENT ||
 	    (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
-		struct receive_ign_obj_node *node =
-		    kmem_zalloc(sizeof (*node),
-		    KM_SLEEP);
-		node->object = drro->drr_object;
-#ifdef ZFS_DEBUG
-		{
-			struct receive_ign_obj_node *last_object =
-			    list_tail(&ra->ignore_obj_list);
-			uint64_t last_objnum = (last_object != NULL ?
-			    last_object->object : 0);
-			ASSERT3U(node->object, >, last_objnum);
-		}
-#endif
-		list_insert_tail(&ra->ignore_obj_list, node);
+		objlist_insert(&ra->ignore_objlist, drro->drr_object);
 		err = 0;
 	}
 	return (err);
@@ -2706,7 +2766,6 @@ resume_check(struct receive_arg *ra, nvlist_t *begin_nvl)
 	return (0);
 }
 
-
 /*
  * Read in the stream's records, one by one, and apply them to the pool. There
  * are two threads involved; the thread that calls this function will spin up a
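The comment above (continued past this hunk) describes dmu_recv_stream()'s two-thread design: the calling thread reads records off the stream while a worker thread applies them, with a blocking queue in between. As a rough, self-contained illustration of that producer/consumer split, with a toy bounded queue and integer "records" standing in for the real queue and record structures, consider:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define	QUEUE_DEPTH	8
#define	RECORD_COUNT	32
#define	DONE_MARKER	UINT64_MAX	/* sentinel telling the writer to stop */

/* Toy bounded queue protected by a mutex and two condition variables. */
struct toy_queue {
	uint64_t	items[QUEUE_DEPTH];
	int		head, tail, count;
	pthread_mutex_t	lock;
	pthread_cond_t	not_empty, not_full;
};

static void
toy_enqueue(struct toy_queue *q, uint64_t item)
{
	pthread_mutex_lock(&q->lock);
	while (q->count == QUEUE_DEPTH)
		pthread_cond_wait(&q->not_full, &q->lock);
	q->items[q->tail] = item;
	q->tail = (q->tail + 1) % QUEUE_DEPTH;
	q->count++;
	pthread_cond_signal(&q->not_empty);
	pthread_mutex_unlock(&q->lock);
}

static uint64_t
toy_dequeue(struct toy_queue *q)
{
	uint64_t item;

	pthread_mutex_lock(&q->lock);
	while (q->count == 0)
		pthread_cond_wait(&q->not_empty, &q->lock);
	item = q->items[q->head];
	q->head = (q->head + 1) % QUEUE_DEPTH;
	q->count--;
	pthread_cond_signal(&q->not_full);
	pthread_mutex_unlock(&q->lock);
	return (item);
}

/* The "writer" side: pull records off the queue and apply them. */
static void *
writer_thread(void *arg)
{
	struct toy_queue *q = arg;
	uint64_t rec;

	while ((rec = toy_dequeue(q)) != DONE_MARKER)
		printf("applying record %llu\n", (unsigned long long)rec);
	return (NULL);
}

int
main(void)
{
	struct toy_queue q = { { 0 }, 0, 0, 0 };
	pthread_t writer;
	uint64_t rec;

	pthread_mutex_init(&q.lock, NULL);
	pthread_cond_init(&q.not_empty, NULL);
	pthread_cond_init(&q.not_full, NULL);
	(void) pthread_create(&writer, NULL, writer_thread, &q);

	/* The calling thread is the "reader": it feeds records in order. */
	for (rec = 0; rec < RECORD_COUNT; rec++)
		toy_enqueue(&q, rec);
	toy_enqueue(&q, DONE_MARKER);
	(void) pthread_join(writer, NULL);
	return (0);
}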
@@ -2727,7 +2786,6 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 	struct receive_arg *ra;
 	struct receive_writer_arg *rwa;
 	int featureflags;
-	struct receive_ign_obj_node *n;
 	uint32_t payloadlen;
 	void *payload;
 	nvlist_t *begin_nvl = NULL;
@@ -2746,8 +2804,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 		    sizeof (ra->bytes_read), 1, &ra->bytes_read);
 	}
 
-	list_create(&ra->ignore_obj_list, sizeof (struct receive_ign_obj_node),
-	    offsetof(struct receive_ign_obj_node, node));
+	objlist_create(&ra->ignore_objlist);
 
 	/* these were verified in dmu_recv_begin */
 	ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
@@ -2901,12 +2958,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 	}
 
 	*voffp = ra->voff;
-
-	for (n = list_remove_head(&ra->ignore_obj_list); n != NULL;
-	    n = list_remove_head(&ra->ignore_obj_list)) {
-		kmem_free(n, sizeof (*n));
-	}
-	list_destroy(&ra->ignore_obj_list);
+	objlist_destroy(&ra->ignore_objlist);
 	kmem_free(ra, sizeof (*ra));
 	kmem_free(rwa, sizeof (*rwa));
 	return (err);