@@ -169,6 +169,14 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
 	return (0);
 }
 
+/*
+ * Fill in the drr_free struct, or perform aggregation if the previous record is
+ * also a free record, and the two are adjacent.
+ *
+ * Note that we send free records even for a full send, because we want to be
+ * able to receive a full send as a clone, which requires a list of all the free
+ * and freeobject records that were generated on the source.
+ */
 static int
 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
     uint64_t length)
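
The aggregation rule named in the new comment reduces to an adjacency test. A minimal standalone sketch of that rule (hypothetical names, not part of the patch):

#include <stdint.h>

/*
 * Hypothetical illustration: two free records can be merged only when
 * they refer to the same object and the second begins exactly where the
 * first ends. dump_free() applies this rule to decide between extending
 * the pending free record and pushing it out as its own record.
 */
struct free_rec {
	uint64_t object;
	uint64_t offset;
	uint64_t length;
};

static int
can_aggregate(const struct free_rec *prev, const struct free_rec *cur)
{
	return (prev->object == cur->object &&
	    prev->offset + prev->length == cur->offset);
}
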
@@ -192,15 +200,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
 	    (object == dsp->dsa_last_data_object &&
 	    offset > dsp->dsa_last_data_offset));
 
-	/*
-	 * If we are doing a non-incremental send, then there can't
-	 * be any data in the dataset we're receiving into.  Therefore
-	 * a free record would simply be a no-op.  Save space by not
-	 * sending it to begin with.
-	 */
-	if (!dsp->dsa_incremental)
-		return (0);
-
 	if (length != -1ULL && offset + length < offset)
 		length = -1ULL;
 
@@ -378,10 +377,6 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
 {
 	struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
 
-	/* See comment in dump_free(). */
-	if (!dsp->dsa_incremental)
-		return (0);
-
 	/*
 	 * If there is a pending op, but it's not PENDING_FREEOBJECTS,
 	 * push it out, since free block aggregation can only be done for
@@ -787,6 +782,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 	drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid;
 	if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET)
 		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
+	drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
 
 	if (ancestor_zb != NULL) {
 		drr->drr_u.drr_begin.drr_fromguid =
@@ -809,7 +805,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 	dsp->dsa_off = off;
 	dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
 	dsp->dsa_pending_op = PENDING_NONE;
-	dsp->dsa_incremental = (ancestor_zb != NULL);
 	dsp->dsa_featureflags = featureflags;
 	dsp->dsa_resume_object = resumeobj;
 	dsp->dsa_resume_offset = resumeoff;
@@ -1319,7 +1314,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 		/* target fs already exists; recv into temp clone */
 
 		/* Can't recv a clone into an existing fs */
-		if (flags & DRR_FLAG_CLONE) {
+		if (flags & DRR_FLAG_CLONE || drba->drba_origin) {
 			dsl_dataset_rele(ds, FTAG);
 			return (SET_ERROR(EINVAL));
 		}
@@ -1338,6 +1333,15 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 		    drba->drba_origin))
 			return (SET_ERROR(ENOENT));
 
+		/*
+		 * If we're receiving a full send as a clone, and it doesn't
+		 * contain all the necessary free records and freeobject
+		 * records, reject it.
+		 */
+		if (fromguid == 0 && drba->drba_origin &&
+		    !(flags & DRR_FLAG_FREERECORDS))
+			return (SET_ERROR(EINVAL));
+
 		/* Open the parent of tofs */
 		ASSERT3U(strlen(tofs), <, MAXNAMELEN);
 		(void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
@@ -1377,7 +1381,8 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 			dsl_dataset_rele(ds, FTAG);
 			return (SET_ERROR(EINVAL));
 		}
-		if (dsl_dataset_phys(origin)->ds_guid != fromguid) {
+		if (dsl_dataset_phys(origin)->ds_guid != fromguid &&
+		    fromguid != 0) {
 			dsl_dataset_rele(origin, FTAG);
 			dsl_dataset_rele(ds, FTAG);
 			return (SET_ERROR(ENODEV));
@@ -1706,6 +1711,20 @@ struct receive_writer_arg {
 	uint64_t bytes_read; /* bytes read when current record created */
 };
 
+struct objlist {
+	list_t list; /* List of struct receive_objnode. */
+	/*
+	 * Last object looked up. Used to assert that objects are being looked
+	 * up in ascending order.
+	 */
+	uint64_t last_lookup;
+};
+
+struct receive_objnode {
+	list_node_t node;
+	uint64_t object;
+};
+
 struct receive_arg {
 	objset_t *os;
 	vnode_t *vp; /* The vnode to read the stream from */
@@ -1723,12 +1742,7 @@ struct receive_arg {
 	int err;
 	boolean_t byteswap;
 	/* Sorted list of objects not to issue prefetches for. */
-	list_t ignore_obj_list;
-};
-
-struct receive_ign_obj_node {
-	list_node_t node;
-	uint64_t object;
+	struct objlist ignore_objlist;
 };
 
 typedef struct guid_map_entry {
@@ -2042,13 +2056,14 @@ receive_freeobjects(struct receive_writer_arg *rwa,
     struct drr_freeobjects *drrfo)
 {
 	uint64_t obj;
+	int next_err = 0;
 
 	if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
 		return (SET_ERROR(EINVAL));
 
 	for (obj = drrfo->drr_firstobj;
-	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
-	    (void) dmu_object_next(rwa->os, &obj, FALSE, 0)) {
+	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0;
+	    next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) {
 		int err;
 
 		if (dmu_object_info(rwa->os, obj, NULL) != 0)
@@ -2058,7 +2073,8 @@ receive_freeobjects(struct receive_writer_arg *rwa,
 		if (err != 0)
 			return (err);
 	}
-
+	if (next_err != ESRCH)
+		return (next_err);
 	return (0);
 }
 
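
The loop change in these two hunks is easier to see flattened out: the return value of dmu_object_next() is now captured instead of discarded, and ESRCH (no allocated object follows the current one) is the normal end of the walk rather than an error. A sketch of the equivalent control flow, not part of the patch and not compilable on its own since it reuses the function's locals:

/*
 * Equivalent control flow to the patched for-loop (sketch only).
 * next_err == ESRCH means the object walk ran off the end of the
 * objset, which is expected; anything else is a real error.
 */
uint64_t obj = drrfo->drr_firstobj;
int next_err = 0;

while (obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0) {
	/* ... free obj here if it currently exists ... */
	next_err = dmu_object_next(rwa->os, &obj, FALSE, 0);
}
return (next_err == ESRCH ? 0 : next_err);
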
@@ -2389,6 +2405,72 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
 	return (0);
 }
 
+static void
+objlist_create(struct objlist *list)
+{
+	list_create(&list->list, sizeof (struct receive_objnode),
+	    offsetof(struct receive_objnode, node));
+	list->last_lookup = 0;
+}
+
+static void
+objlist_destroy(struct objlist *list)
+{
+	struct receive_objnode *n;
+
+	for (n = list_remove_head(&list->list);
+	    n != NULL; n = list_remove_head(&list->list)) {
+		kmem_free(n, sizeof (*n));
+	}
+	list_destroy(&list->list);
+}
+
+/*
+ * This function looks through the objlist to see if the specified object number
+ * is contained in the objlist.  In the process, it will remove all object
+ * numbers in the list that are smaller than the specified object number.  Thus,
+ * any lookup of an object number smaller than a previously looked up object
+ * number will always return false; therefore, all lookups should be done in
+ * ascending order.
+ */
+static boolean_t
+objlist_exists(struct objlist *list, uint64_t object)
+{
+	struct receive_objnode *node = list_head(&list->list);
+	ASSERT3U(object, >=, list->last_lookup);
+	list->last_lookup = object;
+	while (node != NULL && node->object < object) {
+		VERIFY3P(node, ==, list_remove_head(&list->list));
+		kmem_free(node, sizeof (*node));
+		node = list_head(&list->list);
+	}
+	return (node != NULL && node->object == object);
+}
+
+/*
+ * The objlist is a list of object numbers stored in ascending order.  However,
+ * the insertion of new object numbers does not seek out the correct location to
+ * store a new object number; instead, it appends it to the list for simplicity.
+ * Thus, any users must take care to only insert new object numbers in ascending
+ * order.
+ */
+static void
+objlist_insert(struct objlist *list, uint64_t object)
+{
+	struct receive_objnode *node;
+	node = kmem_zalloc(sizeof (*node), KM_SLEEP);
+	node->object = object;
+#ifdef ZFS_DEBUG
+	struct receive_objnode *last_object;
+	uint64_t last_objnum;
+
+	last_object = list_tail(&list->list);
+	last_objnum = (last_object != NULL ? last_object->object : 0);
+	ASSERT3U(node->object, >, last_objnum);
+#endif
+	list_insert_tail(&list->list, node);
+}
+
 /*
  * Issue the prefetch reads for any necessary indirect blocks.
 *
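
A usage sketch of the new objlist helpers (hypothetical, not part of the patch; it assumes the kernel list/ASSERT environment this file already uses). Per the comments above, both inserts and lookups must proceed in ascending object order:

struct objlist ol;

objlist_create(&ol);
objlist_insert(&ol, 5);
objlist_insert(&ol, 9);		/* OK: 9 > 5, appended at the tail */

(void) objlist_exists(&ol, 3);	/* B_FALSE; nothing removed */
(void) objlist_exists(&ol, 5);	/* B_TRUE */
(void) objlist_exists(&ol, 7);	/* B_FALSE; node 5 freed along the way */
(void) objlist_exists(&ol, 9);	/* B_TRUE */

objlist_destroy(&ol);		/* frees any remaining nodes */
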
@@ -2411,13 +2493,7 @@ static void
 receive_read_prefetch(struct receive_arg *ra,
     uint64_t object, uint64_t offset, uint64_t length)
 {
-	struct receive_ign_obj_node *node = list_head(&ra->ignore_obj_list);
-	while (node != NULL && node->object < object) {
-		VERIFY3P(node, ==, list_remove_head(&ra->ignore_obj_list));
-		kmem_free(node, sizeof (*node));
-		node = list_head(&ra->ignore_obj_list);
-	}
-	if (node == NULL || node->object > object) {
+	if (!objlist_exists(&ra->ignore_objlist, object)) {
 		dmu_prefetch(ra->os, object, 1, offset, length,
 		    ZIO_PRIORITY_SYNC_READ);
 	}
@@ -2450,20 +2526,7 @@ receive_read_record(struct receive_arg *ra)
 		 */
 		if (err == ENOENT ||
 		    (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
-			struct receive_ign_obj_node *node =
-			    kmem_zalloc(sizeof (*node),
-			    KM_SLEEP);
-			node->object = drro->drr_object;
-#ifdef ZFS_DEBUG
-			{
-				struct receive_ign_obj_node *last_object =
-				    list_tail(&ra->ignore_obj_list);
-				uint64_t last_objnum = (last_object != NULL ?
-				    last_object->object : 0);
-				ASSERT3U(node->object, >, last_objnum);
-			}
-#endif
-			list_insert_tail(&ra->ignore_obj_list, node);
+			objlist_insert(&ra->ignore_objlist, drro->drr_object);
 			err = 0;
 		}
 		return (err);
@@ -2680,7 +2743,6 @@ resume_check(struct receive_arg *ra, nvlist_t *begin_nvl)
 	return (0);
 }
 
-
 /*
  * Read in the stream's records, one by one, and apply them to the pool. There
  * are two threads involved; the thread that calls this function will spin up a
@@ -2701,7 +2763,6 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 	struct receive_arg ra = { 0 };
 	struct receive_writer_arg rwa = { 0 };
 	int featureflags;
-	struct receive_ign_obj_node *n;
 	uint32_t payloadlen;
 	void *payload;
 	nvlist_t *begin_nvl = NULL;
@@ -2717,8 +2778,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 		    sizeof (ra.bytes_read), 1, &ra.bytes_read);
 	}
 
-	list_create(&ra.ignore_obj_list, sizeof (struct receive_ign_obj_node),
-	    offsetof(struct receive_ign_obj_node, node));
+	objlist_create(&ra.ignore_objlist);
 
 	/* these were verified in dmu_recv_begin */
 	ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
@@ -2873,11 +2933,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 
 	*voffp = ra.voff;
 
-	for (n = list_remove_head(&ra.ignore_obj_list); n != NULL;
-	    n = list_remove_head(&ra.ignore_obj_list)) {
-		kmem_free(n, sizeof (*n));
-	}
-	list_destroy(&ra.ignore_obj_list);
+	objlist_destroy(&ra.ignore_objlist);
 	return (err);
 }
 