@@ -214,9 +214,10 @@ raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
214
214
}
215
215
216
216
/*
 * raidz_copy(dabd, sabd, off, size)
 *
 * Statement-like macro that runs raidz_copy_abd_cb over the two ABDs via
 * abd_iterate_func2(), with no private argument (NULL).
 *
 * The revision shown here adds the `off` parameter and passes it as BOTH
 * offset arguments of abd_iterate_func2(); the previous form always passed
 * 0, 0.  `size` is forwarded unchanged as the length argument.
 *
 * NOTE(review): the exact meaning of the two offset arguments (presumably
 * destination offset, then source offset) comes from the
 * abd_iterate_func2() prototype, which is not visible here -- confirm.
 *
 * The expansion is a bare { } block, so it must be used as a full statement.
 */
217
- #define raidz_copy (dabd , sabd , size ) \
217
+ #define raidz_copy (dabd , sabd , off , size ) \
218
218
{ \
219
- abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\
219
+ abd_iterate_func2(dabd, sabd, off, off, size, raidz_copy_abd_cb, \
220
+ NULL); \
220
221
}
221
222
222
223
/*
@@ -254,9 +255,10 @@ raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
254
255
return (0 );
255
256
}
256
257
/*
 * raidz_add(dabd, sabd, off, size)
 *
 * Statement-like macro that runs raidz_add_abd_cb over the two ABDs via
 * abd_iterate_func2(), with no private argument (NULL).
 *
 * The revision shown here adds the `off` parameter and passes it as BOTH
 * offset arguments of abd_iterate_func2(); the previous form always passed
 * 0, 0.  `size` is forwarded unchanged as the length argument.
 *
 * NOTE(review): what raidz_add_abd_cb does to each chunk is defined
 * elsewhere in the file (only its `return (0)` tail is visible here);
 * presumably it accumulates sabd into dabd -- confirm against the callback.
 *
 * The expansion is a bare { } block, so it must be used as a full statement.
 */
257
- #define raidz_add (dabd , sabd , size ) \
258
+ #define raidz_add (dabd , sabd , off , size ) \
258
259
{ \
259
- abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\
260
+ abd_iterate_func2(dabd, sabd, off, off, size, raidz_add_abd_cb, \
261
+ NULL); \
260
262
}
261
263
262
264
/*
@@ -343,7 +345,10 @@ raidz_mul_abd_cb(void *dc, size_t size, void *private)
343
345
* the parity/syndrome if data column is shorter.
344
346
*
345
347
* P parity is calculated using raidz_add().
348
+ *
349
+ * For CPU L2 cache blocking we process 64KB at a time.
346
350
*/
/*
 * Step, in bytes, of the blocked loops in the parity generation and
 * P reconstruction routines: each pass advances `off` by BLOCK and clamps
 * the per-column length with MIN(BLOCK, remaining).  65536 bytes = 64 KiB.
 */
351
+ #define BLOCK 65536
347
352
348
353
/*
349
354
* Generate P parity (RAIDZ1)
@@ -357,20 +362,26 @@ raidz_generate_p_impl(raidz_row_t * const rr)
357
362
const size_t ncols = rr -> rr_cols ;
358
363
const size_t psize = rr -> rr_col [CODE_P ].rc_size ;
359
364
abd_t * pabd = rr -> rr_col [CODE_P ].rc_abd ;
360
- size_t size ;
361
- abd_t * dabd ;
365
+ size_t off , size ;
362
366
363
367
raidz_math_begin ();
364
368
365
- /* start with first data column */
366
- raidz_copy (pabd , rr -> rr_col [1 ].rc_abd , psize );
369
+ for (off = 0 ; off < psize ; off += BLOCK ) {
367
370
368
- for ( c = 2 ; c < ncols ; c ++ ) {
369
- dabd = rr -> rr_col [ c ]. rc_abd ;
370
- size = rr -> rr_col [c ]. rc_size ;
371
+ /* start with first data column */
372
+ size = MIN ( BLOCK , psize - off ) ;
373
+ raidz_copy ( pabd , rr -> rr_col [1 ]. rc_abd , off , size ) ;
371
374
372
- /* add data column */
373
- raidz_add (pabd , dabd , size );
375
+ for (c = 2 ; c < ncols ; c ++ ) {
376
+ size = rr -> rr_col [c ].rc_size ;
377
+ if (size <= off )
378
+ continue ;
379
+
380
+ /* add data column */
381
+ size = MIN (BLOCK , size - off );
382
+ abd_t * dabd = rr -> rr_col [c ].rc_abd ;
383
+ raidz_add (pabd , dabd , off , size );
384
+ }
374
385
}
375
386
376
387
raidz_math_end ();
@@ -423,7 +434,7 @@ raidz_generate_pq_impl(raidz_row_t * const rr)
423
434
size_t c ;
424
435
const size_t ncols = rr -> rr_cols ;
425
436
const size_t csize = rr -> rr_col [CODE_P ].rc_size ;
426
- size_t dsize ;
437
+ size_t off , size , dsize ;
427
438
abd_t * dabd ;
428
439
abd_t * cabds [] = {
429
440
rr -> rr_col [CODE_P ].rc_abd ,
@@ -432,15 +443,20 @@ raidz_generate_pq_impl(raidz_row_t * const rr)
432
443
433
444
raidz_math_begin ();
434
445
435
- raidz_copy (cabds [CODE_P ], rr -> rr_col [2 ].rc_abd , csize );
436
- raidz_copy (cabds [CODE_Q ], rr -> rr_col [2 ].rc_abd , csize );
446
+ for (off = 0 ; off < csize ; off += BLOCK ) {
447
+
448
+ size = MIN (BLOCK , csize - off );
449
+ raidz_copy (cabds [CODE_P ], rr -> rr_col [2 ].rc_abd , off , size );
450
+ raidz_copy (cabds [CODE_Q ], rr -> rr_col [2 ].rc_abd , off , size );
437
451
438
- for (c = 3 ; c < ncols ; c ++ ) {
439
- dabd = rr -> rr_col [c ].rc_abd ;
440
- dsize = rr -> rr_col [c ].rc_size ;
452
+ for (c = 3 ; c < ncols ; c ++ ) {
453
+ dabd = rr -> rr_col [c ].rc_abd ;
454
+ dsize = rr -> rr_col [c ].rc_size ;
455
+ dsize = (dsize > off ) ? MIN (BLOCK , dsize - off ) : 0 ;
441
456
442
- abd_raidz_gen_iterate (cabds , dabd , csize , dsize , 2 ,
443
- raidz_gen_pq_add );
457
+ abd_raidz_gen_iterate (cabds , dabd , off , size , dsize , 2 ,
458
+ raidz_gen_pq_add );
459
+ }
444
460
}
445
461
446
462
raidz_math_end ();
@@ -496,7 +512,7 @@ raidz_generate_pqr_impl(raidz_row_t * const rr)
496
512
size_t c ;
497
513
const size_t ncols = rr -> rr_cols ;
498
514
const size_t csize = rr -> rr_col [CODE_P ].rc_size ;
499
- size_t dsize ;
515
+ size_t off , size , dsize ;
500
516
abd_t * dabd ;
501
517
abd_t * cabds [] = {
502
518
rr -> rr_col [CODE_P ].rc_abd ,
@@ -506,16 +522,21 @@ raidz_generate_pqr_impl(raidz_row_t * const rr)
506
522
507
523
raidz_math_begin ();
508
524
509
- raidz_copy (cabds [CODE_P ], rr -> rr_col [3 ].rc_abd , csize );
510
- raidz_copy (cabds [CODE_Q ], rr -> rr_col [3 ].rc_abd , csize );
511
- raidz_copy (cabds [CODE_R ], rr -> rr_col [3 ].rc_abd , csize );
525
+ for (off = 0 ; off < csize ; off += BLOCK ) {
512
526
513
- for (c = 4 ; c < ncols ; c ++ ) {
514
- dabd = rr -> rr_col [c ].rc_abd ;
515
- dsize = rr -> rr_col [c ].rc_size ;
527
+ size = MIN (BLOCK , csize - off );
528
+ raidz_copy (cabds [CODE_P ], rr -> rr_col [3 ].rc_abd , off , size );
529
+ raidz_copy (cabds [CODE_Q ], rr -> rr_col [3 ].rc_abd , off , size );
530
+ raidz_copy (cabds [CODE_R ], rr -> rr_col [3 ].rc_abd , off , size );
531
+
532
+ for (c = 4 ; c < ncols ; c ++ ) {
533
+ dabd = rr -> rr_col [c ].rc_abd ;
534
+ dsize = rr -> rr_col [c ].rc_size ;
535
+ dsize = (dsize > off ) ? MIN (BLOCK , dsize - off ) : 0 ;
516
536
517
- abd_raidz_gen_iterate (cabds , dabd , csize , dsize , 3 ,
518
- raidz_gen_pqr_add );
537
+ abd_raidz_gen_iterate (cabds , dabd , off , size , dsize , 3 ,
538
+ raidz_gen_pqr_add );
539
+ }
519
540
}
520
541
521
542
raidz_math_end ();
@@ -592,26 +613,31 @@ raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx)
592
613
const size_t x = tgtidx [TARGET_X ];
593
614
const size_t xsize = rr -> rr_col [x ].rc_size ;
594
615
abd_t * xabd = rr -> rr_col [x ].rc_abd ;
595
- size_t size ;
596
- abd_t * dabd ;
616
+ size_t off , size ;
597
617
598
618
if (xabd == NULL )
599
619
return (1 << CODE_P );
600
620
601
621
raidz_math_begin ();
602
622
603
- /* copy P into target */
604
- raidz_copy (xabd , rr -> rr_col [CODE_P ].rc_abd , xsize );
623
+ for (off = 0 ; off < xsize ; off += BLOCK ) {
605
624
606
- /* generate p_syndrome */
607
- for (c = firstdc ; c < ncols ; c ++ ) {
608
- if (c == x )
609
- continue ;
625
+ /* copy P into target */
626
+ size = MIN (BLOCK , xsize - off );
627
+ raidz_copy (xabd , rr -> rr_col [CODE_P ].rc_abd , off , size );
610
628
611
- dabd = rr -> rr_col [c ].rc_abd ;
612
- size = MIN (rr -> rr_col [c ].rc_size , xsize );
629
+ /* generate p_syndrome */
630
+ for (c = firstdc ; c < ncols ; c ++ ) {
631
+ if (c == x )
632
+ continue ;
633
+ size = rr -> rr_col [c ].rc_size ;
634
+ if (size <= off )
635
+ continue ;
613
636
614
- raidz_add (xabd , dabd , size );
637
+ size = MIN (BLOCK , MIN (size , xsize ) - off );
638
+ abd_t * dabd = rr -> rr_col [c ].rc_abd ;
639
+ raidz_add (xabd , dabd , off , size );
640
+ }
615
641
}
616
642
617
643
raidz_math_end ();
@@ -683,7 +709,7 @@ raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
683
709
684
710
/* Start with first data column if present */
685
711
if (firstdc != x ) {
686
- raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , xsize );
712
+ raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
687
713
} else {
688
714
raidz_zero (xabd , xsize );
689
715
}
@@ -698,12 +724,12 @@ raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
698
724
dsize = rr -> rr_col [c ].rc_size ;
699
725
}
700
726
701
- abd_raidz_gen_iterate (tabds , dabd , xsize , dsize , 1 ,
727
+ abd_raidz_gen_iterate (tabds , dabd , 0 , xsize , dsize , 1 ,
702
728
raidz_syn_q_abd );
703
729
}
704
730
705
731
/* add Q to the syndrome */
706
- raidz_add (xabd , rr -> rr_col [CODE_Q ].rc_abd , xsize );
732
+ raidz_add (xabd , rr -> rr_col [CODE_Q ].rc_abd , 0 , xsize );
707
733
708
734
/* transform the syndrome */
709
735
abd_iterate_func (xabd , 0 , xsize , raidz_mul_abd_cb , (void * ) coeff );
@@ -777,7 +803,7 @@ raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
777
803
778
804
/* Start with first data column if present */
779
805
if (firstdc != x ) {
780
- raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , xsize );
806
+ raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
781
807
} else {
782
808
raidz_zero (xabd , xsize );
783
809
}
@@ -793,12 +819,12 @@ raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
793
819
dsize = rr -> rr_col [c ].rc_size ;
794
820
}
795
821
796
- abd_raidz_gen_iterate (tabds , dabd , xsize , dsize , 1 ,
822
+ abd_raidz_gen_iterate (tabds , dabd , 0 , xsize , dsize , 1 ,
797
823
raidz_syn_r_abd );
798
824
}
799
825
800
826
/* add R to the syndrome */
801
- raidz_add (xabd , rr -> rr_col [CODE_R ].rc_abd , xsize );
827
+ raidz_add (xabd , rr -> rr_col [CODE_R ].rc_abd , 0 , xsize );
802
828
803
829
/* transform the syndrome */
804
830
abd_iterate_func (xabd , 0 , xsize , raidz_mul_abd_cb , (void * )coeff );
@@ -934,8 +960,8 @@ raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
934
960
935
961
/* Start with first data column if present */
936
962
if (firstdc != x ) {
937
- raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , xsize );
938
- raidz_copy (yabd , rr -> rr_col [firstdc ].rc_abd , xsize );
963
+ raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
964
+ raidz_copy (yabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
939
965
} else {
940
966
raidz_zero (xabd , xsize );
941
967
raidz_zero (yabd , xsize );
@@ -951,15 +977,15 @@ raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
951
977
dsize = rr -> rr_col [c ].rc_size ;
952
978
}
953
979
954
- abd_raidz_gen_iterate (tabds , dabd , xsize , dsize , 2 ,
980
+ abd_raidz_gen_iterate (tabds , dabd , 0 , xsize , dsize , 2 ,
955
981
raidz_syn_pq_abd );
956
982
}
957
983
958
984
abd_raidz_rec_iterate (cabds , tabds , xsize , 2 , raidz_rec_pq_abd , coeff );
959
985
960
986
/* Copy shorter targets back to the original abd buffer */
961
987
if (ysize < xsize )
962
- raidz_copy (rr -> rr_col [y ].rc_abd , yabd , ysize );
988
+ raidz_copy (rr -> rr_col [y ].rc_abd , yabd , 0 , ysize );
963
989
964
990
raidz_math_end ();
965
991
@@ -1094,8 +1120,8 @@ raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
1094
1120
1095
1121
/* Start with first data column if present */
1096
1122
if (firstdc != x ) {
1097
- raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , xsize );
1098
- raidz_copy (yabd , rr -> rr_col [firstdc ].rc_abd , xsize );
1123
+ raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
1124
+ raidz_copy (yabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
1099
1125
} else {
1100
1126
raidz_zero (xabd , xsize );
1101
1127
raidz_zero (yabd , xsize );
@@ -1111,7 +1137,7 @@ raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
1111
1137
dsize = rr -> rr_col [c ].rc_size ;
1112
1138
}
1113
1139
1114
- abd_raidz_gen_iterate (tabds , dabd , xsize , dsize , 2 ,
1140
+ abd_raidz_gen_iterate (tabds , dabd , 0 , xsize , dsize , 2 ,
1115
1141
raidz_syn_pr_abd );
1116
1142
}
1117
1143
@@ -1121,7 +1147,7 @@ raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
1121
1147
* Copy shorter targets back to the original abd buffer
1122
1148
*/
1123
1149
if (ysize < xsize )
1124
- raidz_copy (rr -> rr_col [y ].rc_abd , yabd , ysize );
1150
+ raidz_copy (rr -> rr_col [y ].rc_abd , yabd , 0 , ysize );
1125
1151
1126
1152
raidz_math_end ();
1127
1153
@@ -1261,8 +1287,8 @@ raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
1261
1287
1262
1288
/* Start with first data column if present */
1263
1289
if (firstdc != x ) {
1264
- raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , xsize );
1265
- raidz_copy (yabd , rr -> rr_col [firstdc ].rc_abd , xsize );
1290
+ raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
1291
+ raidz_copy (yabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
1266
1292
} else {
1267
1293
raidz_zero (xabd , xsize );
1268
1294
raidz_zero (yabd , xsize );
@@ -1278,7 +1304,7 @@ raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
1278
1304
dsize = rr -> rr_col [c ].rc_size ;
1279
1305
}
1280
1306
1281
- abd_raidz_gen_iterate (tabds , dabd , xsize , dsize , 2 ,
1307
+ abd_raidz_gen_iterate (tabds , dabd , 0 , xsize , dsize , 2 ,
1282
1308
raidz_syn_qr_abd );
1283
1309
}
1284
1310
@@ -1288,7 +1314,7 @@ raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
1288
1314
* Copy shorter targets back to the original abd buffer
1289
1315
*/
1290
1316
if (ysize < xsize )
1291
- raidz_copy (rr -> rr_col [y ].rc_abd , yabd , ysize );
1317
+ raidz_copy (rr -> rr_col [y ].rc_abd , yabd , 0 , ysize );
1292
1318
1293
1319
raidz_math_end ();
1294
1320
@@ -1456,9 +1482,9 @@ raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
1456
1482
1457
1483
/* Start with first data column if present */
1458
1484
if (firstdc != x ) {
1459
- raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , xsize );
1460
- raidz_copy (yabd , rr -> rr_col [firstdc ].rc_abd , xsize );
1461
- raidz_copy (zabd , rr -> rr_col [firstdc ].rc_abd , xsize );
1485
+ raidz_copy (xabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
1486
+ raidz_copy (yabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
1487
+ raidz_copy (zabd , rr -> rr_col [firstdc ].rc_abd , 0 , xsize );
1462
1488
} else {
1463
1489
raidz_zero (xabd , xsize );
1464
1490
raidz_zero (yabd , xsize );
@@ -1475,7 +1501,7 @@ raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
1475
1501
dsize = rr -> rr_col [c ].rc_size ;
1476
1502
}
1477
1503
1478
- abd_raidz_gen_iterate (tabds , dabd , xsize , dsize , 3 ,
1504
+ abd_raidz_gen_iterate (tabds , dabd , 0 , xsize , dsize , 3 ,
1479
1505
raidz_syn_pqr_abd );
1480
1506
}
1481
1507
@@ -1485,9 +1511,9 @@ raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
1485
1511
* Copy shorter targets back to the original abd buffer
1486
1512
*/
1487
1513
if (ysize < xsize )
1488
- raidz_copy (rr -> rr_col [y ].rc_abd , yabd , ysize );
1514
+ raidz_copy (rr -> rr_col [y ].rc_abd , yabd , 0 , ysize );
1489
1515
if (zsize < xsize )
1490
- raidz_copy (rr -> rr_col [z ].rc_abd , zabd , zsize );
1516
+ raidz_copy (rr -> rr_col [z ].rc_abd , zabd , 0 , zsize );
1491
1517
1492
1518
raidz_math_end ();
1493
1519
0 commit comments