Skip to content

Commit 998c33e

Browse files
committed
[VPlan] Mark FirstOrderRecurrenceSplice as not having side-effects.
Now that FOR exit and resume value creation is explicitly modeled in VPlan (05e1b53, 07b3301) it doesn't depend on the first order recurrence splice being preserved and it can now be marked as not having side-effects. This allows removal of first-order-recurrence-splce if the FOR is only used in the exit or as scalar ph resume value.
1 parent 643e471 commit 998c33e

File tree

9 files changed

+42
-37
lines changed

9 files changed

+42
-37
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
138138
case VPInstruction::CalculateTripCountMinusVF:
139139
case VPInstruction::CanonicalIVIncrementForPart:
140140
case VPInstruction::ExtractFromEnd:
141+
case VPInstruction::FirstOrderRecurrenceSplice:
141142
case VPInstruction::LogicalAnd:
142143
case VPInstruction::PtrAdd:
143144
return false;

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
120120
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1
121121
; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64>
122122
; CHECK-NEXT: [[TMP9]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64>
123-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[TMP8]], <2 x i32> <i32 1, i32 2>
124123
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
125124
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
126125
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -184,7 +183,6 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
184183
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
185184
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
186185
; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64>
187-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
188186
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
189187
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
190188
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,6 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur
776776
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
777777
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
778778
; CHECK-NEXT: [[TMP4]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
779-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP4]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
780779
; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
781780
; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
782781
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
@@ -860,7 +859,6 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture
860859
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
861860
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
862861
; CHECK-NEXT: [[TMP6]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
863-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP6]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
864862
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP6]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
865863
; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP8]] to <16 x i8>
866864
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]

llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,6 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C
234234
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
235235
; CHECK-NEXT: [[TMP0:%.*]] = sub nsw <16 x i64> zeroinitializer, [[VEC_IND]]
236236
; CHECK-NEXT: [[TMP1]] = sub nsw <16 x i64> zeroinitializer, [[STEP_ADD]]
237-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i64> [[VECTOR_RECUR]], <16 x i64> [[TMP0]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
238237
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i64> [[TMP1]], i32 15
239238
; CHECK-NEXT: store i64 [[TMP4]], ptr [[GEP]], align 8
240239
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32

llvm/test/Transforms/LoopVectorize/X86/pr72969.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ define void @test(ptr %p) {
7777
; VEC-NEXT: store i64 0, ptr [[TMP26]], align 8
7878
; VEC-NEXT: [[TMP27:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
7979
; VEC-NEXT: [[TMP28]] = zext <4 x i16> [[TMP27]] to <4 x i64>
80-
; VEC-NEXT: [[TMP29:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP28]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
8180
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
8281
; VEC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
8382
; VEC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,6 @@ define ptr @test_first_order_recurrences_and_pointer_induction1(ptr %ptr) {
560560
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ <ptr poison, ptr poison, ptr poison, ptr null>, %vector.ph ], [ [[TMP0:%.*]], %vector.body ]
561561
; CHECK-NEXT: [[TMP0]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
562562
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
563-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[VECTOR_RECUR]], <4 x ptr> [[TMP0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
564563
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 [[TMP1]]
565564
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0
566565
; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8
@@ -604,7 +603,6 @@ define ptr @test_first_order_recurrences_and_pointer_induction2(ptr %ptr) {
604603
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ <ptr poison, ptr poison, ptr poison, ptr null>, %vector.ph ], [ [[TMP0:%.*]], %vector.body ]
605604
; CHECK-NEXT: [[TMP0]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
606605
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
607-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[VECTOR_RECUR]], <4 x ptr> [[TMP0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
608606
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 [[TMP1]]
609607
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0
610608
; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8
@@ -653,7 +651,6 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
653651
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x double> poison, double [[TMP3]], i64 0
654652
; CHECK-NEXT: [[BROADCAST_SPLAT4]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT3]], <4 x double> poison, <4 x i32> zeroinitializer
655653
; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[BROADCAST_SPLAT4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
656-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR2]], <4 x double> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
657654
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3
658655
; CHECK-NEXT: store double [[TMP6]], ptr [[P:%.*]], align 8
659656
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -220,22 +220,54 @@ exit:
220220
define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %b) {
221221
; CHECK-LABEL: @test_pr54233_for_depend_on_each_other(
222222
; CHECK-NEXT: entry:
223+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
224+
; CHECK: vector.ph:
225+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
226+
; CHECK: vector.body:
227+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
228+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
229+
; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
230+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
231+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4
232+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
233+
; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
234+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
235+
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], <i32 10, i32 10, i32 10, i32 10>
236+
; CHECK-NEXT: [[TMP4]] = xor <4 x i32> <i32 12, i32 12, i32 12, i32 12>, [[TMP2]]
237+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
238+
; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]]
239+
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 255, i32 255, i32 255, i32 255>
240+
; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]]
241+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
242+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
243+
; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4
244+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
245+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
246+
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
247+
; CHECK: middle.block:
248+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
249+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3
250+
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
251+
; CHECK: scalar.ph:
252+
; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
253+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
254+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
223255
; CHECK-NEXT: br label [[LOOP:%.*]]
224256
; CHECK: loop:
225-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
226-
; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
227-
; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
228-
; CHECK-NEXT: [[OR:%.*]] = or i32 [[FOR_2]], 10
229-
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[FOR_2]], [[FOR_1]]
257+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
258+
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
259+
; CHECK-NEXT: [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
260+
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SCALAR_RECUR4]], 10
261+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SCALAR_RECUR4]], [[SCALAR_RECUR]]
230262
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], 255
231263
; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR]], [[OR]]
232-
; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[FOR_2]]
233-
; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B:%.*]], align 4
234-
; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
264+
; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[SCALAR_RECUR4]]
265+
; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B]], align 4
266+
; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
235267
; CHECK-NEXT: store i32 [[AND]], ptr [[A_GEP]], align 4
236268
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
237269
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1000
238-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
270+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
239271
; CHECK: exit:
240272
; CHECK-NEXT: ret void
241273
;

0 commit comments

Comments
 (0)