Skip to content

Commit 930ae93

Browse files
[SimplifyCFG] Relax cttz cost check in simplifySwitchOfPowersOfTwo
Allow the `simplifySwitchOfPowersOfTwo` transform to take place on recent X86 targets, where the weighted size-and-latency cost of the `cttz` intrinsic appears to be 2. This favours computing the trailing zeroes and indexing into a smaller value table, which overall should be more efficient than generating a jump table with an indirect branch.
1 parent cbcff3d commit 930ae93

File tree

2 files changed

+13
-29
lines changed

2 files changed

+13
-29
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7198,8 +7198,10 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
71987198
/// will be transformed to:
71997199
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
72007200
///
7201-
/// This transformation allows better lowering and could allow transforming into
7202-
/// a lookup table.
7201+
/// This transformation allows better lowering and may transform the switch
7202+
/// instruction into a sequence of bit manipulation and a smaller
7203+
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7204+
/// address of the jump target, and indirectly jumping to it).
72037205
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
72047206
const DataLayout &DL,
72057207
const TargetTransformInfo &TTI) {
@@ -7211,17 +7213,15 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
72117213
!DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
72127214
return false;
72137215

7214-
const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7215-
IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7216-
{Condition, ConstantInt::getTrue(Context)}),
7217-
TTI::TCK_SizeAndLatency);
7218-
7219-
if (CttzIntrinsicCost > TTI::TCC_Basic)
7220-
// Inserting intrinsic is too expensive.
7216+
// Ensure trailing zeroes count intrinsic emission is not too expensive.
7217+
IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7218+
{Condition, ConstantInt::getTrue(Context)});
7219+
if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7220+
TTI::TCC_Basic * 2)
72217221
return false;
72227222

72237223
// Only bother with this optimization if there are more than 3 switch cases.
7224-
// SDAG will only bother creating jump tables for 4 or more cases.
7224+
// SDAG will start emitting jump tables for 4 or more cases.
72257225
if (SI->getNumCases() < 4)
72267226
return false;
72277227

llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,9 @@ target triple = "x86_64-unknown-linux-gnu"
66
define i32 @switch_of_powers_two(i32 %arg) {
77
; CHECK-LABEL: @switch_of_powers_two(
88
; CHECK-NEXT: entry:
9-
; CHECK-NEXT: switch i32 [[ARG:%.*]], label [[DEFAULT_CASE:%.*]] [
10-
; CHECK-NEXT: i32 1, label [[RETURN:%.*]]
11-
; CHECK-NEXT: i32 8, label [[BB2:%.*]]
12-
; CHECK-NEXT: i32 16, label [[BB3:%.*]]
13-
; CHECK-NEXT: i32 32, label [[BB4:%.*]]
14-
; CHECK-NEXT: i32 64, label [[BB5:%.*]]
15-
; CHECK-NEXT: ]
16-
; CHECK: default_case:
17-
; CHECK-NEXT: unreachable
18-
; CHECK: bb2:
19-
; CHECK-NEXT: br label [[RETURN]]
20-
; CHECK: bb3:
21-
; CHECK-NEXT: br label [[RETURN]]
22-
; CHECK: bb4:
23-
; CHECK-NEXT: br label [[RETURN]]
24-
; CHECK: bb5:
25-
; CHECK-NEXT: br label [[RETURN]]
26-
; CHECK: return:
27-
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = phi i32 [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ 3, [[ENTRY:%.*]] ]
9+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG:%.*]], i1 true)
10+
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two, i32 0, i32 [[TMP0]]
11+
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
2812
; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
2913
;
3014
entry:

0 commit comments

Comments
 (0)