Skip to content

Commit 50c3b29

Browse files
committed
[x86] make 8-bit shl undesirable
I was looking at a potential DAGCombiner fix for one of the regressions in D60278, and it caused severe regression-test pain because the x86 TLI lies about the desirability of 8-bit shift ops.

We've hinted at making all 8-bit ops undesirable for the reason given in the code comment: "TODO: Almost no 8-bit ops are desirable because they have no actual size/speed advantages vs. 32-bit ops, but they do have a major potential disadvantage by causing partial register stalls."

...but making every 8-bit op undesirable leads to massive diffs and itself exposes all kinds of optimization holes, so this change only marks 8-bit shl as undesirable (alongside the existing 8-bit multiply case).

Differential Revision: https://reviews.llvm.org/D60286
llvm-svn: 357912
1 parent 7671a1d commit 50c3b29

File tree

6 files changed

+38
-35
lines changed

6 files changed

+38
-35
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42798,11 +42798,15 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
4279842798
if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
4279942799
return false;
4280042800

42801-
// 8-bit multiply is probably not much cheaper than 32-bit multiply, and
42802-
// we have specializations to turn 32-bit multiply into LEA or other ops.
42801+
// TODO: Almost no 8-bit ops are desirable because they have no actual
42802+
// size/speed advantages vs. 32-bit ops, but they do have a major
42803+
// potential disadvantage by causing partial register stalls.
42804+
//
42805+
// 8-bit multiply/shl is probably not cheaper than 32-bit multiply/shl, and
42806+
// we have specializations to turn 32-bit multiply/shl into LEA or other ops.
4280342807
// Also, see the comment in "IsDesirableToPromoteOp" - where we additionally
4280442808
// check for a constant operand to the multiply.
42805-
if (Opc == ISD::MUL && VT == MVT::i8)
42809+
if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8)
4280642810
return false;
4280742811

4280842812
// i16 instruction encodings are longer and some i16 instructions are slow,

llvm/test/CodeGen/X86/bt.ll

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,19 +1150,18 @@ define void @demanded_i32(i32* nocapture readonly, i32* nocapture, i32) nounwind
11501150
define zeroext i1 @demanded_with_known_zeroes(i32 %bit, i32 %bits) {
11511151
; X86-LABEL: demanded_with_known_zeroes:
11521152
; X86: # %bb.0: # %entry
1153-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1154-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
1155-
; X86-NEXT: shlb $2, %cl
1156-
; X86-NEXT: movzbl %cl, %ecx
1157-
; X86-NEXT: btl %ecx, %eax
1153+
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1154+
; X86-NEXT: shlb $2, %al
1155+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1156+
; X86-NEXT: movzbl %al, %eax
1157+
; X86-NEXT: btl %eax, %ecx
11581158
; X86-NEXT: setb %al
11591159
; X86-NEXT: retl
11601160
;
11611161
; X64-LABEL: demanded_with_known_zeroes:
11621162
; X64: # %bb.0: # %entry
1163-
; X64-NEXT: shlb $2, %dil
1164-
; X64-NEXT: movzbl %dil, %eax
1165-
; X64-NEXT: btl %eax, %esi
1163+
; X64-NEXT: shll $2, %edi
1164+
; X64-NEXT: btl %edi, %esi
11661165
; X64-NEXT: setb %al
11671166
; X64-NEXT: retq
11681167
entry:

llvm/test/CodeGen/X86/btc_bts_btr.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -954,15 +954,15 @@ define i32 @btr_32_mask_zeros(i32 %x, i32 %n) {
954954
; X64-LABEL: btr_32_mask_zeros:
955955
; X64: # %bb.0:
956956
; X64-NEXT: movl %edi, %eax
957-
; X64-NEXT: shlb $2, %sil
957+
; X64-NEXT: shll $2, %esi
958958
; X64-NEXT: btrl %esi, %eax
959959
; X64-NEXT: retq
960960
;
961961
; X86-LABEL: btr_32_mask_zeros:
962962
; X86: # %bb.0:
963-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
964963
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
965964
; X86-NEXT: shlb $2, %cl
965+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
966966
; X86-NEXT: btrl %ecx, %eax
967967
; X86-NEXT: retl
968968
%1 = shl i32 %n, 2
@@ -977,15 +977,15 @@ define i32 @bts_32_mask_zeros(i32 %x, i32 %n) {
977977
; X64-LABEL: bts_32_mask_zeros:
978978
; X64: # %bb.0:
979979
; X64-NEXT: movl %edi, %eax
980-
; X64-NEXT: shlb $2, %sil
980+
; X64-NEXT: shll $2, %esi
981981
; X64-NEXT: btsl %esi, %eax
982982
; X64-NEXT: retq
983983
;
984984
; X86-LABEL: bts_32_mask_zeros:
985985
; X86: # %bb.0:
986-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
987986
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
988987
; X86-NEXT: shlb $2, %cl
988+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
989989
; X86-NEXT: btsl %ecx, %eax
990990
; X86-NEXT: retl
991991
%1 = shl i32 %n, 2
@@ -999,15 +999,15 @@ define i32 @btc_32_mask_zeros(i32 %x, i32 %n) {
999999
; X64-LABEL: btc_32_mask_zeros:
10001000
; X64: # %bb.0:
10011001
; X64-NEXT: movl %edi, %eax
1002-
; X64-NEXT: shlb $2, %sil
1002+
; X64-NEXT: shll $2, %esi
10031003
; X64-NEXT: btcl %esi, %eax
10041004
; X64-NEXT: retq
10051005
;
10061006
; X86-LABEL: btc_32_mask_zeros:
10071007
; X86: # %bb.0:
1008-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
10091008
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
10101009
; X86-NEXT: shlb $2, %cl
1010+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
10111011
; X86-NEXT: btcl %ecx, %eax
10121012
; X86-NEXT: retl
10131013
%1 = shl i32 %n, 2
@@ -1021,14 +1021,14 @@ define i64 @btr_64_mask_zeros(i64 %x, i64 %n) {
10211021
; X64-LABEL: btr_64_mask_zeros:
10221022
; X64: # %bb.0:
10231023
; X64-NEXT: movq %rdi, %rax
1024-
; X64-NEXT: shlb $2, %sil
1024+
; X64-NEXT: shlq $2, %rsi
10251025
; X64-NEXT: btrq %rsi, %rax
10261026
; X64-NEXT: retq
10271027
;
10281028
; X86-LABEL: btr_64_mask_zeros:
10291029
; X86: # %bb.0:
1030-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
1031-
; X86-NEXT: shlb $2, %cl
1030+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1031+
; X86-NEXT: shll $2, %ecx
10321032
; X86-NEXT: movl $1, %eax
10331033
; X86-NEXT: xorl %edx, %edx
10341034
; X86-NEXT: shldl %cl, %eax, %edx
@@ -1056,14 +1056,14 @@ define i64 @bts_64_mask_zeros(i64 %x, i64 %n) {
10561056
; X64-LABEL: bts_64_mask_zeros:
10571057
; X64: # %bb.0:
10581058
; X64-NEXT: movq %rdi, %rax
1059-
; X64-NEXT: shlb $2, %sil
1059+
; X64-NEXT: shlq $2, %rsi
10601060
; X64-NEXT: btsq %rsi, %rax
10611061
; X64-NEXT: retq
10621062
;
10631063
; X86-LABEL: bts_64_mask_zeros:
10641064
; X86: # %bb.0:
1065-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
1066-
; X86-NEXT: shlb $2, %cl
1065+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1066+
; X86-NEXT: shll $2, %ecx
10671067
; X86-NEXT: movl $1, %eax
10681068
; X86-NEXT: xorl %edx, %edx
10691069
; X86-NEXT: shldl %cl, %eax, %edx
@@ -1088,14 +1088,14 @@ define i64 @btc_64_mask_zeros(i64 %x, i64 %n) {
10881088
; X64-LABEL: btc_64_mask_zeros:
10891089
; X64: # %bb.0:
10901090
; X64-NEXT: movq %rdi, %rax
1091-
; X64-NEXT: shlb $2, %sil
1091+
; X64-NEXT: shlq $2, %rsi
10921092
; X64-NEXT: btcq %rsi, %rax
10931093
; X64-NEXT: retq
10941094
;
10951095
; X86-LABEL: btc_64_mask_zeros:
10961096
; X86: # %bb.0:
1097-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
1098-
; X86-NEXT: shlb $2, %cl
1097+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1098+
; X86-NEXT: shll $2, %ecx
10991099
; X86-NEXT: movl $1, %eax
11001100
; X86-NEXT: xorl %edx, %edx
11011101
; X86-NEXT: shldl %cl, %eax, %edx

llvm/test/CodeGen/X86/rotate4.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -633,9 +633,9 @@ define i32 @rotate_demanded_bits_2(i32, i32) {
633633
define i32 @rotate_demanded_bits_3(i32, i32) {
634634
; X86-LABEL: rotate_demanded_bits_3:
635635
; X86: # %bb.0:
636-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
637636
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
638637
; X86-NEXT: addb %cl, %cl
638+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
639639
; X86-NEXT: roll %cl, %eax
640640
; X86-NEXT: retl
641641
;

llvm/test/CodeGen/X86/scheduler-backtracking.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ define i256 @test1(i256 %a) nounwind {
1616
; ILP-NEXT: pushq %rbx
1717
; ILP-NEXT: movq %rdi, %rax
1818
; ILP-NEXT: xorl %r8d, %r8d
19-
; ILP-NEXT: addb %sil, %sil
19+
; ILP-NEXT: addq %rsi, %rsi
2020
; ILP-NEXT: addb $2, %sil
2121
; ILP-NEXT: orb $1, %sil
2222
; ILP-NEXT: movl $1, %r10d
@@ -61,7 +61,7 @@ define i256 @test1(i256 %a) nounwind {
6161
; HYBRID-LABEL: test1:
6262
; HYBRID: # %bb.0:
6363
; HYBRID-NEXT: movq %rdi, %rax
64-
; HYBRID-NEXT: addb %sil, %sil
64+
; HYBRID-NEXT: addq %rsi, %rsi
6565
; HYBRID-NEXT: addb $2, %sil
6666
; HYBRID-NEXT: orb $1, %sil
6767
; HYBRID-NEXT: movb $-128, %cl
@@ -104,7 +104,7 @@ define i256 @test1(i256 %a) nounwind {
104104
; BURR-LABEL: test1:
105105
; BURR: # %bb.0:
106106
; BURR-NEXT: movq %rdi, %rax
107-
; BURR-NEXT: addb %sil, %sil
107+
; BURR-NEXT: addq %rsi, %rsi
108108
; BURR-NEXT: addb $2, %sil
109109
; BURR-NEXT: orb $1, %sil
110110
; BURR-NEXT: movb $-128, %cl
@@ -148,7 +148,7 @@ define i256 @test1(i256 %a) nounwind {
148148
; SRC: # %bb.0:
149149
; SRC-NEXT: pushq %rbx
150150
; SRC-NEXT: movq %rdi, %rax
151-
; SRC-NEXT: addb %sil, %sil
151+
; SRC-NEXT: addq %rsi, %rsi
152152
; SRC-NEXT: addb $2, %sil
153153
; SRC-NEXT: orb $1, %sil
154154
; SRC-NEXT: movb $-128, %cl
@@ -195,7 +195,7 @@ define i256 @test1(i256 %a) nounwind {
195195
; LIN-NEXT: movq %rdi, %rax
196196
; LIN-NEXT: xorl %r9d, %r9d
197197
; LIN-NEXT: movl $1, %r8d
198-
; LIN-NEXT: addb %sil, %sil
198+
; LIN-NEXT: addq %rsi, %rsi
199199
; LIN-NEXT: addb $2, %sil
200200
; LIN-NEXT: orb $1, %sil
201201
; LIN-NEXT: movl $1, %edx

llvm/test/CodeGen/X86/select_const.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -353,9 +353,9 @@ define i16 @select_pow2_diff_invert(i1 zeroext %cond) {
353353
define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
354354
; CHECK-LABEL: select_pow2_diff_neg:
355355
; CHECK: # %bb.0:
356-
; CHECK-NEXT: shlb $4, %dil
357-
; CHECK-NEXT: movzbl %dil, %eax
358-
; CHECK-NEXT: orl $-25, %eax
356+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
357+
; CHECK-NEXT: shll $4, %edi
358+
; CHECK-NEXT: leal -25(%rdi), %eax
359359
; CHECK-NEXT: retq
360360
%sel = select i1 %cond, i32 -9, i32 -25
361361
ret i32 %sel

0 commit comments

Comments
 (0)