Skip to content

Commit aeef43e

Browse files
committed
[X86] In LowerEXTEND_VECTOR_INREG, emit a vector shuffle instead of directly using X86ISD::UNPCKL
The majority of the test changes occur because the rest of shuffle lowering/combining prefers to replace the undef input with the other operand. Using UNPCKL directly seemed to avoid this and just grabbed an arbitrary register for the undef operand, which can create false dependencies.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346050 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent ef5bf36 commit aeef43e

File tree

4 files changed

+134
-140
lines changed

4 files changed

+134
-140
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19820,7 +19820,7 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
1982019820
// As SRAI is only available on i16/i32 types, we expand only up to i32
1982119821
// and handle i64 separately.
1982219822
while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) {
19823-
Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
19823+
Curr = getUnpackl(DAG, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
1982419824
MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
1982519825
CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
1982619826
Curr = DAG.getBitcast(CurrVT, Curr);

test/CodeGen/X86/madd.ll

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -954,38 +954,38 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
954954
; SSE2-NEXT: .p2align 4, 0x90
955955
; SSE2-NEXT: .LBB7_1: # %vector.body
956956
; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
957-
; SSE2-NEXT: movq {{.*#+}} xmm5 = mem[0],zero
958-
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
959-
; SSE2-NEXT: psraw $8, %xmm5
960-
; SSE2-NEXT: movq {{.*#+}} xmm6 = mem[0],zero
961-
; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
962-
; SSE2-NEXT: psraw $8, %xmm6
963957
; SSE2-NEXT: movq {{.*#+}} xmm7 = mem[0],zero
964958
; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
965959
; SSE2-NEXT: psraw $8, %xmm7
966960
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
967961
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
968962
; SSE2-NEXT: psraw $8, %xmm0
969-
; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
970-
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
971-
; SSE2-NEXT: psraw $8, %xmm2
972-
; SSE2-NEXT: pmaddwd %xmm5, %xmm2
973-
; SSE2-NEXT: paddd %xmm2, %xmm9
974-
; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
975-
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
976-
; SSE2-NEXT: psraw $8, %xmm2
977-
; SSE2-NEXT: pmaddwd %xmm6, %xmm2
978-
; SSE2-NEXT: paddd %xmm2, %xmm4
963+
; SSE2-NEXT: movq {{.*#+}} xmm6 = mem[0],zero
964+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
965+
; SSE2-NEXT: psraw $8, %xmm6
966+
; SSE2-NEXT: movq {{.*#+}} xmm5 = mem[0],zero
967+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
968+
; SSE2-NEXT: psraw $8, %xmm5
979969
; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
980970
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
981971
; SSE2-NEXT: psraw $8, %xmm2
982972
; SSE2-NEXT: pmaddwd %xmm7, %xmm2
983-
; SSE2-NEXT: paddd %xmm2, %xmm1
973+
; SSE2-NEXT: paddd %xmm2, %xmm9
984974
; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
985975
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
986976
; SSE2-NEXT: psraw $8, %xmm2
987977
; SSE2-NEXT: pmaddwd %xmm0, %xmm2
988-
; SSE2-NEXT: paddd %xmm2, %xmm3
978+
; SSE2-NEXT: paddd %xmm2, %xmm4
979+
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
980+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
981+
; SSE2-NEXT: psraw $8, %xmm0
982+
; SSE2-NEXT: pmaddwd %xmm6, %xmm0
983+
; SSE2-NEXT: paddd %xmm0, %xmm1
984+
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
985+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
986+
; SSE2-NEXT: psraw $8, %xmm0
987+
; SSE2-NEXT: pmaddwd %xmm5, %xmm0
988+
; SSE2-NEXT: paddd %xmm0, %xmm3
989989
; SSE2-NEXT: addq $32, %rcx
990990
; SSE2-NEXT: cmpq %rcx, %rax
991991
; SSE2-NEXT: jne .LBB7_1

test/CodeGen/X86/vec_int_to_fp.ll

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1726,13 +1726,11 @@ define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) {
17261726
define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) {
17271727
; SSE2-LABEL: sitofp_8i8_to_8f32:
17281728
; SSE2: # %bb.0:
1729-
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1730-
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
1729+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1730+
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
17311731
; SSE2-NEXT: psrad $24, %xmm1
17321732
; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2
1733-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1734-
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1735-
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1733+
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
17361734
; SSE2-NEXT: psrad $24, %xmm0
17371735
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
17381736
; SSE2-NEXT: movaps %xmm2, %xmm0
@@ -1776,13 +1774,11 @@ define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) {
17761774
define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) {
17771775
; SSE2-LABEL: sitofp_16i8_to_8f32:
17781776
; SSE2: # %bb.0:
1779-
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1780-
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
1777+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1778+
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
17811779
; SSE2-NEXT: psrad $24, %xmm1
17821780
; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2
1783-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1784-
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1785-
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1781+
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
17861782
; SSE2-NEXT: psrad $24, %xmm0
17871783
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
17881784
; SSE2-NEXT: movaps %xmm2, %xmm0

0 commit comments

Comments
 (0)