[llvm] r364469 - [X86] Rework the logic in LowerBuildVectorv16i8 to make better use of any_extend and break false dependencies. Other improvements
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 26 13:16:19 PDT 2019
Author: ctopper
Date: Wed Jun 26 13:16:19 2019
New Revision: 364469
URL: http://llvm.org/viewvc/llvm-project?rev=364469&view=rev
Log:
[X86] Rework the logic in LowerBuildVectorv16i8 to make better use of any_extend and break false dependencies. Other improvements
This patch rewrites the loop to visit only every other element, starting with element 0, and to work on the "even" element and the "next" element at the same time. The "First" logic has been moved to the bottom of the loop and no longer runs on every element. I believe it could previously create dangling nodes, since we didn't check whether we were going to use SCALAR_TO_VECTOR for the first insertion. I got rid of the "First" variable and just do a null check on V, which should be equivalent. To avoid false dependencies, we also no longer use undef as the starting V for vectors with no zeroes. This matches v8i16.
I've changed all the extends and OR operations to use MVT::i32, since that's what they'll be promoted to anyway. I've tried to use zero_extend only when necessary and any_extend otherwise. This resulted in some improvements in tests, where we are now able to promote an aligned (i32 (extload i8)) to a 32-bit load.
Differential Revision: https://reviews.llvm.org/D63702
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
llvm/trunk/test/CodeGen/X86/promote-vec3.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/trunk/test/CodeGen/X86/widen_conv-3.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=364469&r1=364468&r2=364469&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jun 26 13:16:19 2019
@@ -7175,51 +7175,51 @@ static SDValue LowerBuildVectorv16i8(SDV
SDLoc dl(Op);
SDValue V;
- bool First = true;
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
- for (unsigned i = 0; i < 16; ++i) {
+ for (unsigned i = 0; i < 16; i += 2) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
- if (ThisIsNonZero && First) {
- if (NumZero)
- V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+ bool NextIsNonZero = (NonZeros & (1 << (i + 1))) != 0;
+ if (!ThisIsNonZero && !NextIsNonZero)
+ continue;
+
+ // FIXME: Investigate combining the first 4 bytes as a i32 instead.
+ SDValue Elt;
+ if (ThisIsNonZero) {
+ if (NumZero || NextIsNonZero)
+ Elt = DAG.getZExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
else
- V = DAG.getUNDEF(MVT::v8i16);
- First = false;
+ Elt = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
}
- if ((i & 1) != 0) {
- // FIXME: Investigate extending to i32 instead of just i16.
- // FIXME: Investigate combining the first 4 bytes as a i32 instead.
- SDValue ThisElt, LastElt;
- bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
- if (LastIsNonZero) {
- LastElt =
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
- }
- if (ThisIsNonZero) {
- ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
- ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
- DAG.getConstant(8, dl, MVT::i8));
- if (LastIsNonZero)
- ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
- } else
- ThisElt = LastElt;
-
- if (ThisElt) {
- if (1 == i) {
- V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
- : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
- V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
- if (NumZero)
- V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
- V = DAG.getBitcast(MVT::v8i16, V);
- } else {
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
- DAG.getIntPtrConstant(i / 2, dl));
- }
+ if (NextIsNonZero) {
+ SDValue NextElt;
+ if (i == 0 && NumZero)
+ NextElt = DAG.getZExtOrTrunc(Op.getOperand(i+1), dl, MVT::i32);
+ else
+ NextElt = DAG.getAnyExtOrTrunc(Op.getOperand(i+1), dl, MVT::i32);
+ NextElt = DAG.getNode(ISD::SHL, dl, MVT::i32, NextElt,
+ DAG.getConstant(8, dl, MVT::i8));
+ if (ThisIsNonZero)
+ Elt = DAG.getNode(ISD::OR, dl, MVT::i32, NextElt, Elt);
+ else
+ Elt = NextElt;
+ }
+
+ // If our first insertion is not the first index then insert into zero
+ // vector to break any register dependency else use SCALAR_TO_VECTOR.
+ if (!V) {
+ if (i != 0)
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+ else {
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Elt);
+ V = DAG.getBitcast(MVT::v8i16, V);
+ continue;
}
}
+ Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Elt);
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, Elt,
+ DAG.getIntPtrConstant(i / 2, dl));
}
return DAG.getBitcast(MVT::v16i8, V);
Modified: llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll?rev=364469&r1=364468&r2=364469&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll Wed Jun 26 13:16:19 2019
@@ -413,16 +413,13 @@ define <16 x i8> @test_buildvector_v16i8
define <16 x i8> @test_buildvector_v16i8_partial(i8 %a2, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
; SSE2-LABEL: test_buildvector_v16i8_partial:
; SSE2: # %bb.0:
-; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: pinsrw $1, %eax, %xmm0
-; SSE2-NEXT: movzbl %sil, %eax
-; SSE2-NEXT: pinsrw $3, %eax, %xmm0
-; SSE2-NEXT: movzbl %dl, %eax
-; SSE2-NEXT: pinsrw $4, %eax, %xmm0
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pinsrw $1, %edi, %xmm0
+; SSE2-NEXT: pinsrw $3, %esi, %xmm0
+; SSE2-NEXT: pinsrw $4, %edx, %xmm0
; SSE2-NEXT: shll $8, %ecx
; SSE2-NEXT: pinsrw $5, %ecx, %xmm0
-; SSE2-NEXT: movzbl %r8b, %eax
-; SSE2-NEXT: pinsrw $6, %eax, %xmm0
+; SSE2-NEXT: pinsrw $6, %r8d, %xmm0
; SSE2-NEXT: shll $8, %r9d
; SSE2-NEXT: pinsrw $7, %r9d, %xmm0
; SSE2-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/promote-vec3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/promote-vec3.ll?rev=364469&r1=364468&r2=364469&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/promote-vec3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/promote-vec3.ll Wed Jun 26 13:16:19 2019
@@ -70,11 +70,10 @@ define <3 x i16> @zext_i8(<3 x i8>) {
define <3 x i16> @sext_i8(<3 x i8>) {
; SSE3-LABEL: sext_i8:
; SSE3: # %bb.0:
-; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; SSE3-NEXT: movd %edx, %xmm0
-; SSE3-NEXT: pinsrw $1, %ecx, %xmm0
+; SSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE3-NEXT: pinsrw $1, %eax, %xmm0
+; SSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $2, %eax, %xmm0
; SSE3-NEXT: psllw $8, %xmm0
; SSE3-NEXT: psraw $8, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=364469&r1=364468&r2=364469&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Wed Jun 26 13:16:19 2019
@@ -2045,8 +2045,7 @@ define <16 x i8> @PR31364(i8* nocapture
; SSE2-NEXT: movzbl (%rsi), %ecx
; SSE2-NEXT: shll $8, %ecx
; SSE2-NEXT: orl %eax, %ecx
-; SSE2-NEXT: movzwl %cx, %eax
-; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[1,1,1,3,4,5,6,7]
Modified: llvm/trunk/test/CodeGen/X86/widen_conv-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-3.ll?rev=364469&r1=364468&r2=364469&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-3.ll Wed Jun 26 13:16:19 2019
@@ -51,6 +51,7 @@ define void @convert_v3i8_to_v3f32(<3 x
; X86-SSE2-NEXT: movl (%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT: shll $8, %edx
+; X86-SSE2-NEXT: pxor %xmm0, %xmm0
; X86-SSE2-NEXT: pinsrw $1, %edx, %xmm0
; X86-SSE2-NEXT: shll $8, %esi
; X86-SSE2-NEXT: pinsrw $3, %esi, %xmm0
@@ -99,6 +100,7 @@ define void @convert_v3i8_to_v3f32(<3 x
; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE2-NEXT: shll $8, %eax
+; X64-SSE2-NEXT: pxor %xmm0, %xmm0
; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0
; X64-SSE2-NEXT: shll $8, %ecx
; X64-SSE2-NEXT: pinsrw $3, %ecx, %xmm0
More information about the llvm-commits
mailing list