[llvm] fd2de54 - [X86] Canonicalize vXi64 SIGN_EXTEND_INREG vXi1 to use v2Xi32 splatted shifts instead
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 17 02:18:21 PDT 2023
Author: Simon Pilgrim
Date: 2023-07-17T10:18:03+01:00
New Revision: fd2de54920d653eded0260ca7d056ec5ec86fb4d
URL: https://github.com/llvm/llvm-project/commit/fd2de54920d653eded0260ca7d056ec5ec86fb4d
DIFF: https://github.com/llvm/llvm-project/commit/fd2de54920d653eded0260ca7d056ec5ec86fb4d.diff
LOG: [X86] Canonicalize vXi64 SIGN_EXTEND_INREG vXi1 to use v2Xi32 splatted shifts instead
If a vXi64 bool sign_extend_inreg pattern has somehow been lowered to vector shifts (without PSRAQ support), try to canonicalize it to vXi32 shifts to improve the likelihood that value tracking will be able to fold them away.
Using a PSLLQ and a bitcasted PSRAD node makes it very difficult for later folds to recover from this.
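For illustration, this is the shape of the change as it appears in the SSE2 checks for shrunkblend_2uses below (sequence paraphrased into AT&T syntax; the immediates $245 and $160 encode the {1,1,3,3} and {0,0,2,2} shuffles). Previously the v2i64 bool sign-extension expanded as:

    psllq  $63, %xmm0           # shift each bool into the i64 sign bit
    pshufd $245, %xmm0, %xmm0   # xmm0 = xmm0[1,1,3,3]: splat the high i32 of each i64
    psrad  $31, %xmm0           # arithmetic shift splats the sign across each i32

and is now canonicalized to splatted i32 shifts, which later value-tracking folds can see through:

    pshufd $160, %xmm0, %xmm0   # xmm0 = xmm0[0,0,2,2]: splat the low i32 of each i64
    pslld  $31, %xmm0           # shift each bool into the i32 sign bit
    psrad  $31, %xmm0           # arithmetic shift splats the sign across each i32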
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
llvm/test/CodeGen/X86/promote-cmp.ll
llvm/test/CodeGen/X86/vector-bo-select.ll
llvm/test/CodeGen/X86/vector-sext.ll
llvm/test/CodeGen/X86/vselect.ll
llvm/test/CodeGen/X86/vsplit-and.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 15fedf6b63bc7c..240dd2a1c0e076 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49888,6 +49888,29 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
return Res;
}
+ // Attempt to detect an expanded vXi64 SIGN_EXTEND_INREG vXi1 pattern, and
+ // convert to a splatted v2Xi32 SIGN_EXTEND_INREG pattern:
+ // psrad(pshufd(psllq(X,63),1,1,3,3),31) ->
+ // pshufd(psrad(pslld(X,31),31),0,0,2,2).
+ if (Opcode == X86ISD::VSRAI && NumBitsPerElt == 32 && ShiftVal == 31 &&
+ N0.getOpcode() == X86ISD::PSHUFD &&
+ N0.getConstantOperandVal(1) == getV4X86ShuffleImm({1, 1, 3, 3}) &&
+ N0->hasOneUse()) {
+ SDValue BC = peekThroughOneUseBitcasts(N0.getOperand(0));
+ if (BC.getOpcode() == X86ISD::VSHLI &&
+ BC.getScalarValueSizeInBits() == 64 &&
+ BC.getConstantOperandVal(1) == 63) {
+ SDLoc DL(N);
+ SDValue Src = BC.getOperand(0);
+ Src = DAG.getBitcast(VT, Src);
+ Src = DAG.getNode(X86ISD::PSHUFD, DL, VT, Src,
+ getV4X86ShuffleImm8ForMask({0, 0, 2, 2}, DL, DAG));
+ Src = DAG.getNode(X86ISD::VSHLI, DL, VT, Src, N1);
+ Src = DAG.getNode(X86ISD::VSRAI, DL, VT, Src, N1);
+ return Src;
+ }
+ }
+
auto TryConstantFold = [&](SDValue V) {
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
index 2930efe7334b0d..c49bb9c0f8f86a 100644
--- a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
+++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
@@ -587,9 +587,8 @@ define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) {
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE41-NEXT: orps %xmm2, %xmm0
; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSE41-NEXT: psllq $63, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
@@ -612,12 +611,11 @@ define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) {
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-NEXT: andps %xmm3, %xmm0
; SSE2-NEXT: orps %xmm2, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: psllq $63, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE2-NEXT: xorps %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: retq
%cmp1 = icmp eq <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
@@ -729,9 +727,8 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
; SSE41-NEXT: orps %xmm2, %xmm0
; SSE41-NEXT: xorps %xmm3, %xmm0
; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSE41-NEXT: psllq $63, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
@@ -756,12 +753,11 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
; SSE2-NEXT: andps %xmm4, %xmm0
; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: xorps %xmm3, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: psllq $63, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE2-NEXT: xorps %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll
index 529396ca461707..614d86bd4c7942 100644
--- a/llvm/test/CodeGen/X86/promote-cmp.ll
+++ b/llvm/test/CodeGen/X86/promote-cmp.ll
@@ -35,9 +35,8 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm9[2,1,3,3]
-; SSE2-NEXT: psllq $63, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm9[2,2,3,3]
+; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-bo-select.ll b/llvm/test/CodeGen/X86/vector-bo-select.ll
index 3e44e2cdb2b185..890514fbdc022b 100644
--- a/llvm/test/CodeGen/X86/vector-bo-select.ll
+++ b/llvm/test/CodeGen/X86/vector-bo-select.ll
@@ -5510,8 +5510,8 @@ define <8 x i64> @select_sdiv_neutral_constant_v8i64(<8 x i1> %b, <8 x i64> %x,
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,2,2,2]
; SSE2-NEXT: pshufhw {{.*#+}} xmm8 = xmm8[0,1,2,3,5,5,5,5]
-; SSE2-NEXT: psllq $63, %xmm8
-; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm8[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm8[0,0,2,2]
+; SSE2-NEXT: pslld $31, %xmm8
; SSE2-NEXT: psrad $31, %xmm8
; SSE2-NEXT: movdqa %xmm8, %xmm10
; SSE2-NEXT: pandn %xmm7, %xmm10
@@ -5520,8 +5520,8 @@ define <8 x i64> @select_sdiv_neutral_constant_v8i64(<8 x i1> %b, <8 x i64> %x,
; SSE2-NEXT: por %xmm10, %xmm8
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,1,1]
; SSE2-NEXT: pshufhw {{.*#+}} xmm7 = xmm7[0,1,2,3,5,5,5,5]
-; SSE2-NEXT: psllq $63, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[0,0,2,2]
+; SSE2-NEXT: pslld $31, %xmm7
; SSE2-NEXT: psrad $31, %xmm7
; SSE2-NEXT: movdqa %xmm7, %xmm10
; SSE2-NEXT: pandn %xmm6, %xmm10
@@ -5529,8 +5529,8 @@ define <8 x i64> @select_sdiv_neutral_constant_v8i64(<8 x i1> %b, <8 x i64> %x,
; SSE2-NEXT: por %xmm10, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[0,0,0,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,5,5,5]
-; SSE2-NEXT: psllq $63, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,0,2,2]
+; SSE2-NEXT: pslld $31, %xmm6
; SSE2-NEXT: psrad $31, %xmm6
; SSE2-NEXT: movdqa %xmm6, %xmm10
; SSE2-NEXT: pandn %xmm5, %xmm10
@@ -5538,8 +5538,8 @@ define <8 x i64> @select_sdiv_neutral_constant_v8i64(<8 x i1> %b, <8 x i64> %x,
; SSE2-NEXT: por %xmm10, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
-; SSE2-NEXT: psllq $63, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2]
+; SSE2-NEXT: pslld $31, %xmm5
; SSE2-NEXT: psrad $31, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm9
; SSE2-NEXT: pandn {{[0-9]+}}(%rsp), %xmm5
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 23d6f256e1ab6b..65b324c793428f 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -1775,13 +1775,11 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
; SSE2-NEXT: shrb $3, %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: pinsrw $6, %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
-; SSE2-NEXT: psllq $63, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
+; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
-; SSE2-NEXT: psllq $63, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: retq
;
@@ -1805,13 +1803,11 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
; SSSE3-NEXT: shrb $3, %al
; SSSE3-NEXT: movzbl %al, %eax
; SSSE3-NEXT: pinsrw $6, %eax, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
-; SSSE3-NEXT: psllq $63, %xmm0
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
+; SSSE3-NEXT: pslld $31, %xmm0
; SSSE3-NEXT: psrad $31, %xmm0
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
-; SSSE3-NEXT: psllq $63, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; SSSE3-NEXT: pslld $31, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: retq
;
@@ -1831,17 +1827,15 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
; SSE41-NEXT: shrb $2, %cl
; SSE41-NEXT: andb $1, %cl
; SSE41-NEXT: movzbl %cl, %ecx
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: pinsrb $8, %ecx, %xmm1
; SSE41-NEXT: shrb $3, %al
; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
; SSE41-NEXT: pinsrb $12, %eax, %xmm1
-; SSE41-NEXT: psllq $63, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; SSE41-NEXT: psllq $63, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: retq
;
@@ -1939,13 +1933,11 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
; X86-SSE2-NEXT: shrb $3, %al
; X86-SSE2-NEXT: movzbl %al, %eax
; X86-SSE2-NEXT: pinsrw $6, %eax, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
-; X86-SSE2-NEXT: psllq $63, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
+; X86-SSE2-NEXT: pslld $31, %xmm0
; X86-SSE2-NEXT: psrad $31, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
-; X86-SSE2-NEXT: psllq $63, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; X86-SSE2-NEXT: pslld $31, %xmm1
; X86-SSE2-NEXT: psrad $31, %xmm1
; X86-SSE2-NEXT: retl
;
@@ -1966,17 +1958,15 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
; X86-SSE41-NEXT: shrb $2, %cl
; X86-SSE41-NEXT: andb $1, %cl
; X86-SSE41-NEXT: movzbl %cl, %ecx
-; X86-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; X86-SSE41-NEXT: pinsrb $8, %ecx, %xmm1
; X86-SSE41-NEXT: shrb $3, %al
; X86-SSE41-NEXT: movzbl %al, %eax
+; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
; X86-SSE41-NEXT: pinsrb $12, %eax, %xmm1
-; X86-SSE41-NEXT: psllq $63, %xmm0
-; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X86-SSE41-NEXT: pslld $31, %xmm0
; X86-SSE41-NEXT: psrad $31, %xmm0
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; X86-SSE41-NEXT: psllq $63, %xmm1
-; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE41-NEXT: pslld $31, %xmm1
; X86-SSE41-NEXT: psrad $31, %xmm1
; X86-SSE41-NEXT: retl
entry:
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index 3fd74a253b0f25..0c57f497aa8aad 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -483,8 +483,8 @@ define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) {
define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
; SSE2-LABEL: shrunkblend_2uses:
; SSE2: # %bb.0:
-; SSE2-NEXT: psllq $63, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: pandn %xmm2, %xmm5
@@ -522,8 +522,8 @@ define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b,
define <2 x i64> @shrunkblend_nonvselectuse(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
; SSE2-LABEL: shrunkblend_nonvselectuse:
; SSE2: # %bb.0:
-; SSE2-NEXT: psllq $63, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,2,2]
+; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm0
diff --git a/llvm/test/CodeGen/X86/vsplit-and.ll b/llvm/test/CodeGen/X86/vsplit-and.ll
index 04869aa0b9a920..85def820ecb92a 100644
--- a/llvm/test/CodeGen/X86/vsplit-and.ll
+++ b/llvm/test/CodeGen/X86/vsplit-and.ll
@@ -23,31 +23,30 @@ define void @t0(ptr %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
define void @t2(ptr %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
; CHECK-LABEL: t2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq %r9, %xmm1
-; CHECK-NEXT: movq %r8, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT: movq %rdx, %xmm1
+; CHECK-NEXT: movq %r9, %xmm0
+; CHECK-NEXT: movq %r8, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movq %rdx, %xmm0
; CHECK-NEXT: movq %rsi, %xmm2
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; CHECK-NEXT: movq %rcx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; CHECK-NEXT: movq %rcx, %xmm0
; CHECK-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
; CHECK-NEXT: pxor %xmm4, %xmm4
-; CHECK-NEXT: pcmpeqq %xmm4, %xmm1
+; CHECK-NEXT: pcmpeqq %xmm4, %xmm0
; CHECK-NEXT: pcmpeqq %xmm4, %xmm2
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pcmpeqq %xmm4, %xmm3
-; CHECK-NEXT: pcmpeqq %xmm4, %xmm0
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
-; CHECK-NEXT: orps %xmm2, %xmm0
-; CHECK-NEXT: xorps %xmm1, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; CHECK-NEXT: psllq $63, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-NEXT: psrad $31, %xmm1
-; CHECK-NEXT: pmovsxdq %xmm0, %xmm0
-; CHECK-NEXT: movdqa %xmm0, (%rdi)
-; CHECK-NEXT: movq %xmm1, 16(%rdi)
+; CHECK-NEXT: pcmpeqq %xmm4, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
+; CHECK-NEXT: orps %xmm2, %xmm1
+; CHECK-NEXT: xorps %xmm0, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,2,2]
+; CHECK-NEXT: pslld $31, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: pmovsxdq %xmm1, %xmm1
+; CHECK-NEXT: movdqa %xmm1, (%rdi)
+; CHECK-NEXT: movq %xmm0, 16(%rdi)
; CHECK-NEXT: retq
%cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
%cmp2 = icmp ne <3 x i64> %src2, zeroinitializer