[llvm] r245160 - [DAGCombiner] Attempt to mask vectors before zero extension instead of after.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 15 06:27:35 PDT 2015
Author: rksimon
Date: Sat Aug 15 08:27:30 2015
New Revision: 245160
URL: http://llvm.org/viewvc/llvm-project?rev=245160&view=rev
Log:
[DAGCombiner] Attempt to mask vectors before zero extension instead of after.
For cases where we TRUNCATE and then ZERO_EXTEND to a larger size (often from vector legalization), see if we can mask the source data and then ZERO_EXTEND (instead of masking after an ANY_EXTEND). This can help avoid having to generate a larger mask, and possibly having to apply it to several sub-vectors.
(zext (truncate x)) -> (zext (and (x, m)))
Includes a minor patch to SystemZ to better recognise 8/16-bit zero extension patterns from RISBG bit-extraction code.
This is the first of a number of minor patches to help improve the conversion of byte masks to clear mask shuffles.
Differential Revision: http://reviews.llvm.org/D11764
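
For example, the zext_8i8_to_8i32 test (updated below) exercises exactly this pattern; a minimal sketch of the IR involved:

  define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
  entry:
    %t = zext <8 x i8> %z to <8 x i32>
    ret <8 x i32> %t
  }

The illegal <8 x i8> operand is legalized to <8 x i16>, so the DAG sees a truncate followed by a zero extension; masking the 128-bit v8i16 source with 0xff before extending avoids a 256-bit mask that would otherwise be applied after the extension (and split across two sub-vectors on SSE/AVX1).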
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-aapcs.ll
llvm/trunk/test/CodeGen/AArch64/arm64-arith.ll
llvm/trunk/test/CodeGen/AArch64/arm64-vector-ext.ll
llvm/trunk/test/CodeGen/AArch64/bitfield.ll
llvm/trunk/test/CodeGen/SystemZ/insert-05.ll
llvm/trunk/test/CodeGen/X86/avx2-conversions.ll
llvm/trunk/test/CodeGen/X86/vec_cast2.ll
llvm/trunk/test/CodeGen/X86/vector-zext.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=245160&r1=245159&r2=245160&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Aug 15 08:27:30 2015
@@ -6112,31 +6112,45 @@ SDValue DAGCombiner::visitZERO_EXTEND(SD
}
// fold (zext (truncate x)) -> (and x, mask)
- if (N0.getOpcode() == ISD::TRUNCATE &&
- (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
-
+ if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorklist(oye);
}
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ EVT SrcVT = N0.getOperand(0).getValueType();
+ EVT MinVT = N0.getValueType();
+
+ // Try to mask before the extension to avoid having to generate a larger mask,
+ // possibly over several sub-vectors.
+ if (SrcVT.bitsLT(VT)) {
+ if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
+ SDValue Op = N0.getOperand(0);
+ Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ AddToWorklist(Op.getNode());
+ return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ }
}
- SDValue Op = N0.getOperand(0);
- if (Op.getValueType().bitsLT(VT)) {
- Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
- } else if (Op.getValueType().bitsGT(VT)) {
- Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
+ if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
+ SDValue Op = N0.getOperand(0);
+ if (SrcVT.bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
+ AddToWorklist(Op.getNode());
+ } else if (SrcVT.bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+ AddToWorklist(Op.getNode());
+ }
+ return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
}
- return DAG.getZeroExtendInReg(Op, SDLoc(N),
- N0.getValueType().getScalarType());
}
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
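
In DAG terms, the reordering this hunk performs is roughly (an illustrative sketch, not compiler output):

  before: (v8i32 and (v8i32 any_extend (v8i16 x)), 0xff)   ; wide mask, possibly split over sub-vectors
  after:  (v8i32 zero_extend (v8i16 and (x, 0xff)))        ; narrow mask applied once, then a zero extension

The narrow AND is only formed when AND is legal at the source type (or legalization has not yet run) and ZERO_EXTEND is legal at the destination type; otherwise the code falls back to the previous ANY_EXTEND/TRUNCATE plus getZeroExtendInReg path.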
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp?rev=245160&r1=245159&r2=245160&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp Sat Aug 15 08:27:30 2015
@@ -801,7 +801,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(Rx
RxSBG.Input = N.getOperand(0);
return true;
}
-
+
case ISD::ANY_EXTEND:
// Bits above the extended operand are don't-care.
RxSBG.Input = N.getOperand(0);
@@ -818,7 +818,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(Rx
return true;
}
// Fall through.
-
+
case ISD::SIGN_EXTEND: {
// Check that the extension bits are don't-care (i.e. are masked out
// by the final mask).
@@ -938,7 +938,23 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZer
}
return nullptr;
}
- }
+ }
+
+ // If the RISBG operands require no rotation and just mask the bottom
+ // 8/16 bits, attempt to convert this to an LLC zero extension.
+ if (RISBG.Rotate == 0 && (RISBG.Mask == 0xff || RISBG.Mask == 0xffff)) {
+ unsigned OpCode = (RISBG.Mask == 0xff ? SystemZ::LLGCR : SystemZ::LLGHR);
+ if (VT == MVT::i32) {
+ if (Subtarget->hasHighWord())
+ OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCRMux : SystemZ::LLHRMux);
+ else
+ OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCR : SystemZ::LLHR);
+ }
+
+ SDValue In = convertTo(DL, VT, RISBG.Input);
+ N = CurDAG->getMachineNode(OpCode, DL, VT, In);
+ return convertTo(DL, VT, SDValue(N, 0)).getNode();
+ }
unsigned Opcode = SystemZ::RISBG;
// Prefer RISBGN if available, since it does not clobber CC.
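
The effect of the new SystemZ path is visible in the insert-05.ll update below: when the RISBG decomposition needs no rotation and masks only the low 8 or 16 bits, the selector now emits an LLC-family zero extension (llcr/llhr, llgcr/llghr, or the Mux variants when high-word support is available) instead of a RISBG. A minimal sketch of triggering IR, taken from the f19 test:

  %trunc = trunc i32 %a to i8
  %ext = zext i8 %trunc to i64

With this change the low 32 bits are zero-extended with llcr and the high word inserted with iihf, rather than using the previous llgcr/oihl sequence.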
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-aapcs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-aapcs.ll?rev=245160&r1=245159&r2=245160&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-aapcs.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-aapcs.ll Sat Aug 15 08:27:30 2015
@@ -27,12 +27,13 @@ define [2 x i64] @test_i64x2_align(i32,
; Check stack slots are 64-bit at all times.
define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
i32 %int, i64 %long) {
- ; Part of last store. Blasted scheduler.
-; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
-
%ext_bool = zext i1 %bool to i64
store volatile i64 %ext_bool, i64* @var64, align 8
; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
+
+ ; Part of last store. Blasted scheduler.
+; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
+
; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
@@ -63,8 +64,8 @@ define void @test_stack_slots([8 x i32],
define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
%ext_bool = zext i1 %bool to i64
store volatile i64 %ext_bool, i64* @var64
-; CHECK: and [[EXT:x[0-9]+]], x0, #0x1
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: and w[[EXT:[0-9]+]], w0, #0x1
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
%ext_char = sext i8 %char to i64
store volatile i64 %ext_char, i64* @var64
@@ -73,8 +74,8 @@ define void @test_extension(i1 %bool, i8
%ext_short = zext i16 %short to i64
store volatile i64 %ext_short, i64* @var64
-; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: and w[[EXT:[0-9]+]], w2, #0xffff
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
%ext_int = zext i32 %int to i64
store volatile i64 %ext_int, i64* @var64
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-arith.ll?rev=245160&r1=245159&r2=245160&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-arith.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-arith.ll Sat Aug 15 08:27:30 2015
@@ -123,7 +123,8 @@ entry:
define i64 @t14(i16 %a, i64 %x) nounwind ssp {
entry:
; CHECK-LABEL: t14:
-; CHECK: add x0, x1, w0, uxth #3
+; CHECK: and w8, w0, #0xffff
+; CHECK: add x0, x1, w8, uxtw #3
; CHECK: ret
%c = zext i16 %a to i64
%d = shl i64 %c, 3
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vector-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vector-ext.ll?rev=245160&r1=245159&r2=245160&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vector-ext.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vector-ext.ll Sat Aug 15 08:27:30 2015
@@ -1,27 +1,27 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-
-;CHECK: @func30
-;CHECK: ushll.4s v0, v0, #0
-;CHECK: movi.4s v1, #0x1
-;CHECK: and.16b v0, v0, v1
-;CHECK: str q0, [x0]
-;CHECK: ret
-
-%T0_30 = type <4 x i1>
-%T1_30 = type <4 x i32>
-define void @func30(%T0_30 %v0, %T1_30* %p1) {
- %r = zext %T0_30 %v0 to %T1_30
- store %T1_30 %r, %T1_30* %p1
- ret void
-}
-
-; Extend from v1i1 was crashing things (PR20791). Make sure we do something
-; sensible instead.
-define <1 x i32> @autogen_SD7918() {
-; CHECK-LABEL: autogen_SD7918
-; CHECK: movi d0, #0000000000000000
-; CHECK-NEXT: ret
- %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
- %ZE = zext <1 x i1> %I29 to <1 x i32>
- ret <1 x i32> %ZE
-}
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+;CHECK: @func30
+;CHECK: movi.4h v1, #0x1
+;CHECK: and.8b v0, v0, v1
+;CHECK: ushll.4s v0, v0, #0
+;CHECK: str q0, [x0]
+;CHECK: ret
+
+%T0_30 = type <4 x i1>
+%T1_30 = type <4 x i32>
+define void @func30(%T0_30 %v0, %T1_30* %p1) {
+ %r = zext %T0_30 %v0 to %T1_30
+ store %T1_30 %r, %T1_30* %p1
+ ret void
+}
+
+; Extend from v1i1 was crashing things (PR20791). Make sure we do something
+; sensible instead.
+define <1 x i32> @autogen_SD7918() {
+; CHECK-LABEL: autogen_SD7918
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+ %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
+ %ZE = zext <1 x i1> %I29 to <1 x i32>
+ ret <1 x i32> %ZE
+}
Modified: llvm/trunk/test/CodeGen/AArch64/bitfield.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/bitfield.ll?rev=245160&r1=245159&r2=245160&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/bitfield.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/bitfield.ll Sat Aug 15 08:27:30 2015
@@ -3,51 +3,67 @@
@var32 = global i32 0
@var64 = global i64 0
-define void @test_extendb(i8 %var) {
-; CHECK-LABEL: test_extendb:
+define void @test_extendb32(i8 %var) {
+; CHECK-LABEL: test_extendb32:
%sxt32 = sext i8 %var to i32
store volatile i32 %sxt32, i32* @var32
; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}}
- %sxt64 = sext i8 %var to i64
- store volatile i64 %sxt64, i64* @var64
-; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
-
; N.b. this doesn't actually produce a bitfield instruction at the
; moment, but it's still a good test to have and the semantics are
; correct.
%uxt32 = zext i8 %var to i32
store volatile i32 %uxt32, i32* @var32
; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff
+ ret void
+}
+define void @test_extendb64(i8 %var) {
+; CHECK-LABEL: test_extendb64:
+
+ %sxt64 = sext i8 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
+
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
%uxt64 = zext i8 %var to i64
store volatile i64 %uxt64, i64* @var64
-; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff
ret void
}
-define void @test_extendh(i16 %var) {
-; CHECK-LABEL: test_extendh:
+define void @test_extendh32(i16 %var) {
+; CHECK-LABEL: test_extendh32:
%sxt32 = sext i16 %var to i32
store volatile i32 %sxt32, i32* @var32
; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}}
- %sxt64 = sext i16 %var to i64
- store volatile i64 %sxt64, i64* @var64
-; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
-
; N.b. this doesn't actually produce a bitfield instruction at the
; moment, but it's still a good test to have and the semantics are
; correct.
%uxt32 = zext i16 %var to i32
store volatile i32 %uxt32, i32* @var32
; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
+ ret void
+}
+define void @test_extendh64(i16 %var) {
+; CHECK-LABEL: test_extendh64:
+
+ %sxt64 = sext i16 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
+
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
%uxt64 = zext i16 %var to i64
store volatile i64 %uxt64, i64* @var64
-; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
ret void
}
Modified: llvm/trunk/test/CodeGen/SystemZ/insert-05.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/insert-05.ll?rev=245160&r1=245159&r2=245160&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/insert-05.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/insert-05.ll Sat Aug 15 08:27:30 2015
@@ -214,8 +214,8 @@ define i64 @f18(i32 %a) {
; The truncation here isn't free; we need an explicit zero extension.
define i64 @f19(i32 %a) {
; CHECK-LABEL: f19:
-; CHECK: llgcr %r2, %r2
-; CHECK: oihl %r2, 1
+; CHECK: llcr %r2, %r2
+; CHECK: iihf %r2, 1
; CHECK: br %r14
%trunc = trunc i32 %a to i8
%ext = zext i8 %trunc to i64
Modified: llvm/trunk/test/CodeGen/X86/avx2-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-conversions.ll?rev=245160&r1=245159&r2=245160&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-conversions.ll Sat Aug 15 08:27:30 2015
@@ -61,8 +61,8 @@ define <8 x i32> @zext8(<8 x i16> %A) no
define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
; CHECK-LABEL: zext_8i8_8i32:
; CHECK: ## BB#0:
+; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
%B = zext <8 x i8> %A to <8 x i32>
ret <8 x i32>%B
Modified: llvm/trunk/test/CodeGen/X86/vec_cast2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_cast2.ll?rev=245160&r1=245159&r2=245160&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_cast2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_cast2.ll Sat Aug 15 08:27:30 2015
@@ -46,11 +46,10 @@ define <4 x float> @foo1_4(<4 x i8> %src
define <8 x float> @foo2_8(<8 x i8> %src) {
; CHECK-LABEL: foo2_8:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
-; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpand LCPI2_0, %xmm0, %xmm0
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: retl
Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=245160&r1=245159&r2=245160&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Sat Aug 15 08:27:30 2015
@@ -910,50 +910,46 @@ entry:
define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
; SSE2-LABEL: zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSSE3-NEXT: pand %xmm1, %xmm2
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: pand %xmm0, %xmm1
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSE41-NEXT: pand %xmm1, %xmm2
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
%t = zext <8 x i8> %z to <8 x i32>