[llvm] r244723 - [InstCombine] Move SSE/AVX vector blend folding to instcombiner
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 12 01:08:57 PDT 2015
Author: rksimon
Date: Wed Aug 12 03:08:56 2015
New Revision: 244723
URL: http://llvm.org/viewvc/llvm-project?rev=244723&view=rev
Log:
[InstCombine] Move SSE/AVX vector blend folding to instcombiner
As discussed in D11886, this patch moves the SSE/AVX vector blend folding from PerformINTRINSIC_WO_CHAINCombine to the instcombiner (which allows us to remove that function completely).
InstCombiner already had partial support for this; I just had to add support for zero (ConstantAggregateZero) masks and for the case where both selection inputs are the same (allowing us to ignore the mask).
I also moved all the relevant combine tests into InstCombine/blend_x86.ll
Differential Revision: http://reviews.llvm.org/D11934
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/test/CodeGen/X86/combine-avx-intrinsics.ll
llvm/trunk/test/CodeGen/X86/combine-avx2-intrinsics.ll
llvm/trunk/test/CodeGen/X86/combine-sse41-intrinsics.ll
llvm/trunk/test/Transforms/InstCombine/blend_x86.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=244723&r1=244722&r2=244723&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Aug 12 03:08:56 2015
@@ -1716,7 +1716,6 @@ X86TargetLowering::X86TargetLowering(con
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
@@ -12707,7 +12706,7 @@ static SDValue LowerFABSorFNEG(SDValue O
MVT LogicVT;
MVT EltVT;
unsigned NumElts;
-
+
if (VT.isVector()) {
LogicVT = VT;
EltVT = VT.getVectorElementType();
@@ -15424,7 +15423,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
SDValue Mask = Op.getOperand(3);
SDValue RoundingMode;
// We allways add rounding mode to the Node.
- // If the rounding mode is not specified, we add the
+ // If the rounding mode is not specified, we add the
// "current direction" mode.
if (Op.getNumOperands() == 4)
RoundingMode =
@@ -23432,57 +23431,6 @@ static SDValue PerformCMOVCombine(SDNode
return SDValue();
}
-static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntNo) {
- default: return SDValue();
- // SSE/AVX/AVX2 blend intrinsics.
- case Intrinsic::x86_avx2_pblendvb:
- // Don't try to simplify this intrinsic if we don't have AVX2.
- if (!Subtarget->hasAVX2())
- return SDValue();
- // FALL-THROUGH
- case Intrinsic::x86_avx_blendv_pd_256:
- case Intrinsic::x86_avx_blendv_ps_256:
- // Don't try to simplify this intrinsic if we don't have AVX.
- if (!Subtarget->hasAVX())
- return SDValue();
- // FALL-THROUGH
- case Intrinsic::x86_sse41_blendvps:
- case Intrinsic::x86_sse41_blendvpd:
- case Intrinsic::x86_sse41_pblendvb: {
- SDValue Op0 = N->getOperand(1);
- SDValue Op1 = N->getOperand(2);
- SDValue Mask = N->getOperand(3);
-
- // Don't try to simplify this intrinsic if we don't have SSE4.1.
- if (!Subtarget->hasSSE41())
- return SDValue();
-
- // fold (blend A, A, Mask) -> A
- if (Op0 == Op1)
- return Op0;
- // fold (blend A, B, allZeros) -> A
- if (ISD::isBuildVectorAllZeros(Mask.getNode()))
- return Op0;
- // fold (blend A, B, allOnes) -> B
- if (ISD::isBuildVectorAllOnes(Mask.getNode()))
- return Op1;
-
- // Simplify the case where the mask is a constant i32 value.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
- if (C->isNullValue())
- return Op0;
- if (C->isAllOnesValue())
- return Op1;
- }
-
- return SDValue();
- }
- }
-}
-
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
@@ -25747,8 +25695,6 @@ SDValue X86TargetLowering::PerformDAGCom
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
- case ISD::INTRINSIC_WO_CHAIN:
- return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
case X86ISD::INSERTPS: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None)
return PerformINSERTPSCombine(N, DAG, Subtarget);
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=244723&r1=244722&r2=244723&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Wed Aug 12 03:08:56 2015
@@ -960,7 +960,20 @@ Instruction *InstCombiner::visitCallInst
// This optimization is convoluted because the intrinsic is defined as
// getting a vector of floats or doubles for the ps and pd versions.
// FIXME: That should be changed.
+
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
Value *Mask = II->getArgOperand(2);
+
+ // fold (blend A, A, Mask) -> A
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(CI, Op0);
+
+ // Zero Mask - select 1st argument.
+ if (auto C = dyn_cast<ConstantAggregateZero>(Mask))
+ return ReplaceInstUsesWith(CI, Op0);
+
+ // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
auto Tyi1 = Builder->getInt1Ty();
auto SelectorType = cast<VectorType>(Mask->getType());
@@ -983,11 +996,9 @@ Instruction *InstCombiner::visitCallInst
Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
}
auto NewSelector = ConstantVector::get(Selectors);
- return SelectInst::Create(NewSelector, II->getArgOperand(1),
- II->getArgOperand(0), "blendv");
- } else {
- break;
+ return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
}
+ break;
}
case Intrinsic::x86_avx_vpermilvar_ps:
Modified: llvm/trunk/test/CodeGen/X86/combine-avx-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-avx-intrinsics.ll?rev=244723&r1=244722&r2=244723&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-avx-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-avx-intrinsics.ll Wed Aug 12 03:08:56 2015
@@ -19,24 +19,6 @@ define <8 x float> @test_x86_avx_blend_p
; CHECK: ret
-define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
- %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a0, <4 x double> %a1)
- ret <4 x double> %1
-}
-; CHECK-LABEL: test_x86_avx_blendv_pd_256
-; CHECK-NOT: vblendvpd
-; CHECK: ret
-
-
-define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
- %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a0, <8 x float> %a1)
- ret <8 x float> %1
-}
-; CHECK-LABEL: test_x86_avx_blendv_ps_256
-; CHECK-NOT: vblendvps
-; CHECK: ret
-
-
define <4 x double> @test2_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
%1 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
ret <4 x double> %1
@@ -55,24 +37,6 @@ define <8 x float> @test2_x86_avx_blend_
; CHECK: ret
-define <4 x double> @test2_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
- %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> zeroinitializer)
- ret <4 x double> %1
-}
-; CHECK-LABEL: test2_x86_avx_blendv_pd_256
-; CHECK-NOT: vblendvpd
-; CHECK: ret
-
-
-define <8 x float> @test2_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
- %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer)
- ret <8 x float> %1
-}
-; CHECK-LABEL: test2_x86_avx_blendv_ps_256
-; CHECK-NOT: vblendvps
-; CHECK: ret
-
-
define <4 x double> @test3_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
%1 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 -1)
ret <4 x double> %1
@@ -91,29 +55,6 @@ define <8 x float> @test3_x86_avx_blend_
; CHECK: ret
-define <4 x double> @test3_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
- %Mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <4 x double>
- %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %Mask)
- ret <4 x double> %1
-}
-; CHECK-LABEL: test3_x86_avx_blendv_pd_256
-; CHECK-NOT: vblendvpd
-; CHECK: ret
-
-
-define <8 x float> @test3_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
- %Mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <8 x float>
- %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %Mask)
- ret <8 x float> %1
-}
-; CHECK-LABEL: test3_x86_avx_blendv_ps_256
-; CHECK-NOT: vblendvps
-; CHECK: ret
-
-
-
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32)
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32)
-declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
-declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
Modified: llvm/trunk/test/CodeGen/X86/combine-avx2-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-avx2-intrinsics.ll?rev=244723&r1=244722&r2=244723&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-avx2-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-avx2-intrinsics.ll Wed Aug 12 03:08:56 2015
@@ -3,15 +3,6 @@
; Verify that the backend correctly combines AVX2 builtin intrinsics.
-define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
- %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a0, <32 x i8> %a1)
- ret <32 x i8> %res
-}
-; CHECK-LABEL: test_x86_avx2_pblendvb
-; CHECK-NOT: vpblendvb
-; CHECK: ret
-
-
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0) {
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a0, i32 7)
ret <16 x i16> %res
@@ -39,15 +30,6 @@ define <8 x i32> @test_x86_avx2_pblendd_
; CHECK: ret
-define <32 x i8> @test2_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
- %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> zeroinitializer)
- ret <32 x i8> %res
-}
-; CHECK-LABEL: test2_x86_avx2_pblendvb
-; CHECK-NOT: vpblendvb
-; CHECK: ret
-
-
define <16 x i16> @test2_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 0)
ret <16 x i16> %res
@@ -75,16 +57,6 @@ define <8 x i32> @test2_x86_avx2_pblendd
; CHECK: ret
-define <32 x i8> @test3_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
- %1 = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <32 x i8>
- %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %1)
- ret <32 x i8> %res
-}
-; CHECK-LABEL: test3_x86_avx2_pblendvb
-; CHECK-NOT: vpblendvb
-; CHECK: ret
-
-
define <16 x i16> @test3_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 -1)
ret <16 x i16> %res
@@ -112,7 +84,6 @@ define <8 x i32> @test3_x86_avx2_pblendd
; CHECK: ret
-declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32)
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32)
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32)
Modified: llvm/trunk/test/CodeGen/X86/combine-sse41-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-sse41-intrinsics.ll?rev=244723&r1=244722&r2=244723&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sse41-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-sse41-intrinsics.ll Wed Aug 12 03:08:56 2015
@@ -19,33 +19,6 @@ define <4 x float> @test_x86_sse41_blend
; CHECK: ret
-define <2 x double> @test_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
- %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer)
- ret <2 x double> %1
-}
-; CHECK-LABEL: test_x86_sse41_blendv_pd
-; CHECK-NOT: blendvpd
-; CHECK: ret
-
-
-define <4 x float> @test_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
- %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer)
- ret <4 x float> %1
-}
-; CHECK-LABEL: test_x86_sse41_blendv_ps
-; CHECK-NOT: blendvps
-; CHECK: ret
-
-
-define <16 x i8> @test_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1) {
- %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> zeroinitializer)
- ret <16 x i8> %1
-}
-; CHECK-LABEL: test_x86_sse41_pblendv_b
-; CHECK-NOT: pblendvb
-; CHECK: ret
-
-
define <8 x i16> @test_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
%1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 0)
ret <8 x i16> %1
@@ -75,39 +48,6 @@ define <4 x float> @test2_x86_sse41_blen
; CHECK-NEXT: ret
-define <2 x double> @test2_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
- %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <2 x double>
- %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %Mask )
- ret <2 x double> %1
-}
-; CHECK-LABEL: test2_x86_sse41_blendv_pd
-; CHECK-NOT: blendvpd
-; CHECK: movaps %xmm1, %xmm0
-; CHECK-NEXT: ret
-
-
-define <4 x float> @test2_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
- %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x float>
- %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %Mask)
- ret <4 x float> %1
-}
-; CHECK-LABEL: test2_x86_sse41_blendv_ps
-; CHECK-NOT: blendvps
-; CHECK: movaps %xmm1, %xmm0
-; CHECK-NEXT: ret
-
-
-define <16 x i8> @test2_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
- %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <16 x i8>
- %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %Mask)
- ret <16 x i8> %1
-}
-; CHECK-LABEL: test2_x86_sse41_pblendv_b
-; CHECK-NOT: pblendvb
-; CHECK: movaps %xmm1, %xmm0
-; CHECK-NEXT: ret
-
-
define <8 x i16> @test2_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
%1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 -1)
ret <8 x i16> %1
@@ -136,33 +76,6 @@ define <4 x float> @test3_x86_sse41_blen
; CHECK: ret
-define <2 x double> @test3_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
- %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a0, <2 x double> %a1 )
- ret <2 x double> %1
-}
-; CHECK-LABEL: test3_x86_sse41_blendv_pd
-; CHECK-NOT: blendvpd
-; CHECK: ret
-
-
-define <4 x float> @test3_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
- %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a0, <4 x float> %a1)
- ret <4 x float> %1
-}
-; CHECK-LABEL: test3_x86_sse41_blendv_ps
-; CHECK-NOT: blendvps
-; CHECK: ret
-
-
-define <16 x i8> @test3_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1) {
- %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> %a1)
- ret <16 x i8> %1
-}
-; CHECK-LABEL: test3_x86_sse41_pblendv_b
-; CHECK-NOT: pblendvb
-; CHECK: ret
-
-
define <8 x i16> @test3_x86_sse41_pblend_w(<8 x i16> %a0) {
%1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a0, i32 7)
ret <8 x i16> %1
@@ -174,9 +87,5 @@ define <8 x i16> @test3_x86_sse41_pblend
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32)
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32)
-declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
-declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
-declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32)
-declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>)
Modified: llvm/trunk/test/Transforms/InstCombine/blend_x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/blend_x86.ll?rev=244723&r1=244722&r2=244723&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/blend_x86.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/blend_x86.ll Wed Aug 12 03:08:56 2015
@@ -2,42 +2,118 @@
define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd
-; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x double> %ab, <2 x double> %xy
+; CHECK-NEXT: %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %ab, <2 x double> %xy
+; CHECK-NEXT: ret <2 x double> %1
%1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00>)
ret <2 x double> %1
}
+define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_zero
+; CHECK-NEXT: ret <2 x double> %xy
+ %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> zeroinitializer)
+ ret <2 x double> %1
+}
+
+define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) {
+; CHECK-LABEL: @constant_blendvpd_dup
+; CHECK-NEXT: ret <2 x double> %xy
+ %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %xy, <2 x double> %sel)
+ ret <2 x double> %1
+}
+
define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps
-; CHECK: select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %abcd, <4 x float> %xyzw
+; CHECK-NEXT: %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %abcd, <4 x float> %xyzw
+; CHECK-NEXT: ret <4 x float> %1
%1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
ret <4 x float> %1
}
+define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_zero
+; CHECK-NEXT: ret <4 x float> %xyzw
+ %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> zeroinitializer)
+ ret <4 x float> %1
+}
+
+define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
+; CHECK-LABEL: @constant_blendvps_dup
+; CHECK-NEXT: ret <4 x float> %xyzw
+ %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %xyzw, <4 x float> %sel)
+ ret <4 x float> %1
+}
+
define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb
-; CHECK: select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %abcd, <16 x i8> %xyzw
+; CHECK-NEXT: %1 = select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %abcd, <16 x i8> %xyzw
+; CHECK-NEXT: ret <16 x i8> %1
%1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
ret <16 x i8> %1
}
+define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_zero
+; CHECK-NEXT: ret <16 x i8> %xyzw
+ %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) {
+; CHECK-LABEL: @constant_pblendvb_dup
+; CHECK-NEXT: ret <16 x i8> %xyzw
+ %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %xyzw, <16 x i8> %sel)
+ ret <16 x i8> %1
+}
+
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd_avx
-; CHECK: select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %ab, <4 x double> %xy
+; CHECK-NEXT: %1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %ab, <4 x double> %xy
+; CHECK-NEXT: ret <4 x double> %1
%1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00, double 0xFFFFFFFFE0000000, double 0.000000e+00>)
ret <4 x double> %1
}
+define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_avx_zero
+; CHECK-NEXT: ret <4 x double> %xy
+ %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> zeroinitializer)
+ ret <4 x double> %1
+}
+
+define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) {
+; CHECK-LABEL: @constant_blendvpd_avx_dup
+; CHECK-NEXT: ret <4 x double> %xy
+ %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %xy, <4 x double> %sel)
+ ret <4 x double> %1
+}
+
define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps_avx
-; CHECK: select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %abcd, <8 x float> %xyzw
+; CHECK-NEXT: %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %abcd, <8 x float> %xyzw
+; CHECK-NEXT: ret <8 x float> %1
%1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
ret <8 x float> %1
}
+define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_avx_zero
+; CHECK-NEXT: ret <8 x float> %xyzw
+ %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> zeroinitializer)
+ ret <8 x float> %1
+}
+
+define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) {
+; CHECK-LABEL: @constant_blendvps_avx_dup
+; CHECK-NEXT: ret <8 x float> %xyzw
+ %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %xyzw, <8 x float> %sel)
+ ret <8 x float> %1
+}
+
define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb_avx2
-; CHECK: select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %abcd, <32 x i8> %xyzw
+; CHECK-NEXT: %1 = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %abcd, <32 x i8> %xyzw
+; CHECK-NEXT: ret <32 x i8> %1
%1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
<32 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
@@ -46,6 +122,20 @@ define <32 x i8> @constant_pblendvb_avx2
ret <32 x i8> %1
}
+define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_avx2_zero
+; CHECK-NEXT: ret <32 x i8> %xyzw
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> zeroinitializer)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
+; CHECK-LABEL: @constant_pblendvb_avx2_dup
+; CHECK-NEXT: ret <32 x i8> %xyzw
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %xyzw, <32 x i8> %sel)
+ ret <32 x i8> %1
+}
+
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
More information about the llvm-commits
mailing list