[llvm] r361425 - [X86][InstCombine] Remove InstCombine code that turns X86 round intrinsics into llvm.ceil/floor. Remove some isel patterns that existed because that was happening.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 13:04:56 PDT 2019
Author: ctopper
Date: Wed May 22 13:04:55 2019
New Revision: 361425
URL: http://llvm.org/viewvc/llvm-project?rev=361425&view=rev
Log:
[X86][InstCombine] Remove InstCombine code that turns X86 round intrinsics into llvm.ceil/floor. Remove some isel patterns that existed because that was happening.
We were turning roundss/sd/ps/pd intrinsics with immediates of 1 or 2 into
llvm.floor/ceil. The llvm.floor/ceil intrinsics are supposed to correspond
to the libm functions, and the libm functions must not raise the precision
exception, so llvm.floor/ceil should always map to encodings 0x9 and 0xA,
which set the exception-suppression bit.
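For reference, here is a minimal standalone C sketch (using the standard
<smmintrin.h> intrinsics; it is not part of this patch) of how the imm8
encoding works: bits 0-1 select the rounding mode, and bit 3
(_MM_FROUND_NO_EXC, 0x8) suppresses the precision exception, so 0x9/0xA
are the libm-compatible floor/ceil encodings while 0x1/0x2 are not.

  /* Build with e.g.: cc -msse4.1 round_imm.c */
  #include <smmintrin.h>
  #include <stdio.h>

  int main(void) {
    __m128 x = _mm_set_ss(2.5f);
    /* 0x1: round toward -inf; the precision exception is still signaled. */
    __m128 f1 = _mm_round_ss(x, x, _MM_FROUND_TO_NEG_INF);
    /* 0x9 = _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: libm-style floor. */
    __m128 f9 = _mm_round_ss(x, x, _MM_FROUND_FLOOR);
    /* 0xA = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: libm-style ceil. */
    __m128 cA = _mm_round_ss(x, x, _MM_FROUND_CEIL);
    printf("%f %f %f\n", _mm_cvtss_f32(f1), _mm_cvtss_f32(f9),
           _mm_cvtss_f32(cA));  /* 2.000000 2.000000 3.000000 */
    return 0;
  }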
We had a mix of isel patterns where some used 0x9 and 0xA and others used
0x1 and 0x2. We need to be consistent and always use 0x9 and 0xA.
Since we have no way in isel of knowing where an llvm.ceil/floor came
from, we can't map the X86-specific intrinsics with encodings 1 or 2 to
them. We could map 0x9 and 0xA to llvm.ceil/floor instead, but I'd really
like to see a use case and an optimization advantage first.
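To make the difference observable, here is a hedged C sketch (untested; it
assumes a libc whose fetestexcept reflects the SSE MXCSR flags, as glibc on
x86 does): rounding 2.5f toward -inf is inexact, so encoding 0x1 records the
precision (inexact) exception, while encoding 0x9, like floorf itself, must
leave it clear. This is why the two encodings are not interchangeable.

  /* Build with e.g.: cc -O0 -msse4.1 round_exc.c (higher optimization
     levels may move the rounding relative to the fenv calls). */
  #include <fenv.h>
  #include <smmintrin.h>
  #include <stdio.h>

  int main(void) {
    __m128 x = _mm_set_ss(2.5f);
    __m128 r;

    feclearexcept(FE_INEXACT);
    r = _mm_round_ss(x, x, _MM_FROUND_TO_NEG_INF);    /* imm 0x1 */
    printf("imm 0x1: %f inexact=%d\n", _mm_cvtss_f32(r),
           fetestexcept(FE_INEXACT) != 0);            /* inexact=1 */

    feclearexcept(FE_INEXACT);
    r = _mm_round_ss(x, x, _MM_FROUND_FLOOR);         /* imm 0x9 */
    printf("imm 0x9: %f inexact=%d\n", _mm_cvtss_f32(r),
           fetestexcept(FE_INEXACT) != 0);            /* inexact=0 */
    return 0;
  }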
I've left the backend test cases in to show the blend sequence we now emit
without the extra isel patterns, but I've removed the InstCombine tests
completely.
Removed:
llvm/trunk/test/Transforms/InstCombine/X86/x86-avx.ll
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/test/CodeGen/X86/vec_floor.ll
llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll
llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=361425&r1=361424&r2=361425&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed May 22 13:04:55 2019
@@ -9392,32 +9392,6 @@ defm : avx512_masked_scalar<fsqrt, "SQRT
(v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
-multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
- X86VectorVTInfo _, PatLeaf ZeroFP,
- bits<8> ImmV, Predicate BasePredicate> {
- let Predicates = [BasePredicate] in {
- def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
- (OpNode (extractelt _.VT:$src2, (iPTR 0))),
- (extractelt _.VT:$dst, (iPTR 0))))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
- _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
-
- def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
- (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
- VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
- }
-}
-
-defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
- v4f32x_info, fp32imm0, 0x09, HasAVX512>;
-defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
- v4f32x_info, fp32imm0, 0x0A, HasAVX512>;
-defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
- v2f64x_info, fp64imm0, 0x09, HasAVX512>;
-defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
- v2f64x_info, fp64imm0, 0x0A, HasAVX512>;
-
//-------------------------------------------------
// Integer truncate and extend operations
@@ -12293,26 +12267,6 @@ multiclass AVX512_scalar_unary_math_patt
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
-multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
- SDNode Move, X86VectorVTInfo _,
- bits<8> ImmV> {
- let Predicates = [HasAVX512] in {
- def : Pat<(_.VT (Move _.VT:$dst,
- (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
- (i32 ImmV))>;
- }
-}
-
-defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
- v4f32x_info, 0x01>;
-defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
- v4f32x_info, 0x02>;
-defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
- v2f64x_info, 0x01>;
-defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
- v2f64x_info, 0x02>;
-
//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=361425&r1=361424&r2=361425&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed May 22 13:04:55 2019
@@ -3099,23 +3099,6 @@ multiclass scalar_unary_math_patterns<SD
}
}
-multiclass scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix, SDNode Move,
- ValueType VT, bits<8> ImmV,
- Predicate BasePredicate> {
- let Predicates = [BasePredicate] in {
- def : Pat<(VT (Move VT:$dst, (scalar_to_vector
- (OpNode (extractelt VT:$src, 0))))),
- (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src, (i32 ImmV))>;
- }
-
- // Repeat for AVX versions of the instructions.
- let Predicates = [UseAVX] in {
- def : Pat<(VT (Move VT:$dst, (scalar_to_vector
- (OpNode (extractelt VT:$src, 0))))),
- (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src, (i32 ImmV))>;
- }
-}
-
defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
@@ -5984,15 +5967,6 @@ let Predicates = [UseSSE41] in {
(ROUNDPDm addr:$src, (i32 0xB))>;
}
-defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSS", X86Movss,
- v4f32, 0x01, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSS", X86Movss,
- v4f32, 0x02, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSD", X86Movsd,
- v2f64, 0x01, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSD", X86Movsd,
- v2f64, 0x02, UseSSE41>;
-
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=361425&r1=361424&r2=361425&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Wed May 22 13:04:55 2019
@@ -607,105 +607,6 @@ static Value *simplifyX86pack(IntrinsicI
return Builder.CreateTrunc(Shuffle, ResTy);
}
-// Replace X86-specific intrinsics with generic floor-ceil where applicable.
-static Value *simplifyX86round(IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- ConstantInt *Arg = nullptr;
- Intrinsic::ID IntrinsicID = II.getIntrinsicID();
-
- if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
- IntrinsicID == Intrinsic::x86_sse41_round_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(2));
- else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
- else
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(1));
- if (!Arg)
- return nullptr;
- unsigned RoundControl = Arg->getZExtValue();
-
- Arg = nullptr;
- unsigned SAE = 0;
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
- else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(5));
- else
- SAE = 4;
- if (!SAE) {
- if (!Arg)
- return nullptr;
- SAE = Arg->getZExtValue();
- }
-
- if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/))
- return nullptr;
-
- Value *Src, *Dst, *Mask;
- bool IsScalar = false;
- if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
- IntrinsicID == Intrinsic::x86_sse41_round_sd ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- IsScalar = true;
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- Mask = II.getArgOperand(3);
- Value *Zero = Constant::getNullValue(Mask->getType());
- Mask = Builder.CreateAnd(Mask, 1);
- Mask = Builder.CreateICmp(ICmpInst::ICMP_NE, Mask, Zero);
- Dst = II.getArgOperand(2);
- } else
- Dst = II.getArgOperand(0);
- Src = Builder.CreateExtractElement(II.getArgOperand(1), (uint64_t)0);
- } else {
- Src = II.getArgOperand(0);
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_128 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_256 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_128 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_256 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512) {
- Dst = II.getArgOperand(2);
- Mask = II.getArgOperand(3);
- } else {
- Dst = Src;
- Mask = ConstantInt::getAllOnesValue(
- Builder.getIntNTy(Src->getType()->getVectorNumElements()));
- }
- }
-
- Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
- Value *Res = Builder.CreateUnaryIntrinsic(ID, Src, &II);
- if (!IsScalar) {
- if (auto *C = dyn_cast<Constant>(Mask))
- if (C->isAllOnesValue())
- return Res;
- auto *MaskTy = VectorType::get(
- Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
- Mask = Builder.CreateBitCast(Mask, MaskTy);
- unsigned Width = Src->getType()->getVectorNumElements();
- if (MaskTy->getVectorNumElements() > Width) {
- uint32_t Indices[4];
- for (unsigned i = 0; i != Width; ++i)
- Indices[i] = i;
- Mask = Builder.CreateShuffleVector(Mask, Mask,
- makeArrayRef(Indices, Width));
- }
- return Builder.CreateSelect(Mask, Res, Dst);
- }
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- Dst = Builder.CreateExtractElement(Dst, (uint64_t)0);
- Res = Builder.CreateSelect(Mask, Res, Dst);
- Dst = II.getArgOperand(0);
- }
- return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
-}
-
static Value *simplifyX86movmsk(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
Value *Arg = II.getArgOperand(0);
@@ -2603,22 +2504,6 @@ Instruction *InstCombiner::visitCallInst
break;
}
- case Intrinsic::x86_sse41_round_ps:
- case Intrinsic::x86_sse41_round_pd:
- case Intrinsic::x86_avx_round_ps_256:
- case Intrinsic::x86_avx_round_pd_256:
- case Intrinsic::x86_avx512_mask_rndscale_ps_128:
- case Intrinsic::x86_avx512_mask_rndscale_ps_256:
- case Intrinsic::x86_avx512_mask_rndscale_ps_512:
- case Intrinsic::x86_avx512_mask_rndscale_pd_128:
- case Intrinsic::x86_avx512_mask_rndscale_pd_256:
- case Intrinsic::x86_avx512_mask_rndscale_pd_512:
- case Intrinsic::x86_avx512_mask_rndscale_ss:
- case Intrinsic::x86_avx512_mask_rndscale_sd:
- if (Value *V = simplifyX86round(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
@@ -2812,13 +2697,6 @@ Instruction *InstCombiner::visitCallInst
}
break;
- case Intrinsic::x86_sse41_round_ss:
- case Intrinsic::x86_sse41_round_sd: {
- if (Value *V = simplifyX86round(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
- }
-
// Constant fold ashr( <A x Bi>, Ci ).
// Constant fold lshr( <A x Bi>, Ci ).
// Constant fold shl( <A x Bi>, Ci ).
Modified: llvm/trunk/test/CodeGen/X86/vec_floor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_floor.ll?rev=361425&r1=361424&r2=361425&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_floor.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_floor.ll Wed May 22 13:04:55 2019
@@ -821,18 +821,20 @@ define <4 x float> @const_trunc_v4f32()
define <4 x float> @floor_ss(<4 x float> %x, <4 x float> %y) nounwind {
; SSE41-LABEL: floor_ss:
; SSE41: ## %bb.0:
-; SSE41-NEXT: roundss $1, %xmm0, %xmm1
-; SSE41-NEXT: movaps %xmm1, %xmm0
+; SSE41-NEXT: roundss $9, %xmm0, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: floor_ss:
; AVX: ## %bb.0:
-; AVX-NEXT: vroundss $1, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
;
; AVX512-LABEL: floor_ss:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vroundss $1, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: retq
%s = extractelement <4 x float> %x, i32 0
%call = call float @llvm.floor.f32(float %s)
@@ -844,18 +846,20 @@ declare float @llvm.floor.f32(float %s)
define <2 x double> @floor_sd(<2 x double> %x, <2 x double> %y) nounwind {
; SSE41-LABEL: floor_sd:
; SSE41: ## %bb.0:
-; SSE41-NEXT: roundsd $1, %xmm0, %xmm1
-; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: roundsd $9, %xmm0, %xmm0
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: floor_sd:
; AVX: ## %bb.0:
-; AVX-NEXT: vroundsd $1, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
;
; AVX512-LABEL: floor_sd:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vroundsd $1, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX512-NEXT: retq
%s = extractelement <2 x double> %x, i32 0
%call = call double @llvm.floor.f64(double %s)
@@ -1373,8 +1377,9 @@ define <4 x float> @floor_mask_ss(<4 x f
;
; AVX512-LABEL: floor_mask_ss:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%mask = and i8 %k, 1
@@ -1414,8 +1419,9 @@ define <4 x float> @floor_maskz_ss(<4 x
;
; AVX512-LABEL: floor_maskz_ss:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask = and i8 %k, 1
%nmask = icmp eq i8 %mask, 0
@@ -1451,8 +1457,9 @@ define <2 x double> @floor_mask_sd(<2 x
;
; AVX512-LABEL: floor_mask_sd:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%mask = and i8 %k, 1
@@ -1492,8 +1499,9 @@ define <2 x double> @floor_maskz_sd(<2 x
;
; AVX512-LABEL: floor_maskz_sd:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask = and i8 %k, 1
%nmask = icmp eq i8 %mask, 0
@@ -1529,8 +1537,9 @@ define <4 x float> @floor_mask_ss_trunc(
;
; AVX512-LABEL: floor_mask_ss_trunc:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%mask = trunc i16 %k to i1
@@ -1572,8 +1581,9 @@ define <4 x float> @floor_maskz_ss_trunc
;
; AVX512-LABEL: floor_maskz_ss_trunc:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask = trunc i16 %k to i1
%s = extractelement <4 x float> %x, i64 0
@@ -1608,8 +1618,9 @@ define <2 x double> @floor_mask_sd_trunc
;
; AVX512-LABEL: floor_mask_sd_trunc:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%mask = trunc i16 %k to i1
@@ -1651,8 +1662,9 @@ define <2 x double> @floor_maskz_sd_trun
;
; AVX512-LABEL: floor_maskz_sd_trunc:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask = trunc i16 %k to i1
%s = extractelement <2 x double> %x, i64 0
@@ -1683,8 +1695,9 @@ define <4 x float> @floor_mask_ss_mask8(
;
; AVX512-LABEL: floor_mask_ss_mask8:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $9, %xmm0, %xmm0, %xmm3
; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm3, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%mask1 = fcmp oeq <4 x float> %x, %y
@@ -1716,8 +1729,9 @@ define <4 x float> @floor_maskz_ss_mask8
;
; AVX512-LABEL: floor_maskz_ss_mask8:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $9, %xmm0, %xmm0, %xmm2
; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask1 = fcmp oeq <4 x float> %x, %y
%mask = extractelement <4 x i1> %mask1, i64 0
@@ -1749,8 +1763,9 @@ define <2 x double> @floor_mask_sd_mask8
;
; AVX512-LABEL: floor_mask_sd_mask8:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm3
; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovsd %xmm3, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%mask1 = fcmp oeq <2 x double> %x, %y
@@ -1782,8 +1797,9 @@ define <2 x double> @floor_maskz_sd_mask
;
; AVX512-LABEL: floor_maskz_sd_mask8:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm2
; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask1 = fcmp oeq <2 x double> %x, %y
%mask = extractelement <2 x i1> %mask1, i64 0
@@ -1797,18 +1813,20 @@ define <2 x double> @floor_maskz_sd_mask
define <4 x float> @ceil_ss(<4 x float> %x, <4 x float> %y) nounwind {
; SSE41-LABEL: ceil_ss:
; SSE41: ## %bb.0:
-; SSE41-NEXT: roundss $2, %xmm0, %xmm1
-; SSE41-NEXT: movaps %xmm1, %xmm0
+; SSE41-NEXT: roundss $10, %xmm0, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: ceil_ss:
; AVX: ## %bb.0:
-; AVX-NEXT: vroundss $2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
;
; AVX512-LABEL: ceil_ss:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vroundss $2, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: retq
%s = extractelement <4 x float> %x, i32 0
%call = call float @llvm.ceil.f32(float %s)
@@ -1820,18 +1838,20 @@ declare float @llvm.ceil.f32(float %s)
define <2 x double> @ceil_sd(<2 x double> %x, <2 x double> %y) nounwind {
; SSE41-LABEL: ceil_sd:
; SSE41: ## %bb.0:
-; SSE41-NEXT: roundsd $2, %xmm0, %xmm1
-; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: roundsd $10, %xmm0, %xmm0
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: ceil_sd:
; AVX: ## %bb.0:
-; AVX-NEXT: vroundsd $2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
;
; AVX512-LABEL: ceil_sd:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vroundsd $2, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX512-NEXT: retq
%s = extractelement <2 x double> %x, i32 0
%call = call double @llvm.ceil.f64(double %s)
@@ -2349,8 +2369,9 @@ define <4 x float> @ceil_mask_ss(<4 x fl
;
; AVX512-LABEL: ceil_mask_ss:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%mask = and i8 %k, 1
@@ -2390,8 +2411,9 @@ define <4 x float> @ceil_maskz_ss(<4 x f
;
; AVX512-LABEL: ceil_maskz_ss:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask = and i8 %k, 1
%nmask = icmp eq i8 %mask, 0
@@ -2427,8 +2449,9 @@ define <2 x double> @ceil_mask_sd(<2 x d
;
; AVX512-LABEL: ceil_mask_sd:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%mask = and i8 %k, 1
@@ -2468,8 +2491,9 @@ define <2 x double> @ceil_maskz_sd(<2 x
;
; AVX512-LABEL: ceil_maskz_sd:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask = and i8 %k, 1
%nmask = icmp eq i8 %mask, 0
@@ -2505,8 +2529,9 @@ define <4 x float> @ceil_mask_ss_trunc(<
;
; AVX512-LABEL: ceil_mask_ss_trunc:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%mask = trunc i16 %k to i1
@@ -2548,8 +2573,9 @@ define <4 x float> @ceil_maskz_ss_trunc(
;
; AVX512-LABEL: ceil_maskz_ss_trunc:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask = trunc i16 %k to i1
%s = extractelement <4 x float> %x, i64 0
@@ -2584,8 +2610,9 @@ define <2 x double> @ceil_mask_sd_trunc(
;
; AVX512-LABEL: ceil_mask_sd_trunc:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%mask = trunc i16 %k to i1
@@ -2627,8 +2654,9 @@ define <2 x double> @ceil_maskz_sd_trunc
;
; AVX512-LABEL: ceil_maskz_sd_trunc:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask = trunc i16 %k to i1
%s = extractelement <2 x double> %x, i64 0
@@ -2659,8 +2687,9 @@ define <4 x float> @ceil_mask_ss_mask8(<
;
; AVX512-LABEL: ceil_mask_ss_mask8:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $10, %xmm0, %xmm0, %xmm3
; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovss %xmm3, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%mask1 = fcmp oeq <4 x float> %x, %y
@@ -2692,8 +2721,9 @@ define <4 x float> @ceil_maskz_ss_mask8(
;
; AVX512-LABEL: ceil_maskz_ss_mask8:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundss $10, %xmm0, %xmm0, %xmm2
; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask1 = fcmp oeq <4 x float> %x, %y
%mask = extractelement <4 x i1> %mask1, i64 0
@@ -2725,8 +2755,9 @@ define <2 x double> @ceil_mask_sd_mask8(
;
; AVX512-LABEL: ceil_mask_sd_mask8:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm3
; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT: vmovsd %xmm3, %xmm1, %xmm2 {%k1}
; AVX512-NEXT: vmovapd %xmm2, %xmm0
; AVX512-NEXT: retq
%mask1 = fcmp oeq <2 x double> %x, %y
@@ -2758,8 +2789,9 @@ define <2 x double> @ceil_maskz_sd_mask8
;
; AVX512-LABEL: ceil_maskz_sd_mask8:
; AVX512: ## %bb.0:
+; AVX512-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm2
; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%mask1 = fcmp oeq <2 x double> %x, %y
%mask = extractelement <2 x i1> %mask1, i64 0
Removed: llvm/trunk/test/Transforms/InstCombine/X86/x86-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-avx.ll?rev=361424&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-avx.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-avx.ll (removed)
@@ -1,41 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32)
-declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32)
-
-define <8 x float> @test_round_ps_floor(<8 x float> %a) {
-; CHECK-LABEL: @test_round_ps_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[A:%.*]])
-; CHECK-NEXT: ret <8 x float> [[TMP1]]
-;
- %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a, i32 1)
- ret <8 x float> %1
-}
-
-define <8 x float> @test_round_ps_ceil(<8 x float> %a) {
-; CHECK-LABEL: @test_round_ps_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[A:%.*]])
-; CHECK-NEXT: ret <8 x float> [[TMP1]]
-;
- %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a, i32 2)
- ret <8 x float> %1
-}
-
-define <4 x double> @test_round_pd_floor(<4 x double> %a) {
-; CHECK-LABEL: @test_round_pd_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[A:%.*]])
-; CHECK-NEXT: ret <4 x double> [[TMP1]]
-;
- %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a, i32 1)
- ret <4 x double> %1
-}
-
-define <4 x double> @test_round_pd_ceil(<4 x double> %a) {
-; CHECK-LABEL: @test_round_pd_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[A:%.*]])
-; CHECK-NEXT: ret <4 x double> [[TMP1]]
-;
- %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a, i32 2)
- ret <4 x double> %1
-}
Modified: llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll?rev=361425&r1=361424&r2=361425&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll Wed May 22 13:04:55 2019
@@ -916,213 +916,6 @@ declare i64 @llvm.x86.avx512.vcvtsd2usi6
declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32)
declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32)
-declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)
-declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)
-declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
-declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
-declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
-declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
-declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
-declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
-
-define <4 x float> @test_rndscale_ss_floor(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ss_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SRC1:%.*]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.floor.f32(float [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[DST:%.*]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP2]], float [[TMP5]], float [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[SRC0:%.*]], float [[TMP6]], i64 0
-; CHECK-NEXT: ret <4 x float> [[TMP7]]
-;
- %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k, i32 1, i32 4)
- ret <4 x float> %1
-}
-
-define <4 x float> @test_rndscale_ss_ceil(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ss_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SRC1:%.*]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.ceil.f32(float [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[DST:%.*]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP2]], float [[TMP5]], float [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[SRC0:%.*]], float [[TMP6]], i64 0
-; CHECK-NEXT: ret <4 x float> [[TMP7]]
-;
- %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k, i32 2, i32 4)
- ret <4 x float> %1
-}
-
-define <2 x double> @test_rndscale_sd_floor(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_sd_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[SRC1:%.*]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.floor.f64(double [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[DST:%.*]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP2]], double [[TMP5]], double [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[SRC0:%.*]], double [[TMP6]], i64 0
-; CHECK-NEXT: ret <2 x double> [[TMP7]]
-;
- %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k, i32 1, i32 4)
- ret <2 x double> %1
-}
-
-define <2 x double> @test_rndscale_sd_ceil(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_sd_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[SRC1:%.*]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.ceil.f64(double [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[DST:%.*]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP2]], double [[TMP5]], double [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[SRC0:%.*]], double [[TMP6]], i64 0
-; CHECK-NEXT: ret <2 x double> [[TMP7]]
-;
- %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k, i32 2, i32 4)
- ret <2 x double> %1
-}
-
-define <4 x float> @test_rndscale_ps_128_floor(<4 x float> %src, <4 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ps_128_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP1]], <4 x float> [[DST:%.*]]
-; CHECK-NEXT: ret <4 x float> [[TMP4]]
-;
- %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %src, i32 1, <4 x float> %dst, i8 %k)
- ret <4 x float> %1
-}
-
-define <4 x float> @test_rndscale_ps_128_ceil(<4 x float> %src, <4 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ps_128_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP1]], <4 x float> [[DST:%.*]]
-; CHECK-NEXT: ret <4 x float> [[TMP4]]
-;
- %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %src, i32 2, <4 x float> %dst, i8 %k)
- ret <4 x float> %1
-}
-
-define <8 x float> @test_rndscale_ps_256_floor(<8 x float> %src, <8 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ps_256_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[DST:%.*]]
-; CHECK-NEXT: ret <8 x float> [[TMP3]]
-;
- %1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %src, i32 1, <8 x float> %dst, i8 %k)
- ret <8 x float> %1
-}
-
-define <8 x float> @test_rndscale_ps_256_ceil(<8 x float> %src, <8 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ps_256_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[DST:%.*]]
-; CHECK-NEXT: ret <8 x float> [[TMP3]]
-;
- %1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %src, i32 2, <8 x float> %dst, i8 %k)
- ret <8 x float> %1
-}
-
-define <16 x float> @test_rndscale_ps_512_floor(<16 x float> %src, <16 x float> %dst, i16 %k) {
-; CHECK-LABEL: @test_rndscale_ps_512_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.floor.v16f32(<16 x float> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[K:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[DST:%.*]]
-; CHECK-NEXT: ret <16 x float> [[TMP3]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %src, i32 1, <16 x float> %dst, i16 %k, i32 4)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_rndscale_ps_512_ceil(<16 x float> %src, <16 x float> %dst, i16 %k) {
-; CHECK-LABEL: @test_rndscale_ps_512_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.ceil.v16f32(<16 x float> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[K:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[DST:%.*]]
-; CHECK-NEXT: ret <16 x float> [[TMP3]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %src, i32 2, <16 x float> %dst, i16 %k, i32 4)
- ret <16 x float> %1
-}
-
-define <2 x double> @test_rndscale_pd_128_floor(<2 x double> %src, <2 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_128_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP1]], <2 x double> [[DST:%.*]]
-; CHECK-NEXT: ret <2 x double> [[TMP4]]
-;
- %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %src, i32 1, <2 x double> %dst, i8 %k)
- ret <2 x double> %1
-}
-
-define <2 x double> @test_rndscale_pd_128_ceil(<2 x double> %src, <2 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_128_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP1]], <2 x double> [[DST:%.*]]
-; CHECK-NEXT: ret <2 x double> [[TMP4]]
-;
- %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %src, i32 2, <2 x double> %dst, i8 %k)
- ret <2 x double> %1
-}
-
-define <4 x double> @test_rndscale_pd_256_floor(<4 x double> %src, <4 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_256_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> [[DST:%.*]]
-; CHECK-NEXT: ret <4 x double> [[TMP4]]
-;
- %1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %src, i32 1, <4 x double> %dst, i8 %k)
- ret <4 x double> %1
-}
-
-define <4 x double> @test_rndscale_pd_256_ceil(<4 x double> %src, <4 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_256_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> [[DST:%.*]]
-; CHECK-NEXT: ret <4 x double> [[TMP4]]
-;
- %1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %src, i32 2, <4 x double> %dst, i8 %k)
- ret <4 x double> %1
-}
-
-define <8 x double> @test_rndscale_pd_512_floor(<8 x double> %src, <8 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_512_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.floor.v8f64(<8 x double> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[DST:%.*]]
-; CHECK-NEXT: ret <8 x double> [[TMP3]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %src, i32 1, <8 x double> %dst, i8 %k, i32 4)
- ret <8 x double> %1
-}
-
-define <8 x double> @test_rndscale_pd_512_ceil(<8 x double> %src, <8 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_512_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.ceil.v8f64(<8 x double> [[SRC:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[DST:%.*]]
-; CHECK-NEXT: ret <8 x double> [[TMP3]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %src, i32 2, <8 x double> %dst, i8 %k, i32 4)
- ret <8 x double> %1
-}
-
declare float @llvm.fma.f32(float, float, float) #1
define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
Modified: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll?rev=361425&r1=361424&r2=361425&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll Wed May 22 13:04:55 2019
@@ -13,28 +13,6 @@ define <2 x double> @test_round_sd(<2 x
ret <2 x double> %3
}
-define <2 x double> @test_round_sd_floor(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_round_sd_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.floor.f64(double [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[A:%.*]], double [[TMP2]], i64 0
-; CHECK-NEXT: ret <2 x double> [[TMP3]]
-;
- %1 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 1)
- ret <2 x double> %1
-}
-
-define <2 x double> @test_round_sd_ceil(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_round_sd_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.ceil.f64(double [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[A:%.*]], double [[TMP2]], i64 0
-; CHECK-NEXT: ret <2 x double> [[TMP3]]
-;
- %1 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 2)
- ret <2 x double> %1
-}
-
define double @test_round_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_round_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
@@ -79,28 +57,6 @@ define <4 x float> @test_round_ss(<4 x f
ret <4 x float> %7
}
-define <4 x float> @test_round_ss_floor(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_round_ss_floor(
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.floor.f32(float [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[A:%.*]], float [[TMP2]], i64 0
-; CHECK-NEXT: ret <4 x float> [[TMP3]]
-;
- %1 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a, <4 x float> %b, i32 1)
- ret <4 x float> %1
-}
-
-define <4 x float> @test_round_ss_ceil(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_round_ss_ceil(
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.ceil.f32(float [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[A:%.*]], float [[TMP2]], i64 0
-; CHECK-NEXT: ret <4 x float> [[TMP3]]
-;
- %1 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a, <4 x float> %b, i32 2)
- ret <4 x float> %1
-}
-
define float @test_round_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_round_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0