[llvm] r334849 - [X86] Lowering sqrt intrinsics to native IR
Tomasz Krupa via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 15 11:05:24 PDT 2018
Author: tkrupa
Date: Fri Jun 15 11:05:24 2018
New Revision: 334849
URL: http://llvm.org/viewvc/llvm-project?rev=334849&view=rev
Log:
[X86] Lowering sqrt intrinsics to native IR
Summary: Complementary patch to lowering sqrt intrinsics in Clang.
Reviewers: craig.topper, spatel, RKSimon, DavidKreitzer, uriel.k
Reviewed By: craig.topper
Subscribers: tkrupa, mike.dvoretsky, llvm-commits
Differential Revision: https://reviews.llvm.org/D41599
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/fold-load-unops.ll
llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll
llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse_partial_update.ll
llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll
llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Fri Jun 15 11:05:24 2018
@@ -180,12 +180,6 @@ let TargetPrefix = "x86" in {
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_sse_sqrt_ps : GCCBuiltin<"__builtin_ia32_sqrtps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
- [IntrNoMem]>;
def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
@@ -304,12 +298,6 @@ let TargetPrefix = "x86" in { // All in
// FP arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_sse2_sqrt_pd : GCCBuiltin<"__builtin_ia32_sqrtpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
- [IntrNoMem]>;
def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
@@ -961,11 +949,6 @@ let TargetPrefix = "x86" in { // All in
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-
def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
@@ -3868,29 +3851,17 @@ let TargetPrefix = "x86" in { // All in
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss_round_mask">,
+ def int_x86_avx512_mask_sqrt_ss :
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd_round_mask">,
+ def int_x86_avx512_mask_sqrt_sd :
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sqrt_pd_128 : GCCBuiltin<"__builtin_ia32_sqrtpd128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">,
+ def int_x86_avx512_mask_sqrt_pd_512 :
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sqrt_ps_128 : GCCBuiltin<"__builtin_ia32_sqrtps128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">,
+ def int_x86_avx512_mask_sqrt_ps_512 :
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_fixupimm_pd_128 :
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Fri Jun 15 11:05:24 2018
@@ -97,6 +97,15 @@ static bool ShouldUpgradeX86Intrinsic(Fu
Name.startswith("avx2.pabs.") || // Added in 6.0
Name.startswith("avx512.mask.pabs.") || // Added in 6.0
Name.startswith("avx512.broadcastm") || // Added in 6.0
+ Name == "sse.sqrt.ss" || // Added in 7.0
+ Name == "sse2.sqrt.sd" || // Added in 7.0
+ Name == "avx512.mask.sqrt.ps.128" || // Added in 7.0
+ Name == "avx512.mask.sqrt.ps.256" || // Added in 7.0
+ Name == "avx512.mask.sqrt.pd.128" || // Added in 7.0
+ Name == "avx512.mask.sqrt.pd.256" || // Added in 7.0
+ Name.startswith("avx.sqrt.p") || // Added in 7.0
+ Name.startswith("sse2.sqrt.p") || // Added in 7.0
+ Name.startswith("sse.sqrt.p") || // Added in 7.0
Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
Name.startswith("sse2.pcmpeq.") || // Added in 3.1
Name.startswith("sse2.pcmpgt.") || // Added in 3.1
@@ -1475,6 +1484,29 @@ void llvm::UpgradeIntrinsicCall(CallInst
ExtTy->getPrimitiveSizeInBits();
Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
Rep = Builder.CreateVectorSplat(NumElts, Rep);
+ } else if (IsX86 && (Name == "sse.sqrt.ss" ||
+ Name == "sse2.sqrt.sd")) {
+ Value *Vec = CI->getArgOperand(0);
+ Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
+ Function *Intr = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::sqrt, Elt0->getType());
+ Elt0 = Builder.CreateCall(Intr, Elt0);
+ Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
+ } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
+ Name.startswith("sse2.sqrt.p") ||
+ Name.startswith("sse.sqrt.p"))) {
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::sqrt,
+ CI->getType()),
+ {CI->getArgOperand(0)});
+ } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p") &&
+ !Name.endswith("512"))) {
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::sqrt,
+ CI->getType()),
+ {CI->getArgOperand(0)});
+ Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.ptestm") ||
Name.startswith("avx512.ptestnm"))) {
Value *Op0 = CI->getArgOperand(0);
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Jun 15 11:05:24 2018
@@ -8534,7 +8534,7 @@ multiclass avx512_sqrt_packed_all_round<
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Name, Intrinsic Intr> {
+ X86VectorVTInfo _, string Name> {
let ExeDomain = _.ExeDomain in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
@@ -8575,30 +8575,20 @@ multiclass avx512_sqrt_scalar<bits<8> op
def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
(!cast<Instruction>(Name#Zr)
(_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
-
- def : Pat<(Intr VR128X:$src),
- (!cast<Instruction>(Name#Zr_Int) VR128X:$src,
- VR128X:$src)>;
}
let Predicates = [HasAVX512, OptForSize] in {
def : Pat<(_.EltVT (fsqrt (load addr:$src))),
(!cast<Instruction>(Name#Zm)
(_.EltVT (IMPLICIT_DEF)), addr:$src)>;
-
- def : Pat<(Intr _.ScalarIntMemCPat:$src2),
- (!cast<Instruction>(Name#Zm_Int)
- (_.VT (IMPLICIT_DEF)), addr:$src2)>;
}
}
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
- defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS",
- int_x86_sse_sqrt_ss>,
+ defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
- defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD",
- int_x86_sse2_sqrt_sd>,
+ defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
@@ -8711,6 +8701,13 @@ multiclass avx512_masked_scalar<SDNode O
}
}
+defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
+ (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
+ fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
+defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
+ (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
+ fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
+
multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
bits<8> ImmV, dag OutMask,
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Jun 15 11:05:24 2018
@@ -2761,12 +2761,9 @@ defm : scalar_math_patterns<fdiv, "DIVSD
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
- ValueType vt, ValueType ScalarVT,
- X86MemOperand x86memop,
- Operand intmemop, ComplexPattern int_cpat,
- Intrinsic Intr, SDNode OpNode, Domain d,
- X86FoldableSchedWrite sched,
- Predicate target> {
+ ValueType ScalarVT, X86MemOperand x86memop,
+ Operand intmemop, SDNode OpNode, Domain d,
+ X86FoldableSchedWrite sched, Predicate target> {
let hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
!strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
@@ -2790,6 +2787,11 @@ multiclass sse_fp_unop_s<bits<8> opc, st
}
}
+}
+
+multiclass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt,
+ ComplexPattern int_cpat, Intrinsic Intr,
+ Predicate target, string Suffix> {
let Predicates = [target] in {
// These are unary operations, but they are modeled as having 2 source operands
// because the high elements of the destination are unchanged in SSE.
@@ -2810,11 +2812,23 @@ multiclass sse_fp_unop_s<bits<8> opc, st
}
}
+multiclass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, ComplexPattern int_cpat,
+ Intrinsic Intr, Predicate target> {
+ let Predicates = [target] in {
+ def : Pat<(Intr VR128:$src),
+ (!cast<Instruction>(NAME#r_Int) VR128:$src,
+ VR128:$src)>;
+ }
+ let Predicates = [target, OptForSize] in {
+ def : Pat<(Intr int_cpat:$src2),
+ (!cast<Instruction>(NAME#m_Int)
+ (vt (IMPLICIT_DEF)), addr:$src2)>;
+ }
+}
+
multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
- ValueType vt, ValueType ScalarVT,
- X86MemOperand x86memop,
- Operand intmemop, ComplexPattern int_cpat,
- Intrinsic Intr, SDNode OpNode, Domain d,
+ ValueType ScalarVT, X86MemOperand x86memop,
+ Operand intmemop, SDNode OpNode, Domain d,
X86FoldableSchedWrite sched, Predicate target> {
let hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
@@ -2849,14 +2863,8 @@ multiclass avx_fp_unop_s<bits<8> opc, st
let Predicates = [target] in {
def : Pat<(OpNode RC:$src), (!cast<Instruction>(NAME#r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
- def : Pat<(Intr VR128:$src),
- (!cast<Instruction>(NAME#r_Int) VR128:$src,
- VR128:$src)>;
}
let Predicates = [target, OptForSize] in {
- def : Pat<(Intr int_cpat:$src2),
- (!cast<Instruction>(NAME#m_Int)
- (vt (IMPLICIT_DEF)), addr:$src2)>;
def : Pat<(ScalarVT (OpNode (load addr:$src))),
(!cast<Instruction>(NAME#m) (ScalarVT (IMPLICIT_DEF)),
addr:$src)>;
@@ -2935,29 +2943,32 @@ let Predicates = [HasAVX, NoVLX] in {
Sched<[sched.XMM.Folded]>;
}
+multiclass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86SchedWriteWidths sched, Predicate AVXTarget> {
+ defm SS : sse_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
+ !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss),
+ UseSSE1, "SS">, XS;
+ defm V#NAME#SS : avx_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
+ !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss),
+ AVXTarget>,
+ XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
+}
+
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
- defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
- ssmem, sse_load_f32,
- !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- SSEPackedSingle, sched.Scl, UseSSE1>, XS;
- defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
- f32mem, ssmem, sse_load_f32,
- !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- SSEPackedSingle, sched.Scl, AVXTarget>, XS, VEX_4V,
- VEX_LIG, VEX_WIG;
+ defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, f32, f32mem,
+ ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
+ defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, f32,
+ f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
+ XS, VEX_4V, VEX_LIG, VEX_WIG;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
- defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
- sdmem, sse_load_f64,
- !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
- OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
- defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
- f64mem, sdmem, sse_load_f64,
- !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
- OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
+ defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, f64, f64mem,
+ sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
+ defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, f64,
+ f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
XD, VEX_4V, VEX_LIG, VEX_WIG;
}
@@ -2970,8 +2981,10 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt"
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
+ sse1_fp_unop_s_intr<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
+ sse1_fp_unop_s_intr<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;
// There is no f64 version of the reciprocal approximation instructions.
@@ -3009,6 +3022,9 @@ multiclass scalar_unary_math_imm_pattern
}
}
+defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
+defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
+
multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
SDNode Move, ValueType VT,
Predicate BasePredicate> {
@@ -3028,10 +3044,6 @@ defm : scalar_unary_math_intr_patterns<i
v4f32, UseSSE1>;
defm : scalar_unary_math_intr_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss,
v4f32, UseSSE1>;
-defm : scalar_unary_math_intr_patterns<int_x86_sse_sqrt_ss, "SQRTSS", X86Movss,
- v4f32, UseSSE1>;
-defm : scalar_unary_math_intr_patterns<int_x86_sse2_sqrt_sd, "SQRTSD", X86Movsd,
- v2f64, UseSSE2>;
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Fri Jun 15 11:05:24 2018
@@ -318,8 +318,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx_round_pd_256, ROUNDP, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx_round_ps_256, ROUNDP, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx_rsqrt_ps_256, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
- X86_INTRINSIC_DATA(avx_sqrt_pd_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
- X86_INTRINSIC_DATA(avx_sqrt_ps_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_pd, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
@@ -894,12 +892,8 @@ static const IntrinsicData IntrinsicsWi
X86ISD::SCALEFS, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::SCALEFS, 0),
- X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
- X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK, ISD::FSQRT,
X86ISD::FSQRT_RND),
- X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
- X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK, ISD::FSQRT,
X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_mask_sqrt_sd, INTR_TYPE_SCALAR_MASK_RM,
@@ -1289,7 +1283,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(sse_movmsk_ps, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0),
X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
- X86_INTRINSIC_DATA(sse_sqrt_ps, INTR_TYPE_1OP, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(sse_ucomieq_ss, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_ucomige_ss, COMI, X86ISD::UCOMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse_ucomigt_ss, COMI, X86ISD::UCOMI, ISD::SETGT),
@@ -1345,7 +1338,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(sse2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(sse2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(sse2_sqrt_pd, INTR_TYPE_1OP, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Fri Jun 15 11:05:24 2018
@@ -1293,8 +1293,6 @@ Value *InstCombiner::SimplifyDemandedVec
// Unary scalar-as-vector operations that work column-wise.
case Intrinsic::x86_sse_rcp_ss:
case Intrinsic::x86_sse_rsqrt_ss:
- case Intrinsic::x86_sse_sqrt_ss:
- case Intrinsic::x86_sse2_sqrt_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll Fri Jun 15 11:05:24 2018
@@ -3018,10 +3018,12 @@ define <4 x double> @test_mm256_sqrt_pd(
; X64: # %bb.0:
; X64-NEXT: vsqrtpd %ymm0, %ymm0
; X64-NEXT: retq
- %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
- ret <4 x double> %res
+entry:
+ %0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a0) #2
+ ret <4 x double> %0
}
-declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
+
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #1
define <8 x float> @test_mm256_sqrt_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_sqrt_ps:
@@ -3033,10 +3035,12 @@ define <8 x float> @test_mm256_sqrt_ps(<
; X64: # %bb.0:
; X64-NEXT: vsqrtps %ymm0, %ymm0
; X64-NEXT: retq
- %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
- ret <8 x float> %res
+entry:
+ %0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %a0) #2
+ ret <8 x float> %0
}
-declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
+
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #1
define void @test_mm256_store_pd(double* %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_store_pd:
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll Fri Jun 15 11:05:24 2018
@@ -6,6 +6,36 @@
; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.
+define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
+; AVX-LABEL: test_x86_avx_sqrt_pd_256:
+; AVX: # %bb.0:
+; AVX-NEXT: vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
+; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0]
+; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
+
+define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
+; AVX-LABEL: test_x86_avx_sqrt_ps_256:
+; AVX: # %bb.0:
+; AVX-NEXT: vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
+; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0]
+; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
+
define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_pd_256_1:
; AVX: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Fri Jun 15 11:05:24 2018
@@ -622,39 +622,6 @@ define <8 x float> @test_x86_avx_rsqrt_p
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
-
-define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
-; AVX-LABEL: test_x86_avx_sqrt_pd_256:
-; AVX: # %bb.0:
-; AVX-NEXT: vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
-; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-;
-; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0]
-; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
-
-
-define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
-; AVX-LABEL: test_x86_avx_sqrt_ps_256:
-; AVX: # %bb.0:
-; AVX-NEXT: vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
-; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-;
-; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0]
-; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
- ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
-
-
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
; AVX-LABEL: test_x86_avx_vpermilvar_pd:
; AVX: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll Fri Jun 15 11:05:24 2018
@@ -6375,6 +6375,182 @@ declare <4 x double> @llvm.fma.v4f64(<4
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #8
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #8
+define <2 x double> @test_mm_mask_sqrt_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A) {
+; X32-LABEL: test_mm_mask_sqrt_pd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsqrtpd %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_sqrt_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsqrtpd %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %__A) #2
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__W
+ ret <2 x double> %2
+}
+
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+
+define <2 x double> @test_mm_maskz_sqrt_pd(i8 zeroext %__U, <2 x double> %__A) {
+; X32-LABEL: test_mm_maskz_sqrt_pd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsqrtpd %xmm0, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_sqrt_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsqrtpd %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %__A) #2
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer
+ ret <2 x double> %2
+}
+
+define <4 x double> @test_mm256_mask_sqrt_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A) {
+; X32-LABEL: test_mm256_mask_sqrt_pd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsqrtpd %ymm1, %ymm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_sqrt_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsqrtpd %ymm1, %ymm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %__A) #2
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__W
+ ret <4 x double> %2
+}
+
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
+
+define <4 x double> @test_mm256_maskz_sqrt_pd(i8 zeroext %__U, <4 x double> %__A) {
+; X32-LABEL: test_mm256_maskz_sqrt_pd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_maskz_sqrt_pd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %__A) #2
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer
+ ret <4 x double> %2
+}
+
+define <4 x float> @test_mm_mask_sqrt_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) {
+; X32-LABEL: test_mm_mask_sqrt_ps:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsqrtps %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_sqrt_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsqrtps %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %__A) #2
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__W
+ ret <4 x float> %2
+}
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+define <4 x float> @test_mm_maskz_sqrt_ps(i8 zeroext %__U, <4 x float> %__A) {
+; X32-LABEL: test_mm_maskz_sqrt_ps:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsqrtps %xmm0, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_sqrt_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsqrtps %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %__A) #2
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> zeroinitializer
+ ret <4 x float> %2
+}
+
+define <8 x float> @test_mm256_mask_sqrt_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A) {
+; X32-LABEL: test_mm256_mask_sqrt_ps:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsqrtps %ymm1, %ymm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_sqrt_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsqrtps %ymm1, %ymm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %__A) #2
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__W
+ ret <8 x float> %2
+}
+
+define <8 x float> @test_mm256_maskz_sqrt_ps(i8 zeroext %__U, <8 x float> %__A) {
+; X32-LABEL: test_mm256_maskz_sqrt_ps:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_maskz_sqrt_ps:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %__A) #2
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer
+ ret <8 x float> %2
+}
+
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
+
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
+declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>)
declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>)
declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8)
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Fri Jun 15 11:05:24 2018
@@ -12070,3 +12070,40 @@ define <8 x i32> @test_expand_load_d_256
%res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1)
ret <8 x i32> %res
}
+
+define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
+; X86-LABEL: test_sqrt_pd_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_sqrt_pd_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+
+define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
+; X86-LABEL: test_sqrt_ps_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_sqrt_ps_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Fri Jun 15 11:05:24 2018
@@ -999,43 +999,6 @@ define <4 x float> @test_mm512_min_ps_12
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
-define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
-; X86-LABEL: test_sqrt_pd_256:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_sqrt_pd_256:
-; X64: # %bb.0:
-; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
-
-define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
-; X86-LABEL: test_sqrt_ps_256:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_sqrt_ps_256:
-; X64: # %bb.0:
-; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
- ret <8 x float> %res
-}
-
-declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
-
define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_getexp_pd_256:
; CHECK: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/fold-load-unops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-load-unops.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-load-unops.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fold-load-unops.ll Fri Jun 15 11:05:24 2018
@@ -165,12 +165,14 @@ define float @sqrtss_size(float* %a) opt
define <4 x float> @sqrtss_full_size(<4 x float>* %a) optsize{
; SSE-LABEL: sqrtss_full_size:
; SSE: # %bb.0:
-; SSE-NEXT: sqrtss (%rdi), %xmm0
+; SSE-NEXT: movaps (%rdi), %xmm0
+; SSE-NEXT: sqrtss %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sqrtss_full_size:
; AVX: # %bb.0:
-; AVX-NEXT: vsqrtss (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovaps (%rdi), %xmm0
+; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load <4 x float>, <4 x float>* %a
%res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ld)
@@ -197,12 +199,14 @@ define double @sqrtsd_size(double* %a) o
define <2 x double> @sqrtsd_full_size(<2 x double>* %a) optsize {
; SSE-LABEL: sqrtsd_full_size:
; SSE: # %bb.0:
-; SSE-NEXT: sqrtsd (%rdi), %xmm0
+; SSE-NEXT: movapd (%rdi), %xmm0
+; SSE-NEXT: sqrtsd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sqrtsd_full_size:
; AVX: # %bb.0:
-; AVX-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovapd (%rdi), %xmm0
+; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%ld = load <2 x double>, <2 x double>* %a
%res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ld)
Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-fast-isel.ll Fri Jun 15 11:05:24 2018
@@ -2075,10 +2075,10 @@ define <4 x float> @test_mm_sqrt_ps(<4 x
; AVX: # %bb.0:
; AVX-NEXT: vsqrtps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
- %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
+ %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone
define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_sqrt_ss:
@@ -2090,10 +2090,12 @@ define <4 x float> @test_mm_sqrt_ss(<4 x
; AVX: # %bb.0:
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
- %sqrt = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
- ret <4 x float> %sqrt
+ %ext = extractelement <4 x float> %a0, i32 0
+ %sqrt = call float @llvm.sqrt.f32(float %ext)
+ %ins = insertelement <4 x float> %a0, float %sqrt, i32 0
+ ret <4 x float> %ins
}
-declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+declare float @llvm.sqrt.f32(float) nounwind readnone
define void @test_mm_store_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ps:
Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll Fri Jun 15 11:05:24 2018
@@ -6,6 +6,44 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
+
+define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
+; SSE-LABEL: test_x86_sse_sqrt_ps:
+; SSE: ## %bb.0:
+; SSE-NEXT: sqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x51,0xc0]
+; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+;
+; AVX1-LABEL: test_x86_sse_sqrt_ps:
+; AVX1: ## %bb.0:
+; AVX1-NEXT: vsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x51,0xc0]
+; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+;
+; AVX512-LABEL: test_x86_sse_sqrt_ps:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vsqrtps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
+; SSE-LABEL: test_x86_sse_sqrt_ss:
+; SSE: ## %bb.0:
+; SSE-NEXT: sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0]
+; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+;
+; AVX-LABEL: test_x86_sse_sqrt_ss:
+; AVX: ## %bb.0:
+; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]
+; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_x86_sse_storeu_ps:
; X86-SSE: ## %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll Fri Jun 15 11:05:24 2018
@@ -448,48 +448,6 @@ define <4 x float> @test_x86_sse_rsqrt_s
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
-define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
-; SSE-LABEL: test_x86_sse_sqrt_ps:
-; SSE: ## %bb.0:
-; SSE-NEXT: sqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x51,0xc0]
-; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX1-LABEL: test_x86_sse_sqrt_ps:
-; AVX1: ## %bb.0:
-; AVX1-NEXT: vsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x51,0xc0]
-; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX512-LABEL: test_x86_sse_sqrt_ps:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vsqrtps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0]
-; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
-
-
-define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
-; SSE-LABEL: test_x86_sse_sqrt_ss:
-; SSE: ## %bb.0:
-; SSE-NEXT: sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0]
-; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX1-LABEL: test_x86_sse_sqrt_ss:
-; AVX1: ## %bb.0:
-; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0]
-; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX512-LABEL: test_x86_sse_sqrt_ss:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
-; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
-
-
define void @test_x86_sse_stmxcsr(i8* %a0) {
; X86-SSE-LABEL: test_x86_sse_stmxcsr:
; X86-SSE: ## %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll Fri Jun 15 11:05:24 2018
@@ -83,26 +83,22 @@ define <4 x float> @test_div_ss(<4 x flo
define <4 x float> @test_sqrt_ss(<4 x float> %a) {
; SSE2-LABEL: test_sqrt_ss:
; SSE2: # %bb.0:
-; SSE2-NEXT: sqrtss %xmm0, %xmm1
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE2-NEXT: sqrtss %xmm0, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: test_sqrt_ss:
; SSE41: # %bb.0:
-; SSE41-NEXT: sqrtss %xmm0, %xmm1
-; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT: sqrtss %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_sqrt_ss:
; AVX1: # %bb.0:
-; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_sqrt_ss:
; AVX512: # %bb.0:
-; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
%1 = extractelement <4 x float> %a, i32 0
%2 = call float @llvm.sqrt.f32(float %1)
@@ -182,26 +178,22 @@ define <2 x double> @test_div_sd(<2 x do
define <2 x double> @test_sqrt_sd(<2 x double> %a) {
; SSE2-LABEL: test_sqrt_sd:
; SSE2: # %bb.0:
-; SSE2-NEXT: sqrtsd %xmm0, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT: sqrtsd %xmm0, %xmm0
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: test_sqrt_sd:
; SSE41: # %bb.0:
-; SSE41-NEXT: sqrtsd %xmm0, %xmm1
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: sqrtsd %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_sqrt_sd:
; AVX1: # %bb.0:
-; AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_sqrt_sd:
; AVX512: # %bb.0:
-; AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
%1 = extractelement <2 x double> %a, i32 0
%2 = call double @llvm.sqrt.f64(double %1)
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll Fri Jun 15 11:05:24 2018
@@ -3720,10 +3720,10 @@ define <2 x double> @test_mm_sqrt_pd(<2
; AVX: # %bb.0:
; AVX-NEXT: vsqrtpd %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
- %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
+ %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone
define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_sqrt_sd:
@@ -3736,14 +3736,12 @@ define <2 x double> @test_mm_sqrt_sd(<2
; AVX: # %bb.0:
; AVX-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
- %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
- %ext0 = extractelement <2 x double> %call, i32 0
- %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
- %ext1 = extractelement <2 x double> %a1, i32 1
- %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
- ret <2 x double> %ins1
+ %ext = extractelement <2 x double> %a0, i32 0
+ %sqrt = call double @llvm.sqrt.f64(double %ext)
+ %ins = insertelement <2 x double> %a1, double %sqrt, i32 0
+ ret <2 x double> %ins
}
-declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+declare double @llvm.sqrt.f64(double) nounwind readnone
define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_sra_epi16:
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll Fri Jun 15 11:05:24 2018
@@ -6,6 +6,89 @@
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
+
+define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
+; SSE-LABEL: test_x86_sse2_sqrt_pd:
+; SSE: ## %bb.0:
+; SSE-NEXT: sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0]
+; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+;
+; AVX1-LABEL: test_x86_sse2_sqrt_pd:
+; AVX1: ## %bb.0:
+; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0]
+; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+;
+; AVX512-LABEL: test_x86_sse2_sqrt_pd:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
+; SSE-LABEL: test_x86_sse2_sqrt_sd:
+; SSE: ## %bb.0:
+; SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
+; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+;
+; AVX-LABEL: test_x86_sse2_sqrt_sd:
+; AVX: ## %bb.0:
+; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
+; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) {
+; X86-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
+; X86-SSE: ## %bb.0:
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-SSE-NEXT: movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00]
+; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
+; X86-SSE-NEXT: retl ## encoding: [0xc3]
+;
+; X86-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
+; X86-AVX1: ## %bb.0:
+; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX1-NEXT: vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00]
+; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
+; X86-AVX1-NEXT: retl ## encoding: [0xc3]
+;
+; X86-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
+; X86-AVX512: ## %bb.0:
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX512-NEXT: vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
+; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
+; X86-AVX512-NEXT: retl ## encoding: [0xc3]
+;
+; X64-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
+; X64-SSE: ## %bb.0:
+; X64-SSE-NEXT: movapd (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x28,0x07]
+; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
+; X64-SSE-NEXT: retq ## encoding: [0xc3]
+;
+; X64-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
+; X64-AVX1: ## %bb.0:
+; X64-AVX1-NEXT: vmovapd (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x07]
+; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
+; X64-AVX1-NEXT: retq ## encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
+; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
+; X64-AVX512-NEXT: retq ## encoding: [0xc3]
+ %a1 = load <2 x double>, <2 x double>* %a0, align 16
+ %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+
+
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psll_dq_bs:
; SSE: ## %bb.0:
@@ -241,8 +324,8 @@ define void @test_x86_sse2_storeu_pd(i8*
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
-; X86-SSE-NEXT: movhpd LCPI8_0, %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
-; X86-SSE-NEXT: ## fixup A - offset: 4, value: LCPI8_0, kind: FK_Data_4
+; X86-SSE-NEXT: movhpd LCPI11_0, %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
+; X86-SSE-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-SSE-NEXT: ## xmm1 = xmm1[0],mem[0]
; X86-SSE-NEXT: addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
; X86-SSE-NEXT: movupd %xmm1, (%eax) ## encoding: [0x66,0x0f,0x11,0x08]
@@ -252,8 +335,8 @@ define void @test_x86_sse2_storeu_pd(i8*
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
-; X86-AVX1-NEXT: vmovhpd LCPI8_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
-; X86-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI8_0, kind: FK_Data_4
+; X86-AVX1-NEXT: vmovhpd LCPI11_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
+; X86-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-AVX1-NEXT: ## xmm1 = xmm1[0],mem[0]
; X86-AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX1-NEXT: vmovupd %xmm0, (%eax) ## encoding: [0xc5,0xf9,0x11,0x00]
@@ -262,8 +345,8 @@ define void @test_x86_sse2_storeu_pd(i8*
; X86-AVX512-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vmovsd LCPI8_0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
-; X86-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI8_0, kind: FK_Data_4
+; X86-AVX512-NEXT: vmovsd LCPI11_0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
+; X86-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-AVX512-NEXT: ## xmm1 = mem[0],zero
; X86-AVX512-NEXT: vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X86-AVX512-NEXT: ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
@@ -275,7 +358,7 @@ define void @test_x86_sse2_storeu_pd(i8*
; X64-SSE: ## %bb.0:
; X64-SSE-NEXT: xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
; X64-SSE-NEXT: movhpd {{.*}}(%rip), %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
-; X64-SSE-NEXT: ## fixup A - offset: 4, value: LCPI8_0-4, kind: reloc_riprel_4byte
+; X64-SSE-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT: ## xmm1 = xmm1[0],mem[0]
; X64-SSE-NEXT: addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
; X64-SSE-NEXT: movupd %xmm1, (%rdi) ## encoding: [0x66,0x0f,0x11,0x0f]
@@ -285,7 +368,7 @@ define void @test_x86_sse2_storeu_pd(i8*
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX1-NEXT: vmovhpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
-; X64-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI8_0-4, kind: reloc_riprel_4byte
+; X64-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT: ## xmm1 = xmm1[0],mem[0]
; X64-AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX1-NEXT: vmovupd %xmm0, (%rdi) ## encoding: [0xc5,0xf9,0x11,0x07]
@@ -294,7 +377,7 @@ define void @test_x86_sse2_storeu_pd(i8*
; X64-AVX512-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vmovsd {{.*}}(%rip), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
-; X64-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI8_0-4, kind: reloc_riprel_4byte
+; X64-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-AVX512-NEXT: ## xmm1 = mem[0],zero
; X64-AVX512-NEXT: vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X64-AVX512-NEXT: ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Fri Jun 15 11:05:24 2018
@@ -1607,93 +1607,6 @@ define <8 x i16> @test_x86_sse2_psubus_w
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
-define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
-; SSE-LABEL: test_x86_sse2_sqrt_pd:
-; SSE: ## %bb.0:
-; SSE-NEXT: sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0]
-; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX1-LABEL: test_x86_sse2_sqrt_pd:
-; AVX1: ## %bb.0:
-; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0]
-; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX512-LABEL: test_x86_sse2_sqrt_pd:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
-; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
-
-
-define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
-; SSE-LABEL: test_x86_sse2_sqrt_sd:
-; SSE: ## %bb.0:
-; SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
-; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX1-LABEL: test_x86_sse2_sqrt_sd:
-; AVX1: ## %bb.0:
-; AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
-; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX512-LABEL: test_x86_sse2_sqrt_sd:
-; AVX512: ## %bb.0:
-; AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
-; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
-
-
-define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) {
-; X86-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
-; X86-SSE: ## %bb.0:
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00]
-; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
-; X86-SSE-NEXT: retl ## encoding: [0xc3]
-;
-; X86-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
-; X86-AVX1: ## %bb.0:
-; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00]
-; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
-; X86-AVX1-NEXT: retl ## encoding: [0xc3]
-;
-; X86-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
-; X86-AVX512: ## %bb.0:
-; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
-; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
-; X86-AVX512-NEXT: retl ## encoding: [0xc3]
-;
-; X64-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
-; X64-SSE: ## %bb.0:
-; X64-SSE-NEXT: movapd (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x28,0x07]
-; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
-; X64-SSE-NEXT: retq ## encoding: [0xc3]
-;
-; X64-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
-; X64-AVX1: ## %bb.0:
-; X64-AVX1-NEXT: vmovapd (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x07]
-; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
-; X64-AVX1-NEXT: retq ## encoding: [0xc3]
-;
-; X64-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
-; X64-AVX512: ## %bb.0:
-; X64-AVX512-NEXT: vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
-; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
-; X64-AVX512-NEXT: retq ## encoding: [0xc3]
- %a1 = load <2 x double>, <2 x double>* %a0, align 16
- %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-
-
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomieq_sd:
; SSE: ## %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/sse_partial_update.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse_partial_update.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse_partial_update.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse_partial_update.ll Fri Jun 15 11:05:24 2018
@@ -54,9 +54,10 @@ declare <4 x float> @llvm.x86.sse.rcp.ss
define void @sqrtss(<4 x float> %a) nounwind uwtable ssp {
; CHECK-LABEL: sqrtss:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: sqrtss %xmm0, %xmm0
-; CHECK-NEXT: cvtss2sd %xmm0, %xmm2
+; CHECK-NEXT: sqrtss %xmm0, %xmm1
+; CHECK-NEXT: cvtss2sd %xmm1, %xmm2
; CHECK-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtss2sd %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: jmp _callee ## TAILCALL
@@ -75,9 +76,10 @@ declare <4 x float> @llvm.x86.sse.sqrt.s
define void @sqrtsd(<2 x double> %a) nounwind uwtable ssp {
; CHECK-LABEL: sqrtsd:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: sqrtsd %xmm0, %xmm0
-; CHECK-NEXT: cvtsd2ss %xmm0, %xmm2
+; CHECK-NEXT: sqrtsd %xmm0, %xmm1
+; CHECK-NEXT: cvtsd2ss %xmm1, %xmm2
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: jmp _callee2 ## TAILCALL
Modified: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll Fri Jun 15 11:05:24 2018
@@ -33,10 +33,8 @@ define float @test_rcp_ss_1(float %a) {
define float @test_sqrt_ss_0(float %a) {
; CHECK-LABEL: @test_sqrt_ss_0(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT: ret float [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float %a)
+; CHECK-NEXT: ret float [[TMP1]]
;
%1 = insertelement <4 x float> undef, float %a, i32 0
%2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
Modified: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll?rev=334849&r1=334848&r2=334849&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll Fri Jun 15 11:05:24 2018
@@ -4,10 +4,8 @@ target datalayout = "e-m:e-i64:64-f80:12
define double @test_sqrt_sd_0(double %a) {
; CHECK-LABEL: @test_sqrt_sd_0(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
-; CHECK-NEXT: ret double [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double %a)
+; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
%2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
More information about the llvm-commits
mailing list