[llvm] bbf7860 - Add support for strict fpextend/fpround/fsqrt operations on the X86 backend
Author: Liu, Chen3
Date: 2019-12-10T09:04:28+08:00
New Revision: bbf7860b9371799609f0e918e468f8e997f62eb3
URL: https://github.com/llvm/llvm-project/commit/bbf7860b9371799609f0e918e468f8e997f62eb3
DIFF: https://github.com/llvm/llvm-project/commit/bbf7860b9371799609f0e918e468f8e997f62eb3.diff
LOG: Add support for strict fpextend/fpround/fsqrt operations on the X86 backend
Differential Revision: https://reviews.llvm.org/D71184
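
The strict versions of these operations correspond to LLVM's constrained
floating-point intrinsics. As a minimal scalar sketch of the kind of IR this
patch lets the backend select directly (the function name is illustrative;
the intrinsic call and attribute usage mirror the updated tests below):

define double @sqrt_f64(double %x) #0 {
  %r = call double @llvm.experimental.constrained.sqrt.f64(double %x,
                      metadata !"round.dynamic",
                      metadata !"fpexcept.strict") #0
  ret double %r
}

declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)

attributes #0 = { strictfp }

With the new isel patterns and setOperationAction changes, such strict nodes
are selected as-is (e.g. to sqrtsd) rather than first being mutated to their
non-strict forms in X86ISelDAGToDAG::Select; the -disable-strictnode-mutation
RUN lines added to the tests exercise exactly this path.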
Added:
Modified:
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/fp-strict-scalar.ll
llvm/test/CodeGen/X86/vec-strict-128.ll
llvm/test/CodeGen/X86/vec-strict-256.ll
llvm/test/CodeGen/X86/vec-strict-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index b955f63296b5..0c7aaada4be9 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5219,14 +5219,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SelectCode(Res.getNode());
return;
}
- case ISD::STRICT_FP_ROUND: {
- // X87 instructions has enabled this strict fp operation.
- bool UsingFp80 = Node->getSimpleValueType(0) == MVT::f80 ||
- Node->getOperand(1).getSimpleValueType() == MVT::f80;
- if (UsingFp80 || (!Subtarget->hasSSE1() && Subtarget->hasX87()))
- break;
- LLVM_FALLTHROUGH;
- }
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
// FIXME: Remove when we have isel patterns for strict versions of these
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3697b5328e9c..7733ad661625 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -591,13 +591,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
-
- // Handle constrained floating-point operations of scalar.
- setOperationAction(ISD::STRICT_FSQRT , VT, Legal);
- setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
- // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
- // as Custom.
- setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
}
}
@@ -622,14 +615,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
// Handle constrained floating-point operations of scalar.
- setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
// We don't support FMA.
setOperationAction(ISD::FMA, MVT::f64, Expand);
@@ -857,17 +855,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
- // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
- // but its sufficient to pretend they're Legal since they will be someday.
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -1042,6 +1034,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// With AVX512, expanding (and promoting the shifts) is better.
if (!Subtarget.hasAVX512())
setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -1157,9 +1155,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
- // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
- // but its sufficient to pretend they're Legal since they will be someday.
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
@@ -1168,6 +1164,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
@@ -1430,17 +1429,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
- // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
- // but its sufficient to pretend their Legal since they will be someday.
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 3f782f7586b3..ab94950a635e 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7464,28 +7464,28 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f32x_info,
f64x_info>;
-def : Pat<(f64 (fpextend FR32X:$src)),
+def : Pat<(f64 (any_fpextend FR32X:$src)),
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
Requires<[HasAVX512]>;
-def : Pat<(f64 (fpextend (loadf32 addr:$src))),
+def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
-def : Pat<(f32 (fpround FR64X:$src)),
+def : Pat<(f32 (any_fpround FR64X:$src)),
(VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
- (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
+ (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector
- (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
+ (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
@@ -7583,14 +7583,14 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
- fpextend, sched.ZMM>,
+ any_fpextend, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
X86vfpextSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
- defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend,
sched.YMM>, EVEX_V256;
}
}
@@ -7657,63 +7657,63 @@ defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
PS, EVEX_CD8<32, CD8VH>;
let Predicates = [HasAVX512] in {
- def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
+ def : Pat<(v8f32 (any_fpround (v8f64 VR512:$src))),
(VCVTPD2PSZrr VR512:$src)>;
- def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (v8f64 VR512:$src))),
VR256X:$src0),
(VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
- def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (v8f64 VR512:$src))),
v8f32x_info.ImmAllZerosV),
(VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
- def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
+ def : Pat<(v8f32 (any_fpround (loadv8f64 addr:$src))),
(VCVTPD2PSZrm addr:$src)>;
- def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (loadv8f64 addr:$src))),
VR256X:$src0),
(VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
- def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (loadv8f64 addr:$src))),
v8f32x_info.ImmAllZerosV),
(VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
- def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
+ def : Pat<(v8f32 (any_fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
(VCVTPD2PSZrmb addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
+ (any_fpround (v8f64 (X86VBroadcastld64 addr:$src))),
(v8f32 VR256X:$src0)),
(VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
+ (any_fpround (v8f64 (X86VBroadcastld64 addr:$src))),
v8f32x_info.ImmAllZerosV),
(VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
- def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
+ def : Pat<(v4f32 (any_fpround (v4f64 VR256X:$src))),
(VCVTPD2PSZ256rr VR256X:$src)>;
- def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (v4f64 VR256X:$src))),
VR128X:$src0),
(VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
- def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (v4f64 VR256X:$src))),
v4f32x_info.ImmAllZerosV),
(VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
- def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+ def : Pat<(v4f32 (any_fpround (loadv4f64 addr:$src))),
(VCVTPD2PSZ256rm addr:$src)>;
- def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (loadv4f64 addr:$src))),
VR128X:$src0),
(VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
- def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (loadv4f64 addr:$src))),
v4f32x_info.ImmAllZerosV),
(VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
- def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+ def : Pat<(v4f32 (any_fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
(VCVTPD2PSZ256rmb addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
- (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+ (v4f32 (any_fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
VR128X:$src0),
(VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
- (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+ (v4f32 (any_fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
v4f32x_info.ImmAllZerosV),
(VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
@@ -8963,17 +8963,17 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
- (_.VT (fsqrt _.RC:$src))>, EVEX,
+ (_.VT (any_fsqrt _.RC:$src))>, EVEX,
Sched<[sched]>;
defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
- (fsqrt (_.VT
+ (any_fsqrt (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
- (fsqrt (_.VT
+ (any_fsqrt (_.VT
(_.BroadcastLdFrag addr:$src)))>,
EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -9054,13 +9054,13 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
}
let Predicates = [HasAVX512] in {
- def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
+ def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
(!cast<Instruction>(Name#Zr)
(_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
}
let Predicates = [HasAVX512, OptForSize] in {
- def : Pat<(_.EltVT (fsqrt (load addr:$src))),
+ def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
(!cast<Instruction>(Name#Zm)
(_.EltVT (IMPLICIT_DEF)), addr:$src)>;
}
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index adc616e86c27..f97d90076c18 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1219,18 +1219,18 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
}
-def : Pat<(f32 (fpround FR64:$src)),
+def : Pat<(f32 (any_fpround FR64:$src)),
(VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
Requires<[UseAVX]>;
let isCodeGenOnly = 1 in {
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fpround FR64:$src))]>,
+ [(set FR32:$dst, (any_fpround FR64:$src))]>,
Sched<[WriteCvtSD2SS]>, SIMD_EXC;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
+ [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>,
XD, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
}
@@ -1284,19 +1284,19 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
Requires<[UseAVX, OptForSize]>, SIMD_EXC;
} // isCodeGenOnly = 1, hasSideEffects = 0
-def : Pat<(f64 (fpextend FR32:$src)),
+def : Pat<(f64 (any_fpextend FR32:$src)),
(VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
-def : Pat<(fpextend (loadf32 addr:$src)),
+def : Pat<(any_fpextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
let isCodeGenOnly = 1 in {
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (fpextend FR32:$src))]>,
+ [(set FR64:$dst, (any_fpextend FR32:$src))]>,
XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
+ [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>,
XS, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
} // isCodeGenOnly = 1
@@ -1335,13 +1335,13 @@ let Predicates = [UseAVX] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector
- (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
+ (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
(VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector
- (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
+ (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
(VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v4f32 (X86Movss
@@ -1389,13 +1389,13 @@ let Predicates = [UseSSE2] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector
- (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
+ (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
(CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector
- (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
+ (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
(CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
def : Pat<(v2f64 (X86Movsd
@@ -1625,7 +1625,7 @@ def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>,
+ [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
@@ -1740,9 +1740,9 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
+ def : Pat<(v4f32 (any_fpround (v4f64 VR256:$src))),
(VCVTPD2PSYrr VR256:$src)>;
- def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+ def : Pat<(v4f32 (any_fpround (loadv4f64 addr:$src))),
(VCVTPD2PSYrm addr:$src)>;
}
@@ -3007,10 +3007,10 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>,
- sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>, SIMD_EXC;
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>,
+ sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
+ sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>,
+ sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
@@ -3039,8 +3039,8 @@ multiclass scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, SDNode Mo
}
}
-defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
-defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
+defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
+defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
SDNode Move, ValueType VT,
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar.ll b/llvm/test/CodeGen/X86/fp-strict-scalar.ll
index a61f195735ef..d532425cb5d2 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE,SSE-X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE,SSE-X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE,SSE-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE,SSE-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X87
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll
index a884303ad0c3..913b24c0af4a 100644
--- a/llvm/test/CodeGen/X86/vec-strict-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128.ll
@@ -14,6 +14,10 @@ declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2
declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
define <2 x double> @f1(<2 x double> %a, <2 x double> %b) #0 {
; SSE-LABEL: f1:
@@ -143,4 +147,74 @@ define <4 x float> @f8(<4 x float> %a, <4 x float> %b) #0 {
ret <4 x float> %ret
}
+define <2 x double> @f9(<2 x double> %a) #0 {
+; SSE-LABEL: f9:
+; SSE: # %bb.0:
+; SSE-NEXT: sqrtpd %xmm0, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: f9:
+; AVX: # %bb.0:
+; AVX-NEXT: vsqrtpd %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+ <2 x double> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %sqrt
+}
+
+define <4 x float> @f10(<4 x float> %a) #0 {
+; SSE-LABEL: f10:
+; SSE: # %bb.0:
+; SSE-NEXT: sqrtps %xmm0, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: f10:
+; AVX: # %bb.0:
+; AVX-NEXT: vsqrtps %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %sqrt = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
+ <4 x float> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x float > %sqrt
+}
+
+define <4 x float> @f11(<2 x double> %a0, <4 x float> %a1) #0 {
+; SSE-LABEL: f11:
+; SSE: # %bb.0:
+; SSE-NEXT: cvtsd2ss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: f11:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %ext = extractelement <2 x double> %a0, i32 0
+ %cvt = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %ext,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = insertelement <4 x float> %a1, float %cvt, i32 0
+ ret <4 x float> %res
+}
+
+define <2 x double> @f12(<2 x double> %a0, <4 x float> %a1) #0 {
+; SSE-LABEL: f12:
+; SSE: # %bb.0:
+; SSE-NEXT: cvtss2sd %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: f12:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %ext = extractelement <4 x float> %a1, i32 0
+ %cvt = call double @llvm.experimental.constrained.fpext.f64.f32(float %ext,
+ metadata !"fpexcept.strict") #0
+ %res = insertelement <2 x double> %a0, double %cvt, i32 0
+ ret <2 x double> %res
+}
+
attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-256.ll b/llvm/test/CodeGen/X86/vec-strict-256.ll
index 8828deb0808c..f210715b4b74 100644
--- a/llvm/test/CodeGen/X86/vec-strict-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-256.ll
@@ -12,6 +12,10 @@ declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4
declare <8 x float> @llvm.experimental.constrained.fmul.v8f32(<8 x float>, <8 x float>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.fdiv.v8f32(<8 x float>, <8 x float>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
+declare <8 x float> @llvm.experimental.constrained.sqrt.v8f32(<8 x float>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
+declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: f1:
@@ -101,4 +105,53 @@ define <8 x float> @f8(<8 x float> %a, <8 x float> %b) #0 {
ret <8 x float> %ret
}
+define <4 x double> @f9(<4 x double> %a) #0 {
+; CHECK-LABEL: f9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsqrtpd %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
+ <4 x double> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %ret
+}
+
+
+define <8 x float> @f10(<8 x float> %a) #0 {
+; CHECK-LABEL: f10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsqrtps %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x float> @llvm.experimental.constrained.sqrt.v8f32(
+ <8 x float> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x float > %ret
+}
+
+define <4 x double> @f11(<4 x float> %a) #0 {
+; CHECK-LABEL: f11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
+ <4 x float> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %ret
+}
+
+define <4 x float> @f12(<4 x double> %a) #0 {
+; CHECK-LABEL: f12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
+ <4 x double> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x float> %ret
+}
+
attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-512.ll b/llvm/test/CodeGen/X86/vec-strict-512.ll
index 813088440872..6505e2ac9b71 100644
--- a/llvm/test/CodeGen/X86/vec-strict-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-512.ll
@@ -10,6 +10,10 @@ declare <8 x double> @llvm.experimental.constrained.fmul.v8f64(<8 x double>, <8
declare <16 x float> @llvm.experimental.constrained.fmul.v16f32(<16 x float>, <16 x float>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.fdiv.v8f64(<8 x double>, <8 x double>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.fdiv.v16f32(<16 x float>, <16 x float>, metadata, metadata)
+declare <8 x double> @llvm.experimental.constrained.sqrt.v8f64(<8 x double>, metadata, metadata)
+declare <16 x float> @llvm.experimental.constrained.sqrt.v16f32(<16 x float>, metadata, metadata)
+declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float>, metadata)
+declare <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double>, metadata, metadata)
define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 {
; CHECK-LABEL: f1:
@@ -99,4 +103,52 @@ define <16 x float> @f8(<16 x float> %a, <16 x float> %b) #0 {
ret <16 x float> %ret
}
+define <8 x double> @f9(<8 x double> %a) #0 {
+; CHECK-LABEL: f9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x double> @llvm.experimental.constrained.sqrt.v8f64(
+ <8 x double> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x double> %ret
+}
+
+
+define <16 x float> @f10(<16 x float> %a) #0 {
+; CHECK-LABEL: f10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsqrtps %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x float> @llvm.experimental.constrained.sqrt.v16f32(
+ <16 x float> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <16 x float > %ret
+}
+
+define <8 x double> @f11(<8 x float> %a) #0 {
+; CHECK-LABEL: f11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2pd %ymm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(
+ <8 x float> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x double> %ret
+}
+
+define <8 x float> @f12(<8 x double> %a) #0 {
+; CHECK-LABEL: f12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(
+ <8 x double> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x float> %ret
+}
+
attributes #0 = { strictfp }