[llvm] c43b8ec - [X86] Add support for STRICT_FP_ROUND/STRICT_FP_EXTEND from/to fp128 to/from f32/f64/f80 in 64-bit mode.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 18:37:26 PST 2019
Author: Craig Topper
Date: 2019-11-25T18:18:39-08:00
New Revision: c43b8ec735e88472593ca420a5c6a17630f94066
URL: https://github.com/llvm/llvm-project/commit/c43b8ec735e88472593ca420a5c6a17630f94066
DIFF: https://github.com/llvm/llvm-project/commit/c43b8ec735e88472593ca420a5c6a17630f94066.diff
LOG: [X86] Add support for STRICT_FP_ROUND/STRICT_FP_EXTEND from/to fp128 to/from f32/f64/f80 in 64-bit mode.
These need to emit a libcall like we do for the non-strict version.
32-bit mode needs SoftenFloat support to be implemented for strict FP nodes.
Differential Revision: https://reviews.llvm.org/D70504
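For illustration only (not part of the commit; the function name below is hypothetical, and the checked-in coverage lives in the added fp128-cast-strict.ll test), this is the shape of strictfp IR that the patch now lowers in 64-bit mode to a libcall such as __extendsftf2:

  define void @example_strict_ext(float %x, fp128* %p) nounwind strictfp {
  entry:
    %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %x, metadata !"fpexcept.strict") #0
    store fp128 %conv, fp128* %p, align 16
    ret void
  }

  declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
  attributes #0 = { strictfp }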
Added:
llvm/test/CodeGen/X86/fp128-cast-strict.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c658363f8d6a..33f50e518bbe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -690,7 +690,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
- setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
// We need to custom handle any FP_ROUND with an f128 input, but
// LegalizeDAG uses the result type to know when to run a custom handler.
// So we have to list all legal floating point result types here.
@@ -19714,9 +19715,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
}
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ bool IsStrict = Op->isStrictFPOpcode();
+
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
- SDValue In = Op.getOperand(0);
+ SDValue In = Op.getOperand(IsStrict ? 1 : 0);
MVT SVT = In.getSimpleValueType();
if (VT == MVT::f128) {
@@ -19725,6 +19728,8 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
}
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
+ // FIXME: Strict fp.
+ assert(!IsStrict && "Strict FP not supported yet!");
return DAG.getNode(X86ISD::VFPEXT, DL, VT,
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
@@ -19732,8 +19737,10 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
}
SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ bool IsStrict = Op->isStrictFPOpcode();
+
MVT VT = Op.getSimpleValueType();
- SDValue In = Op.getOperand(0);
+ SDValue In = Op.getOperand(IsStrict ? 1 : 0);
MVT SVT = In.getSimpleValueType();
// It's legal except when f128 is involved
@@ -19745,17 +19752,17 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
// FP_ROUND node has a second operand indicating whether it is known to be
// precise. That doesn't take part in the LibCall so we can't directly use
// LowerF128Call.
+
+ SDLoc dl(Op);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
MakeLibCallOptions CallOptions;
- return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first;
-}
+ std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, In, CallOptions,
+ dl, Chain);
-// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking
-// the default expansion of STRICT_FP_ROUND.
-static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) {
- // FIXME: Need to form a libcall with an input chain for f128.
- assert(Op.getOperand(0).getValueType() != MVT::f128 &&
- "Don't know how to handle f128 yet!");
- return Op;
+ if (IsStrict)
+ return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+
+ return Tmp.first;
}
/// Depending on uarch and/or optimizing for size, we might prefer to use a
@@ -27773,9 +27780,21 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
- SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
+
+ bool IsStrict = Op->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
+
+ SDLoc dl(Op);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
MakeLibCallOptions CallOptions;
- return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
+ std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, Call, MVT::f128, Ops,
+ CallOptions, dl, Chain);
+
+ if (IsStrict)
+ return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+
+ return Tmp.first;
}
/// Provide custom lowering hooks for some operations.
@@ -27825,9 +27844,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
- case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
- case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
- case ISD::STRICT_FP_ROUND: return LowerSTRICT_FP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
case ISD::FADD:
diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
new file mode 100644
index 000000000000..84964d771925
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+sse | FileCheck %s --check-prefixes=X64,X64-SSE
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s --check-prefixes=X64,X64-SSE
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
+
+; Check soft floating point conversion function calls.
+
+@vf32 = common global float 0.000000e+00, align 4
+@vf64 = common global double 0.000000e+00, align 8
+@vf80 = common global x86_fp80 0xK00000000000000000000, align 8
+@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16
+
+define void @TestFPExtF32_F128() nounwind strictfp {
+; X64-SSE-LABEL: TestFPExtF32_F128:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE-NEXT: callq __extendsftf2
+; X64-SSE-NEXT: movaps %xmm0, {{.*}}(%rip)
+; X64-SSE-NEXT: popq %rax
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: TestFPExtF32_F128:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: pushq %rax
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: callq __extendsftf2
+; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip)
+; X64-AVX-NEXT: popq %rax
+; X64-AVX-NEXT: retq
+entry:
+ %0 = load float, float* @vf32, align 4
+ %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %0, metadata !"fpexcept.strict") #0
+ store fp128 %conv, fp128* @vf128, align 16
+ ret void
+}
+
+define void @TestFPExtF64_F128() nounwind strictfp {
+; X64-SSE-LABEL: TestFPExtF64_F128:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE-NEXT: callq __extenddftf2
+; X64-SSE-NEXT: movaps %xmm0, {{.*}}(%rip)
+; X64-SSE-NEXT: popq %rax
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: TestFPExtF64_F128:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: pushq %rax
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT: callq __extenddftf2
+; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip)
+; X64-AVX-NEXT: popq %rax
+; X64-AVX-NEXT: retq
+entry:
+ %0 = load double, double* @vf64, align 8
+ %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %0, metadata !"fpexcept.strict") #0
+ store fp128 %conv, fp128* @vf128, align 16
+ ret void
+}
+
+define void @TestFPExtF80_F128() nounwind strictfp {
+; X64-SSE-LABEL: TestFPExtF80_F128:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: subq $24, %rsp
+; X64-SSE-NEXT: fldt {{.*}}(%rip)
+; X64-SSE-NEXT: fstpt (%rsp)
+; X64-SSE-NEXT: callq __extendxftf2
+; X64-SSE-NEXT: movaps %xmm0, {{.*}}(%rip)
+; X64-SSE-NEXT: addq $24, %rsp
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: TestFPExtF80_F128:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: subq $24, %rsp
+; X64-AVX-NEXT: fldt {{.*}}(%rip)
+; X64-AVX-NEXT: fstpt (%rsp)
+; X64-AVX-NEXT: callq __extendxftf2
+; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip)
+; X64-AVX-NEXT: addq $24, %rsp
+; X64-AVX-NEXT: retq
+entry:
+ %0 = load x86_fp80, x86_fp80* @vf80, align 8
+ %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80 %0, metadata !"fpexcept.strict") #0
+ store fp128 %conv, fp128* @vf128, align 16
+ ret void
+}
+
+define void @TestFPTruncF128_F32() nounwind strictfp {
+; X64-SSE-LABEL: TestFPTruncF128_F32:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: callq __trunctfsf2
+; X64-SSE-NEXT: movss %xmm0, {{.*}}(%rip)
+; X64-SSE-NEXT: popq %rax
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: TestFPTruncF128_F32:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: pushq %rax
+; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0
+; X64-AVX-NEXT: callq __trunctfsf2
+; X64-AVX-NEXT: vmovss %xmm0, {{.*}}(%rip)
+; X64-AVX-NEXT: popq %rax
+; X64-AVX-NEXT: retq
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %conv = call float @llvm.experimental.constrained.fptrunc.f32.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ store float %conv, float* @vf32, align 4
+ ret void
+}
+
+define void @TestFPTruncF128_F64() nounwind strictfp {
+; X64-SSE-LABEL: TestFPTruncF128_F64:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: callq __trunctfdf2
+; X64-SSE-NEXT: movsd %xmm0, {{.*}}(%rip)
+; X64-SSE-NEXT: popq %rax
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: TestFPTruncF128_F64:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: pushq %rax
+; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0
+; X64-AVX-NEXT: callq __trunctfdf2
+; X64-AVX-NEXT: vmovsd %xmm0, {{.*}}(%rip)
+; X64-AVX-NEXT: popq %rax
+; X64-AVX-NEXT: retq
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %conv = call double @llvm.experimental.constrained.fptrunc.f64.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ store double %conv, double* @vf64, align 8
+ ret void
+}
+
+define void @TestFPTruncF128_F80() nounwind strictfp {
+; X64-SSE-LABEL: TestFPTruncF128_F80:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: callq __trunctfxf2
+; X64-SSE-NEXT: fstpt {{.*}}(%rip)
+; X64-SSE-NEXT: popq %rax
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: TestFPTruncF128_F80:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: pushq %rax
+; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0
+; X64-AVX-NEXT: callq __trunctfxf2
+; X64-AVX-NEXT: fstpt {{.*}}(%rip)
+; X64-AVX-NEXT: popq %rax
+; X64-AVX-NEXT: retq
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %conv = call x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ store x86_fp80 %conv, x86_fp80* @vf80, align 8
+ ret void
+}
+
+attributes #0 = { strictfp }
+
+declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
+declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)
+declare fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80, metadata)