[llvm] 8af492a - add strict float for round operation
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 1 04:45:16 PST 2020
Author: Liu, Chen3
Date: 2020-01-01T20:42:12+08:00
New Revision: 8af492ade1bc5367ae529e451b9c9cd9e3d55e53
URL: https://github.com/llvm/llvm-project/commit/8af492ade1bc5367ae529e451b9c9cd9e3d55e53
DIFF: https://github.com/llvm/llvm-project/commit/8af492ade1bc5367ae529e451b9c9cd9e3d55e53.diff
LOG: [X86] Add strict floating-point (constrained FP) support for the rounding operations ceil, floor, trunc, rint, and nearbyint
Differential Revision: https://reviews.llvm.org/D72026
Added:
llvm/test/CodeGen/X86/fp-strict-scalar-round.ll
llvm/test/CodeGen/X86/vec-strict-round-128.ll
Modified:
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/vec-strict-256.ll
llvm/test/CodeGen/X86/vec-strict-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1f262e076c73..4e29597e9411 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -897,27 +897,50 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
continue;
}
case ISD::FCEIL:
+ case ISD::STRICT_FCEIL:
case ISD::FFLOOR:
+ case ISD::STRICT_FFLOOR:
case ISD::FTRUNC:
+ case ISD::STRICT_FTRUNC:
case ISD::FNEARBYINT:
- case ISD::FRINT: {
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::FRINT:
+ case ISD::STRICT_FRINT: {
// Replace fp rounding with their X86 specific equivalent so we don't
// need 2 sets of patterns.
unsigned Imm;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
+ case ISD::STRICT_FCEIL:
case ISD::FCEIL: Imm = 0xA; break;
+ case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: Imm = 0x9; break;
+ case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: Imm = 0xB; break;
+ case ISD::STRICT_FNEARBYINT:
case ISD::FNEARBYINT: Imm = 0xC; break;
+ case ISD::STRICT_FRINT:
case ISD::FRINT: Imm = 0x4; break;
}
SDLoc dl(N);
- SDValue Res = CurDAG->getNode(
- X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0),
- CurDAG->getTargetConstant(Imm, dl, MVT::i8));
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Res;
+ if (IsStrict)
+ Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
+ {N->getValueType(0), MVT::Other},
+ {N->getOperand(0), N->getOperand(1),
+ CurDAG->getTargetConstant(Imm, dl, MVT::i8)});
+ else
+ Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
+ N->getOperand(0),
+ CurDAG->getTargetConstant(Imm, dl, MVT::i8));
--I;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ if (IsStrict) {
+ SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
+ SDValue To[] = {Res.getValue(0), Res.getValue(1)};
+ CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
+ } else
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
CurDAG->DeleteNode(N);
continue;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0f171e55dd4..38911758a2e9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1068,11 +1068,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
- setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
- setOperationAction(ISD::FCEIL, RoundedTy, Legal);
- setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
- setOperationAction(ISD::FRINT, RoundedTy, Legal);
- setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
+ setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
}
setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
@@ -1144,14 +1149,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
: &X86::VR256RegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
- setOperationAction(ISD::FFLOOR, VT, Legal);
- setOperationAction(ISD::FCEIL, VT, Legal);
- setOperationAction(ISD::FTRUNC, VT, Legal);
- setOperationAction(ISD::FRINT, VT, Legal);
- setOperationAction(ISD::FNEARBYINT, VT, Legal);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
@@ -1503,11 +1513,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
- setOperationAction(ISD::FFLOOR, VT, Legal);
- setOperationAction(ISD::FCEIL, VT, Legal);
- setOperationAction(ISD::FTRUNC, VT, Legal);
- setOperationAction(ISD::FRINT, VT, Legal);
- setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::SELECT, VT, Custom);
}
@@ -29650,6 +29665,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
+ case X86ISD::STRICT_VRNDSCALE: return "X86ISD::STRICT_VRNDSCALE";
case X86ISD::VRNDSCALE_SAE: return "X86ISD::VRNDSCALE_SAE";
case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES";
case X86ISD::VRNDSCALES_SAE: return "X86ISD::VRNDSCALES_SAE";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 655717bd6052..16b076e85af8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -424,7 +424,7 @@ namespace llvm {
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
// Also used by the legacy (V)ROUND intrinsics where we mask out the
// scaling part of the immediate.
- VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
+ VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE, STRICT_VRNDSCALE,
// Tests Types Of a FP Values for packed types.
VFPCLASS,
// Tests Types Of a FP Values for scalar types.
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 02ac454fe063..61f12785db77 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -9019,13 +9019,13 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
}
let Predicates = [HasAVX512] in {
- def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2),
+ def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src1, timm:$src2))>;
}
let Predicates = [HasAVX512, OptForSize] in {
- def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
+ def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
addr:$src1, timm:$src2))>;
}
@@ -10290,7 +10290,7 @@ defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56
X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
- X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
+ X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 37cba895c370..a9902013dfef 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -466,6 +466,12 @@ def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>;
def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>;
def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>;
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>;
+def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm,
+ [SDNPHasChain]>;
+def X86any_VRndScale : PatFrags<(ops node:$src1, node:$src2),
+ [(X86strict_VRndScale node:$src1, node:$src2),
+ (X86VRndScale node:$src1, node:$src2)]>;
+
def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>;
def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImm>;
def X86VGetMantSAE : SDNode<"X86ISD::VGETMANT_SAE", SDTFPUnaryOpImm>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c7ecfba5b245..ce085e6d56ba 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5540,19 +5540,19 @@ let Predicates = [HasAVX, NoVLX] in {
let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {
// Intrinsic form
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
- loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
+ loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
- loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>,
+ loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>,
VEX, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
- loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
+ loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
- loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>,
+ loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>,
VEX, VEX_L, VEX_WIG;
}
}
@@ -5565,25 +5565,25 @@ let Predicates = [UseAVX] in {
}
let Predicates = [UseAVX] in {
- def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
+ def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>;
- def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
+ def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;
}
let Predicates = [UseAVX, OptForSize] in {
- def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
+ def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
(VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
- def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
+ def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
(VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;
}
let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
- memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
+ memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
- memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>;
+ memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>;
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;
@@ -5592,16 +5592,16 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales>;
let Predicates = [UseSSE41] in {
- def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
+ def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),
(ROUNDSSr FR32:$src1, timm:$src2)>;
- def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
+ def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),
(ROUNDSDr FR64:$src1, timm:$src2)>;
}
let Predicates = [UseSSE41, OptForSize] in {
- def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
+ def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),
(ROUNDSSm addr:$src1, timm:$src2)>;
- def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
+ def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),
(ROUNDSDm addr:$src1, timm:$src2)>;
}
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll
new file mode 100644
index 000000000000..b5e7f9307de2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll
@@ -0,0 +1,474 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41,SSE41-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41,SSE41-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86,AVX512-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64,AVX512-X64
+
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
+declare float @llvm.experimental.constrained.floor.f32(float, metadata)
+declare double @llvm.experimental.constrained.floor.f64(double, metadata)
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
+declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+
+define float @fceil32(float %f) #0 {
+; SSE41-X86-LABEL: fceil32:
+; SSE41-X86: # %bb.0:
+; SSE41-X86-NEXT: pushl %eax
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
+; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-X86-NEXT: roundss $10, %xmm0, %xmm0
+; SSE41-X86-NEXT: movss %xmm0, (%esp)
+; SSE41-X86-NEXT: flds (%esp)
+; SSE41-X86-NEXT: popl %eax
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
+; SSE41-X86-NEXT: retl
+;
+; SSE41-X64-LABEL: fceil32:
+; SSE41-X64: # %bb.0:
+; SSE41-X64-NEXT: roundss $10, %xmm0, %xmm0
+; SSE41-X64-NEXT: retq
+;
+; AVX-X86-LABEL: fceil32:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: pushl %eax
+; AVX-X86-NEXT: .cfi_def_cfa_offset 8
+; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-X86-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovss %xmm0, (%esp)
+; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: popl %eax
+; AVX-X86-NEXT: .cfi_def_cfa_offset 4
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: fceil32:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: retq
+ %res = call float @llvm.experimental.constrained.ceil.f32(
+ float %f, metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define double @fceilf64(double %f) #0 {
+; SSE41-X86-LABEL: fceilf64:
+; SSE41-X86: # %bb.0:
+; SSE41-X86-NEXT: pushl %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
+; SSE41-X86-NEXT: .cfi_offset %ebp, -8
+; SSE41-X86-NEXT: movl %esp, %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
+; SSE41-X86-NEXT: andl $-8, %esp
+; SSE41-X86-NEXT: subl $8, %esp
+; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE41-X86-NEXT: roundsd $10, %xmm0, %xmm0
+; SSE41-X86-NEXT: movsd %xmm0, (%esp)
+; SSE41-X86-NEXT: fldl (%esp)
+; SSE41-X86-NEXT: movl %ebp, %esp
+; SSE41-X86-NEXT: popl %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
+; SSE41-X86-NEXT: retl
+;
+; SSE41-X64-LABEL: fceilf64:
+; SSE41-X64: # %bb.0:
+; SSE41-X64-NEXT: roundsd $10, %xmm0, %xmm0
+; SSE41-X64-NEXT: retq
+;
+; AVX-X86-LABEL: fceilf64:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: pushl %ebp
+; AVX-X86-NEXT: .cfi_def_cfa_offset 8
+; AVX-X86-NEXT: .cfi_offset %ebp, -8
+; AVX-X86-NEXT: movl %esp, %ebp
+; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
+; AVX-X86-NEXT: andl $-8, %esp
+; AVX-X86-NEXT: subl $8, %esp
+; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
+; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: movl %ebp, %esp
+; AVX-X86-NEXT: popl %ebp
+; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: fceilf64:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: retq
+ %res = call double @llvm.experimental.constrained.ceil.f64(
+ double %f, metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define float @ffloor32(float %f) #0 {
+; SSE41-X86-LABEL: ffloor32:
+; SSE41-X86: # %bb.0:
+; SSE41-X86-NEXT: pushl %eax
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
+; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-X86-NEXT: roundss $9, %xmm0, %xmm0
+; SSE41-X86-NEXT: movss %xmm0, (%esp)
+; SSE41-X86-NEXT: flds (%esp)
+; SSE41-X86-NEXT: popl %eax
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
+; SSE41-X86-NEXT: retl
+;
+; SSE41-X64-LABEL: ffloor32:
+; SSE41-X64: # %bb.0:
+; SSE41-X64-NEXT: roundss $9, %xmm0, %xmm0
+; SSE41-X64-NEXT: retq
+;
+; AVX-X86-LABEL: ffloor32:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: pushl %eax
+; AVX-X86-NEXT: .cfi_def_cfa_offset 8
+; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-X86-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovss %xmm0, (%esp)
+; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: popl %eax
+; AVX-X86-NEXT: .cfi_def_cfa_offset 4
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: ffloor32:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: retq
+ %res = call float @llvm.experimental.constrained.floor.f32(
+ float %f, metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define double @ffloorf64(double %f) #0 {
+; SSE41-X86-LABEL: ffloorf64:
+; SSE41-X86: # %bb.0:
+; SSE41-X86-NEXT: pushl %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
+; SSE41-X86-NEXT: .cfi_offset %ebp, -8
+; SSE41-X86-NEXT: movl %esp, %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
+; SSE41-X86-NEXT: andl $-8, %esp
+; SSE41-X86-NEXT: subl $8, %esp
+; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE41-X86-NEXT: roundsd $9, %xmm0, %xmm0
+; SSE41-X86-NEXT: movsd %xmm0, (%esp)
+; SSE41-X86-NEXT: fldl (%esp)
+; SSE41-X86-NEXT: movl %ebp, %esp
+; SSE41-X86-NEXT: popl %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
+; SSE41-X86-NEXT: retl
+;
+; SSE41-X64-LABEL: ffloorf64:
+; SSE41-X64: # %bb.0:
+; SSE41-X64-NEXT: roundsd $9, %xmm0, %xmm0
+; SSE41-X64-NEXT: retq
+;
+; AVX-X86-LABEL: ffloorf64:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: pushl %ebp
+; AVX-X86-NEXT: .cfi_def_cfa_offset 8
+; AVX-X86-NEXT: .cfi_offset %ebp, -8
+; AVX-X86-NEXT: movl %esp, %ebp
+; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
+; AVX-X86-NEXT: andl $-8, %esp
+; AVX-X86-NEXT: subl $8, %esp
+; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
+; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: movl %ebp, %esp
+; AVX-X86-NEXT: popl %ebp
+; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: ffloorf64:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: retq
+ %res = call double @llvm.experimental.constrained.floor.f64(
+ double %f, metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define float @ftrunc32(float %f) #0 {
+; SSE41-X86-LABEL: ftrunc32:
+; SSE41-X86: # %bb.0:
+; SSE41-X86-NEXT: pushl %eax
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
+; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-X86-NEXT: roundss $11, %xmm0, %xmm0
+; SSE41-X86-NEXT: movss %xmm0, (%esp)
+; SSE41-X86-NEXT: flds (%esp)
+; SSE41-X86-NEXT: popl %eax
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
+; SSE41-X86-NEXT: retl
+;
+; SSE41-X64-LABEL: ftrunc32:
+; SSE41-X64: # %bb.0:
+; SSE41-X64-NEXT: roundss $11, %xmm0, %xmm0
+; SSE41-X64-NEXT: retq
+;
+; AVX-X86-LABEL: ftrunc32:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: pushl %eax
+; AVX-X86-NEXT: .cfi_def_cfa_offset 8
+; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovss %xmm0, (%esp)
+; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: popl %eax
+; AVX-X86-NEXT: .cfi_def_cfa_offset 4
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: ftrunc32:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: retq
+ %res = call float @llvm.experimental.constrained.trunc.f32(
+ float %f, metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define double @ftruncf64(double %f) #0 {
+; SSE41-X86-LABEL: ftruncf64:
+; SSE41-X86: # %bb.0:
+; SSE41-X86-NEXT: pushl %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
+; SSE41-X86-NEXT: .cfi_offset %ebp, -8
+; SSE41-X86-NEXT: movl %esp, %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
+; SSE41-X86-NEXT: andl $-8, %esp
+; SSE41-X86-NEXT: subl $8, %esp
+; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE41-X86-NEXT: roundsd $11, %xmm0, %xmm0
+; SSE41-X86-NEXT: movsd %xmm0, (%esp)
+; SSE41-X86-NEXT: fldl (%esp)
+; SSE41-X86-NEXT: movl %ebp, %esp
+; SSE41-X86-NEXT: popl %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
+; SSE41-X86-NEXT: retl
+;
+; SSE41-X64-LABEL: ftruncf64:
+; SSE41-X64: # %bb.0:
+; SSE41-X64-NEXT: roundsd $11, %xmm0, %xmm0
+; SSE41-X64-NEXT: retq
+;
+; AVX-X86-LABEL: ftruncf64:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: pushl %ebp
+; AVX-X86-NEXT: .cfi_def_cfa_offset 8
+; AVX-X86-NEXT: .cfi_offset %ebp, -8
+; AVX-X86-NEXT: movl %esp, %ebp
+; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
+; AVX-X86-NEXT: andl $-8, %esp
+; AVX-X86-NEXT: subl $8, %esp
+; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
+; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: movl %ebp, %esp
+; AVX-X86-NEXT: popl %ebp
+; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: ftruncf64:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: retq
+ %res = call double @llvm.experimental.constrained.trunc.f64(
+ double %f, metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define float @frint32(float %f) #0 {
+; SSE41-X86-LABEL: frint32:
+; SSE41-X86: # %bb.0:
+; SSE41-X86-NEXT: pushl %eax
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
+; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-X86-NEXT: roundss $4, %xmm0, %xmm0
+; SSE41-X86-NEXT: movss %xmm0, (%esp)
+; SSE41-X86-NEXT: flds (%esp)
+; SSE41-X86-NEXT: popl %eax
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
+; SSE41-X86-NEXT: retl
+;
+; SSE41-X64-LABEL: frint32:
+; SSE41-X64: # %bb.0:
+; SSE41-X64-NEXT: roundss $4, %xmm0, %xmm0
+; SSE41-X64-NEXT: retq
+;
+; AVX-X86-LABEL: frint32:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: pushl %eax
+; AVX-X86-NEXT: .cfi_def_cfa_offset 8
+; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-X86-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovss %xmm0, (%esp)
+; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: popl %eax
+; AVX-X86-NEXT: .cfi_def_cfa_offset 4
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: frint32:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: retq
+ %res = call float @llvm.experimental.constrained.rint.f32(
+ float %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define double @frintf64(double %f) #0 {
+; SSE41-X86-LABEL: frintf64:
+; SSE41-X86: # %bb.0:
+; SSE41-X86-NEXT: pushl %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
+; SSE41-X86-NEXT: .cfi_offset %ebp, -8
+; SSE41-X86-NEXT: movl %esp, %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
+; SSE41-X86-NEXT: andl $-8, %esp
+; SSE41-X86-NEXT: subl $8, %esp
+; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE41-X86-NEXT: roundsd $4, %xmm0, %xmm0
+; SSE41-X86-NEXT: movsd %xmm0, (%esp)
+; SSE41-X86-NEXT: fldl (%esp)
+; SSE41-X86-NEXT: movl %ebp, %esp
+; SSE41-X86-NEXT: popl %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
+; SSE41-X86-NEXT: retl
+;
+; SSE41-X64-LABEL: frintf64:
+; SSE41-X64: # %bb.0:
+; SSE41-X64-NEXT: roundsd $4, %xmm0, %xmm0
+; SSE41-X64-NEXT: retq
+;
+; AVX-X86-LABEL: frintf64:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: pushl %ebp
+; AVX-X86-NEXT: .cfi_def_cfa_offset 8
+; AVX-X86-NEXT: .cfi_offset %ebp, -8
+; AVX-X86-NEXT: movl %esp, %ebp
+; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
+; AVX-X86-NEXT: andl $-8, %esp
+; AVX-X86-NEXT: subl $8, %esp
+; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
+; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: movl %ebp, %esp
+; AVX-X86-NEXT: popl %ebp
+; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: frintf64:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: retq
+ %res = call double @llvm.experimental.constrained.rint.f64(
+ double %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define float @fnearbyint32(float %f) #0 {
+; SSE41-X86-LABEL: fnearbyint32:
+; SSE41-X86: # %bb.0:
+; SSE41-X86-NEXT: pushl %eax
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
+; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-X86-NEXT: roundss $12, %xmm0, %xmm0
+; SSE41-X86-NEXT: movss %xmm0, (%esp)
+; SSE41-X86-NEXT: flds (%esp)
+; SSE41-X86-NEXT: popl %eax
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 4
+; SSE41-X86-NEXT: retl
+;
+; SSE41-X64-LABEL: fnearbyint32:
+; SSE41-X64: # %bb.0:
+; SSE41-X64-NEXT: roundss $12, %xmm0, %xmm0
+; SSE41-X64-NEXT: retq
+;
+; AVX-X86-LABEL: fnearbyint32:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: pushl %eax
+; AVX-X86-NEXT: .cfi_def_cfa_offset 8
+; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-X86-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovss %xmm0, (%esp)
+; AVX-X86-NEXT: flds (%esp)
+; AVX-X86-NEXT: popl %eax
+; AVX-X86-NEXT: .cfi_def_cfa_offset 4
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: fnearbyint32:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: retq
+ %res = call float @llvm.experimental.constrained.nearbyint.f32(
+ float %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define double @fnearbyintf64(double %f) #0 {
+; SSE41-X86-LABEL: fnearbyintf64:
+; SSE41-X86: # %bb.0:
+; SSE41-X86-NEXT: pushl %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa_offset 8
+; SSE41-X86-NEXT: .cfi_offset %ebp, -8
+; SSE41-X86-NEXT: movl %esp, %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp
+; SSE41-X86-NEXT: andl $-8, %esp
+; SSE41-X86-NEXT: subl $8, %esp
+; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE41-X86-NEXT: roundsd $12, %xmm0, %xmm0
+; SSE41-X86-NEXT: movsd %xmm0, (%esp)
+; SSE41-X86-NEXT: fldl (%esp)
+; SSE41-X86-NEXT: movl %ebp, %esp
+; SSE41-X86-NEXT: popl %ebp
+; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4
+; SSE41-X86-NEXT: retl
+;
+; SSE41-X64-LABEL: fnearbyintf64:
+; SSE41-X64: # %bb.0:
+; SSE41-X64-NEXT: roundsd $12, %xmm0, %xmm0
+; SSE41-X64-NEXT: retq
+;
+; AVX-X86-LABEL: fnearbyintf64:
+; AVX-X86: # %bb.0:
+; AVX-X86-NEXT: pushl %ebp
+; AVX-X86-NEXT: .cfi_def_cfa_offset 8
+; AVX-X86-NEXT: .cfi_offset %ebp, -8
+; AVX-X86-NEXT: movl %esp, %ebp
+; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
+; AVX-X86-NEXT: andl $-8, %esp
+; AVX-X86-NEXT: subl $8, %esp
+; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
+; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
+; AVX-X86-NEXT: fldl (%esp)
+; AVX-X86-NEXT: movl %ebp, %esp
+; AVX-X86-NEXT: popl %ebp
+; AVX-X86-NEXT: .cfi_def_cfa %esp, 4
+; AVX-X86-NEXT: retl
+;
+; AVX-X64-LABEL: fnearbyintf64:
+; AVX-X64: # %bb.0:
+; AVX-X64-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
+; AVX-X64-NEXT: retq
+ %res = call double @llvm.experimental.constrained.nearbyint.f64(
+ double %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret double %res
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-256.ll b/llvm/test/CodeGen/X86/vec-strict-256.ll
index a0dc8bca875c..404e691c37c6 100644
--- a/llvm/test/CodeGen/X86/vec-strict-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-256.ll
@@ -18,6 +18,16 @@ declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata)
+declare <8 x float> @llvm.experimental.constrained.ceil.v8f32(<8 x float>, metadata)
+declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata)
+declare <8 x float> @llvm.experimental.constrained.floor.v8f32(<8 x float>, metadata)
+declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata)
+declare <8 x float> @llvm.experimental.constrained.trunc.v8f32(<8 x float>, metadata)
+declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata)
+declare <8 x float> @llvm.experimental.constrained.rint.v8f32(<8 x float>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
+declare <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: f1:
@@ -178,4 +188,111 @@ define <4 x double> @f14(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 {
ret <4 x double> %res
}
+define <8 x float> @fceilv8f32(<8 x float> %f) #0 { ; Strict-FP ceil on <8 x float>: must select a single vroundps with immediate 10 (0xA).
+; CHECK-LABEL: fceilv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundps $10, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <8 x float> @llvm.experimental.constrained.ceil.v8f32(
+ <8 x float> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; ceil takes no rounding-mode metadata
+ ret <8 x float> %res
+}
+
+define <4 x double> @fceilv4f64(<4 x double> %f) #0 { ; Strict-FP ceil on <4 x double>: must select a single vroundpd with immediate 10 (0xA).
+; CHECK-LABEL: fceilv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundpd $10, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <4 x double> @llvm.experimental.constrained.ceil.v4f64(
+ <4 x double> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; ceil takes no rounding-mode metadata
+ ret <4 x double> %res
+}
+
+define <8 x float> @ffloorv8f32(<8 x float> %f) #0 { ; Strict-FP floor on <8 x float>: must select a single vroundps with immediate 9 (0x9).
+; CHECK-LABEL: ffloorv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundps $9, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <8 x float> @llvm.experimental.constrained.floor.v8f32(
+ <8 x float> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; floor takes no rounding-mode metadata
+ ret <8 x float> %res
+}
+
+define <4 x double> @ffloorv4f64(<4 x double> %f) #0 { ; Strict-FP floor on <4 x double>: must select a single vroundpd with immediate 9 (0x9).
+; CHECK-LABEL: ffloorv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundpd $9, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <4 x double> @llvm.experimental.constrained.floor.v4f64(
+ <4 x double> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; floor takes no rounding-mode metadata
+ ret <4 x double> %res
+}
+
+
+define <8 x float> @ftruncv8f32(<8 x float> %f) #0 { ; Strict-FP trunc on <8 x float>: must select a single vroundps with immediate 11 (0xB).
+; CHECK-LABEL: ftruncv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundps $11, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <8 x float> @llvm.experimental.constrained.trunc.v8f32(
+ <8 x float> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; trunc takes no rounding-mode metadata
+ ret <8 x float> %res
+}
+
+define <4 x double> @ftruncv4f64(<4 x double> %f) #0 { ; Strict-FP trunc on <4 x double>: must select a single vroundpd with immediate 11 (0xB).
+; CHECK-LABEL: ftruncv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <4 x double> @llvm.experimental.constrained.trunc.v4f64(
+ <4 x double> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; trunc takes no rounding-mode metadata
+ ret <4 x double> %res
+}
+
+
+define <8 x float> @frintv8f32(<8 x float> %f) #0 { ; Strict-FP rint on <8 x float>: must select a single vroundps with immediate 4.
+; CHECK-LABEL: frintv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundps $4, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <8 x float> @llvm.experimental.constrained.rint.v8f32(
+ <8 x float> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; rint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <8 x float> %res
+}
+
+define <4 x double> @frintv4f64(<4 x double> %f) #0 { ; Strict-FP rint on <4 x double>: must select a single vroundpd with immediate 4.
+; CHECK-LABEL: frintv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundpd $4, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
+ <4 x double> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; rint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <4 x double> %res
+}
+
+
+define <8 x float> @fnearbyintv8f32(<8 x float> %f) #0 { ; Strict-FP nearbyint on <8 x float>: must select a single vroundps with immediate 12.
+; CHECK-LABEL: fnearbyintv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundps $12, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(
+ <8 x float> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; nearbyint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <8 x float> %res
+}
+
+define <4 x double> @fnearbyintv4f64(<4 x double> %f) #0 { ; Strict-FP nearbyint on <4 x double>: must select a single vroundpd with immediate 12.
+; CHECK-LABEL: fnearbyintv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vroundpd $12, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
+ <4 x double> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; nearbyint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <4 x double> %res
+}
+
attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-512.ll b/llvm/test/CodeGen/X86/vec-strict-512.ll
index 236fb77f9b59..88dec6a427db 100644
--- a/llvm/test/CodeGen/X86/vec-strict-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-512.ll
@@ -16,6 +16,17 @@ declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float
declare <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, metadata, metadata)
+declare <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float>, metadata)
+declare <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double>, metadata)
+declare <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float>, metadata)
+declare <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double>, metadata)
+declare <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float>, metadata)
+declare <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double>, metadata)
+declare <16 x float> @llvm.experimental.constrained.rint.v16f32(<16 x float>, metadata, metadata)
+declare <8 x double> @llvm.experimental.constrained.rint.v8f64(<8 x double>, metadata, metadata)
+declare <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float>, metadata, metadata)
+declare <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double>, metadata, metadata)
+
define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 {
; CHECK-LABEL: f1:
@@ -175,4 +186,98 @@ define <8 x double> @f14(<8 x double> %a, <8 x double> %b, <8 x double> %c) #0 {
ret <8 x double> %res
}
+define <16 x float> @strict_vector_fceil_v16f32(<16 x float> %f) #0 { ; Strict-FP ceil on <16 x float>: must select a single vrndscaleps with immediate 10 (0xA).
+; CHECK-LABEL: strict_vector_fceil_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrndscaleps $10, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; no rounding-mode metadata
+ ret <16 x float> %res
+}
+
+define <8 x double> @strict_vector_fceil_v8f64(<8 x double> %f) #0 { ; Strict-FP ceil on <8 x double>: must select a single vrndscalepd with immediate 10 (0xA).
+; CHECK-LABEL: strict_vector_fceil_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrndscalepd $10, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; no rounding-mode metadata
+ ret <8 x double> %res
+}
+
+define <16 x float> @strict_vector_ffloor_v16f32(<16 x float> %f) #0 { ; Strict-FP floor on <16 x float>: must select a single vrndscaleps with immediate 9 (0x9).
+; CHECK-LABEL: strict_vector_ffloor_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrndscaleps $9, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; no rounding-mode metadata
+ ret <16 x float> %res
+}
+
+define <8 x double> @strict_vector_ffloor_v8f64(<8 x double> %f) #0 { ; Strict-FP floor on <8 x double>: must select a single vrndscalepd with immediate 9 (0x9).
+; CHECK-LABEL: strict_vector_ffloor_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrndscalepd $9, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; no rounding-mode metadata
+ ret <8 x double> %res
+}
+
+define <16 x float> @strict_vector_ftrunc_v16f32(<16 x float> %f) #0 { ; Strict-FP trunc on <16 x float>: must select a single vrndscaleps with immediate 11 (0xB).
+; CHECK-LABEL: strict_vector_ftrunc_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; no rounding-mode metadata
+ ret <16 x float> %res
+}
+
+define <8 x double> @strict_vector_ftrunc_v8f64(<8 x double> %f) #0 { ; Strict-FP trunc on <8 x double>: must select a single vrndscalepd with immediate 11 (0xB).
+; CHECK-LABEL: strict_vector_ftrunc_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; no rounding-mode metadata
+ ret <8 x double> %res
+}
+
+define <16 x float> @strict_vector_frint_v16f32(<16 x float> %f) #0 { ; Strict-FP rint on <16 x float>: must select a single vrndscaleps with immediate 4.
+; CHECK-LABEL: strict_vector_frint_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <16 x float> @llvm.experimental.constrained.rint.v16f32(<16 x float> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; rint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <16 x float> %res
+}
+
+define <8 x double> @strict_vector_frint_v8f64(<8 x double> %f) #0 { ; Strict-FP rint on <8 x double>: must select a single vrndscalepd with immediate 4.
+; CHECK-LABEL: strict_vector_frint_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <8 x double> @llvm.experimental.constrained.rint.v8f64(<8 x double> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; rint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <8 x double> %res
+}
+
+define <16 x float> @strict_vector_fnearbyint_v16f32(<16 x float> %f) #0 { ; Strict-FP nearbyint on <16 x float>: must select a single vrndscaleps with immediate 12.
+; CHECK-LABEL: strict_vector_fnearbyint_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; nearbyint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <16 x float> %res
+}
+
+define <8 x double> @strict_vector_fnearbyint_v8f64(<8 x double> %f) #0 { ; Strict-FP nearbyint on <8 x double>: must select a single vrndscalepd with immediate 12.
+; CHECK-LABEL: strict_vector_fnearbyint_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; nearbyint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <8 x double> %res
+}
+
attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-round-128.ll b/llvm/test/CodeGen/X86/vec-strict-round-128.ll
new file mode 100644
index 000000000000..7d6a66e33666
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-round-128.ll
@@ -0,0 +1,174 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
+
+declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata)
+declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata)
+declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata)
+declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+
+define <4 x float> @fceilv4f32(<4 x float> %f) #0 { ; Strict-FP ceil on <4 x float>: one roundps (with +sse4.1) or vroundps (with +avx / +avx512f), immediate 10 (0xA).
+; SSE41-LABEL: fceilv4f32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: roundps $10, %xmm0, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: fceilv4f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vroundps $10, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(
+ <4 x float> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; ceil takes no rounding-mode metadata
+ ret <4 x float> %res
+}
+
+define <2 x double> @fceilv2f64(<2 x double> %f) #0 { ; Strict-FP ceil on <2 x double>: one roundpd (with +sse4.1) or vroundpd (with +avx / +avx512f), immediate 10 (0xA).
+; SSE41-LABEL: fceilv2f64:
+; SSE41: # %bb.0:
+; SSE41-NEXT: roundpd $10, %xmm0, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: fceilv2f64:
+; AVX: # %bb.0:
+; AVX-NEXT: vroundpd $10, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
+ <2 x double> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; ceil takes no rounding-mode metadata
+ ret <2 x double> %res
+}
+
+define <4 x float> @ffloorv4f32(<4 x float> %f) #0 { ; Strict-FP floor on <4 x float>: one roundps (with +sse4.1) or vroundps (with +avx / +avx512f), immediate 9 (0x9).
+; SSE41-LABEL: ffloorv4f32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: roundps $9, %xmm0, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: ffloorv4f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vroundps $9, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %res = call <4 x float> @llvm.experimental.constrained.floor.v4f32(
+ <4 x float> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; floor takes no rounding-mode metadata
+ ret <4 x float> %res
+}
+
+define <2 x double> @ffloorv2f64(<2 x double> %f) #0 { ; Strict-FP floor on <2 x double>: one roundpd (with +sse4.1) or vroundpd (with +avx / +avx512f), immediate 9 (0x9).
+; SSE41-LABEL: ffloorv2f64:
+; SSE41: # %bb.0:
+; SSE41-NEXT: roundpd $9, %xmm0, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: ffloorv2f64:
+; AVX: # %bb.0:
+; AVX-NEXT: vroundpd $9, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %res = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
+ <2 x double> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; floor takes no rounding-mode metadata
+ ret <2 x double> %res
+}
+
+define <4 x float> @ftruncv4f32(<4 x float> %f) #0 { ; Strict-FP trunc on <4 x float>: one roundps (with +sse4.1) or vroundps (with +avx / +avx512f), immediate 11 (0xB).
+; SSE41-LABEL: ftruncv4f32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: roundps $11, %xmm0, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: ftruncv4f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vroundps $11, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(
+ <4 x float> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; trunc takes no rounding-mode metadata
+ ret <4 x float> %res
+}
+
+define <2 x double> @ftruncv2f64(<2 x double> %f) #0 { ; Strict-FP trunc on <2 x double>: one roundpd (with +sse4.1) or vroundpd (with +avx / +avx512f), immediate 11 (0xB).
+; SSE41-LABEL: ftruncv2f64:
+; SSE41: # %bb.0:
+; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: ftruncv2f64:
+; AVX: # %bb.0:
+; AVX-NEXT: vroundpd $11, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
+ <2 x double> %f, metadata !"fpexcept.strict") ; exception-behavior operand only; trunc takes no rounding-mode metadata
+ ret <2 x double> %res
+}
+
+define <4 x float> @frintv4f32(<4 x float> %f) #0 { ; Strict-FP rint on <4 x float>: one roundps (with +sse4.1) or vroundps (with +avx / +avx512f), immediate 4.
+; SSE41-LABEL: frintv4f32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: roundps $4, %xmm0, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: frintv4f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vroundps $4, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %res = call <4 x float> @llvm.experimental.constrained.rint.v4f32(
+ <4 x float> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; rint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <4 x float> %res
+}
+
+define <2 x double> @frintv2f64(<2 x double> %f) #0 { ; Strict-FP rint on <2 x double>: one roundpd (with +sse4.1) or vroundpd (with +avx / +avx512f), immediate 4.
+; SSE41-LABEL: frintv2f64:
+; SSE41: # %bb.0:
+; SSE41-NEXT: roundpd $4, %xmm0, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: frintv2f64:
+; AVX: # %bb.0:
+; AVX-NEXT: vroundpd $4, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %res = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+ <2 x double> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; rint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <2 x double> %res
+}
+
+define <4 x float> @fnearbyintv4f32(<4 x float> %f) #0 { ; Strict-FP nearbyint on <4 x float>: one roundps (with +sse4.1) or vroundps (with +avx / +avx512f), immediate 12.
+; SSE41-LABEL: fnearbyintv4f32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: roundps $12, %xmm0, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: fnearbyintv4f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vroundps $12, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
+ <4 x float> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; nearbyint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <4 x float> %res
+}
+
+define <2 x double> @fnearbyintv2f64(<2 x double> %f) #0 { ; Strict-FP nearbyint on <2 x double>: one roundpd (with +sse4.1) or vroundpd (with +avx / +avx512f), immediate 12.
+; SSE41-LABEL: fnearbyintv2f64:
+; SSE41: # %bb.0:
+; SSE41-NEXT: roundpd $12, %xmm0, %xmm0
+; SSE41-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: fnearbyintv2f64:
+; AVX: # %bb.0:
+; AVX-NEXT: vroundpd $12, %xmm0, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+ <2 x double> %f,
+ metadata !"round.dynamic", metadata !"fpexcept.strict") ; nearbyint carries a rounding-mode operand followed by the exception-behavior operand
+ ret <2 x double> %res
+}
+
+attributes #0 = { strictfp }
More information about the llvm-commits mailing list