[clang] cf0da91 - [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE
Bradley Smith via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 17 04:41:52 PDT 2021
Author: Bradley Smith
Date: 2021-03-17T11:41:22Z
New Revision: cf0da91ba5e192920809e30dbb359042c2f2112a
URL: https://github.com/llvm/llvm-project/commit/cf0da91ba5e192920809e30dbb359042c2f2112a
DIFF: https://github.com/llvm/llvm-project/commit/cf0da91ba5e192920809e30dbb359042c2f2112a.diff
LOG: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE
Previously NEON used a target-specific intrinsic for frintn. Now that
the FROUNDEVEN ISD node exists, move over to that node instead and add
codegen support for it for both NEON and fixed length SVE.
Differential Revision: https://reviews.llvm.org/D98487
Added:
llvm/test/CodeGen/AArch64/frintn.ll
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/aarch64-neon-intrinsics.c
clang/test/CodeGen/aarch64-neon-misc.c
clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
clang/test/CodeGen/arm-neon-directed-rounding.c
clang/test/CodeGen/arm64-vrnd.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/IR/AutoUpgrade.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/arm64-vcvt.ll
llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
llvm/test/CodeGen/AArch64/f16-instructions.ll
llvm/test/CodeGen/AArch64/fp-intrinsics.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
llvm/test/CodeGen/AArch64/vec-libcalls.ll
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e5778c0c78f7..8d1d3c50870c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10620,17 +10620,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vrndnh_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::aarch64_neon_frintn;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_roundeven
+ : Intrinsic::roundeven;
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
}
case NEON::BI__builtin_neon_vrndn_v:
case NEON::BI__builtin_neon_vrndnq_v: {
- Int = Intrinsic::aarch64_neon_frintn;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_roundeven
+ : Intrinsic::roundeven;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
}
case NEON::BI__builtin_neon_vrndns_f32: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Int = Intrinsic::aarch64_neon_frintn;
+ Int = Builder.getIsFPConstrained()
+ ? Intrinsic::experimental_constrained_roundeven
+ : Intrinsic::roundeven;
return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
}
case NEON::BI__builtin_neon_vrndph_f16: {
diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c
index a56080bace0f..76f5cfd3aaa8 100644
--- a/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -18155,7 +18155,7 @@ float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
// CHECK-LABEL: @test_vrndn_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
+// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDN1_I]]
float64x1_t test_vrndn_f64(float64x1_t a) {
return vrndn_f64(a);
diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c
index 4f85f67cdaec..ed9af88b56c1 100644
--- a/clang/test/CodeGen/aarch64-neon-misc.c
+++ b/clang/test/CodeGen/aarch64-neon-misc.c
@@ -2287,7 +2287,7 @@ float64x2_t test_vcvt_high_f64_f32(float32x4_t a) {
// CHECK-LABEL: @test_vrndnq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a)
+// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %a)
// CHECK: ret <2 x double> [[VRNDN1_I]]
float64x2_t test_vrndnq_f64(float64x2_t a) {
return vrndnq_f64(a);
diff --git a/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
index 32161146ef45..01df5b0d1930 100644
--- a/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -366,7 +366,7 @@ float16_t test_vrndmh_f16(float16_t a) {
}
// CHECK-LABEL: test_vrndnh_f16
-// CHECK: [[RND:%.*]] = call half @llvm.aarch64.neon.frintn.f16(half %a)
+// CHECK: [[RND:%.*]] = call half @llvm.roundeven.f16(half %a)
// CHECK: ret half [[RND]]
float16_t test_vrndnh_f16(float16_t a) {
return vrndnh_f16(a);
diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
index 5c4f9053a9ae..401aa4da8d5c 100644
--- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -348,14 +348,14 @@ float16x8_t test_vrndmq_f16(float16x8_t a) {
}
// CHECK-LABEL: test_vrndn_f16
-// CHECK: [[RND:%.*]] = call <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half> %a)
+// CHECK: [[RND:%.*]] = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %a)
// CHECK: ret <4 x half> [[RND]]
float16x4_t test_vrndn_f16(float16x4_t a) {
return vrndn_f16(a);
}
// CHECK-LABEL: test_vrndnq_f16
-// CHECK: [[RND:%.*]] = call <8 x half> @llvm.aarch64.neon.frintn.v8f16(<8 x half> %a)
+// CHECK: [[RND:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %a)
// CHECK: ret <8 x half> [[RND]]
float16x8_t test_vrndnq_f16(float16x8_t a) {
return vrndnq_f16(a);
diff --git a/clang/test/CodeGen/arm-neon-directed-rounding.c b/clang/test/CodeGen/arm-neon-directed-rounding.c
index f329c669ba56..c493e3897ab6 100644
--- a/clang/test/CodeGen/arm-neon-directed-rounding.c
+++ b/clang/test/CodeGen/arm-neon-directed-rounding.c
@@ -41,7 +41,7 @@ float32x4_t test_vrndmq_f32(float32x4_t a) {
// CHECK-LABEL: define{{.*}} <2 x float> @test_vrndn_f32(<2 x float> %a)
// CHECK-A32: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a)
-// CHECK-A64: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a)
+// CHECK-A64: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %a)
// CHECK: ret <2 x float> [[VRNDN_V1_I]]
float32x2_t test_vrndn_f32(float32x2_t a) {
return vrndn_f32(a);
@@ -49,7 +49,7 @@ float32x2_t test_vrndn_f32(float32x2_t a) {
// CHECK-LABEL: define{{.*}} <4 x float> @test_vrndnq_f32(<4 x float> %a)
// CHECK-A32: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a)
-// CHECK-A64: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a)
+// CHECK-A64: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)
// CHECK: ret <4 x float> [[VRNDNQ_V1_I]]
float32x4_t test_vrndnq_f32(float32x4_t a) {
return vrndnq_f32(a);
@@ -105,7 +105,7 @@ float32x4_t test_vrndq_f32(float32x4_t a) {
// CHECK-LABEL: define{{.*}} float @test_vrndns_f32(float %a)
// CHECK-A32: [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float %a)
-// CHECK-A64: [[VRNDN_I:%.*]] = call float @llvm.aarch64.neon.frintn.f32(float %a)
+// CHECK-A64: [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float %a)
// CHECK: ret float [[VRNDN_I]]
float32_t test_vrndns_f32(float32_t a) {
return vrndns_f32(a);
diff --git a/clang/test/CodeGen/arm64-vrnd.c b/clang/test/CodeGen/arm64-vrnd.c
index c710caedf181..24298f896d31 100644
--- a/clang/test/CodeGen/arm64-vrnd.c
+++ b/clang/test/CodeGen/arm64-vrnd.c
@@ -6,7 +6,7 @@ float64x2_t rnd5(float64x2_t a) { return vrndq_f64(a); }
// CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double>
float64x2_t rnd9(float64x2_t a) { return vrndnq_f64(a); }
-// CHECK: call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>
+// CHECK: call <2 x double> @llvm.roundeven.v2f64(<2 x double>
float64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); }
// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double>
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 8f8f713fb5f0..91b3c8fe2114 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -471,10 +471,6 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
def int_aarch64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic;
def int_aarch64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic;
- // Vector FP Rounding: only ties to even is unrepresented by a normal
- // intrinsic.
- def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
-
// v8.5-A Vector FP Rounding
def int_aarch64_neon_frint32x : AdvSIMD_1FloatArg_Intrinsic;
def int_aarch64_neon_frint32z : AdvSIMD_1FloatArg_Intrinsic;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 247ac68034b2..7a96ed3fd93a 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -152,10 +152,10 @@ def SDTIntTruncOp : SDTypeProfile<1, 1, [ // trunc
def SDTFPUnaryOp : SDTypeProfile<1, 1, [ // fneg, fsqrt, etc
SDTCisSameAs<0, 1>, SDTCisFP<0>
]>;
-def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fround
+def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fpround
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
]>;
-def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fextend
+def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fpextend
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
]>;
def SDTIntToFPOp : SDTypeProfile<1, 1, [ // [su]int_to_fp
@@ -486,6 +486,7 @@ def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>;
def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>;
def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>;
def fround : SDNode<"ISD::FROUND" , SDTFPUnaryOp>;
+def froundeven : SDNode<"ISD::FROUNDEVEN" , SDTFPUnaryOp>;
def lround : SDNode<"ISD::LROUND" , SDTFPToIntOp>;
def llround : SDNode<"ISD::LLROUND" , SDTFPToIntOp>;
@@ -547,6 +548,8 @@ def strict_llround : SDNode<"ISD::STRICT_LLROUND",
SDTFPToIntOp, [SDNPHasChain]>;
def strict_fround : SDNode<"ISD::STRICT_FROUND",
SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_froundeven : SDNode<"ISD::STRICT_FROUNDEVEN",
+ SDTFPUnaryOp, [SDNPHasChain]>;
def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM",
@@ -1414,6 +1417,9 @@ def any_llround : PatFrags<(ops node:$src),
def any_fround : PatFrags<(ops node:$src),
[(strict_fround node:$src),
(fround node:$src)]>;
+def any_froundeven : PatFrags<(ops node:$src),
+ [(strict_froundeven node:$src),
+ (froundeven node:$src)]>;
def any_ftrunc : PatFrags<(ops node:$src),
[(strict_ftrunc node:$src),
(ftrunc node:$src)]>;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e0d152b5ec21..c5d085fafbc7 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -548,6 +548,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
F->arg_begin()->getType());
return true;
}
+ if (Name.startswith("aarch64.neon.frintn")) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
+ F->arg_begin()->getType());
+ return true;
+ }
if (Name.startswith("arm.neon.vclz")) {
Type* args[2] = {
F->arg_begin()->getType(),
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e61a6edac34c..3c823f5ac522 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -605,6 +605,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::FRINT, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
@@ -624,6 +625,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
+ setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
@@ -648,6 +650,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
+ setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
@@ -667,6 +670,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
+ setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
setOperationAction(ISD::FMINIMUM, Ty, Legal);
@@ -684,6 +688,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, MVT::f16, Legal);
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
@@ -943,6 +948,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
setOperationAction(ISD::FREM, MVT::v1f64, Expand);
setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
+ setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
@@ -1069,6 +1075,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
+ setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
@@ -1079,6 +1086,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
+ setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
}
@@ -1403,6 +1411,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f62ff547333a..d5dd0ae99463 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3796,12 +3796,9 @@ defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
-defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>;
+defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
-def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
- (FRINTNDr FPR64:$Rn)>;
-
defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
@@ -4090,7 +4087,7 @@ defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
-defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
+defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 67eba3f4e307..43ed1aba735c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -590,7 +590,7 @@ define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
;CHECK-NOT: ld1
;CHECK: frintn.2s v0, v0
;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A)
+ %tmp3 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
ret <2 x float> %tmp3
}
@@ -599,7 +599,7 @@ define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
;CHECK-NOT: ld1
;CHECK: frintn.4s v0, v0
;CHECK-NEXT: ret
- %tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A)
+ %tmp3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
ret <4 x float> %tmp3
}
@@ -608,13 +608,13 @@ define <2 x double> @frintn_2d(<2 x double> %A) nounwind {
;CHECK-NOT: ld1
;CHECK: frintn.2d v0, v0
;CHECK-NEXT: ret
- %tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A)
+ %tmp3 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A)
ret <2 x double> %tmp3
}
-declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone
+declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) nounwind readnone
; FALLBACK-NOT: remark{{.*}}frintp_2s
define <2 x float> @frintp_2s(<2 x float> %A) nounwind {
diff --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
index 524b6e0528e0..65bb08ec2e4e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@@ -245,6 +245,20 @@ define %v4f16 @test_v4f16.round(%v4f16 %a) {
%1 = call %v4f16 @llvm.round.v4f16(%v4f16 %a)
ret %v4f16 %1
}
+define %v4f16 @test_v4f16.roundeven(%v4f16 %a) {
+ ; CHECK-LABEL: test_v4f16.roundeven:
+ ; CHECK-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frintn.4h
+ ; CHECK-FP16-NEXT: ret
+ ; GISEL-LABEL: test_v4f16.roundeven:
+ ; GISEL-NOFP16-COUNT-4: frintn s{{[0-9]+}}, s{{[0-9]+}}
+ ; GISEL-FP16-NOT: fcvt
+ ; GISEL-FP16: frintn.4h
+ ; GISEL-FP16-NEXT: ret
+ %1 = call %v4f16 @llvm.roundeven.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
declare %v4f16 @llvm.powi.v4f16(%v4f16, i32) #0
@@ -264,6 +278,7 @@ declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
declare %v4f16 @llvm.round.v4f16(%v4f16) #0
+declare %v4f16 @llvm.roundeven.v4f16(%v4f16) #0
;;;
@@ -502,6 +517,20 @@ define %v8f16 @test_v8f16.round(%v8f16 %a) {
%1 = call %v8f16 @llvm.round.v8f16(%v8f16 %a)
ret %v8f16 %1
}
+define %v8f16 @test_v8f16.roundeven(%v8f16 %a) {
+ ; CHECK-LABEL: test_v8f16.roundeven:
+ ; CHECK-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frintn.8h
+ ; CHECK-FP16-NEXT: ret
+ ; GISEL-LABEL: test_v8f16.roundeven:
+ ; GISEL-NOFP16-COUNT-8: frintn s{{[0-9]+}}, s{{[0-9]+}}
+ ; GISEL-FP16-NOT: fcvt
+ ; GISEL-FP16: frintn.8h
+ ; GISEL-FP16-NEXT: ret
+ %1 = call %v8f16 @llvm.roundeven.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
declare %v8f16 @llvm.powi.v8f16(%v8f16, i32) #0
@@ -521,6 +550,7 @@ declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0
declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
declare %v8f16 @llvm.round.v8f16(%v8f16) #0
+declare %v8f16 @llvm.roundeven.v8f16(%v8f16) #0
;;; Float vectors
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index a324034f5b20..bb445f08d1ed 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -796,6 +796,7 @@ declare half @llvm.trunc.f16(half %a) #0
declare half @llvm.rint.f16(half %a) #0
declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
+declare half @llvm.roundeven.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
declare half @llvm.aarch64.neon.frecpe.f16(half %a) #0
declare half @llvm.aarch64.neon.frecpx.f16(half %a) #0
@@ -1313,6 +1314,32 @@ define half @test_round(half %a) #0 {
ret half %r
}
+; CHECK-CVT-LABEL: test_roundeven:
+; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
+; CHECK-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]]
+; CHECK-CVT-NEXT: fcvt h0, [[INT32]]
+; CHECK-CVT-NEXT: ret
+
+; GISEL-CVT-LABEL: test_roundeven:
+; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
+; GISEL-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]]
+; GISEL-CVT-NEXT: fcvt h0, [[INT32]]
+; GISEL-CVT-NEXT: ret
+
+
+; CHECK-FP16-LABEL: test_roundeven:
+; CHECK-FP16-NEXT: frintn h0, h0
+; CHECK-FP16-NEXT: ret
+
+; GISEL-FP16-LABEL: test_roundeven:
+; GISEL-FP16-NEXT: frintn h0, h0
+; GISEL-FP16-NEXT: ret
+
+define half @test_roundeven(half %a) #0 {
+ %r = call half @llvm.roundeven.f16(half %a)
+ ret half %r
+}
+
; CHECK-CVT-LABEL: test_fmuladd:
; CHECK-CVT-NEXT: fcvt s1, h1
; CHECK-CVT-NEXT: fcvt s0, h0
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
index 3c412a5f7e0e..f2694ab08a0d 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
@@ -266,6 +266,13 @@ define float @round_f32(float %x) #0 {
ret float %val
}
+; CHECK-LABEL: roundeven_f32:
+; CHECK: frintn s0, s0
+define float @roundeven_f32(float %x) #0 {
+ %val = call float @llvm.experimental.constrained.roundeven.f32(float %x, metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
; CHECK-LABEL: trunc_f32:
; CHECK: frintz s0, s0
define float @trunc_f32(float %x) #0 {
@@ -729,6 +736,13 @@ define double @round_f64(double %x) #0 {
ret double %val
}
+; CHECK-LABEL: roundeven_f64:
+; CHECK: frintn d0, d0
+define double @roundeven_f64(double %x) #0 {
+ %val = call double @llvm.experimental.constrained.roundeven.f64(double %x, metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
; CHECK-LABEL: trunc_f64:
; CHECK: frintz d0, d0
define double @trunc_f64(double %x) #0 {
@@ -1474,6 +1488,7 @@ declare float @llvm.experimental.constrained.floor.f32(float, metadata)
declare i32 @llvm.experimental.constrained.lround.f32(float, metadata)
declare i64 @llvm.experimental.constrained.llround.f32(float, metadata)
declare float @llvm.experimental.constrained.round.f32(float, metadata)
+declare float @llvm.experimental.constrained.roundeven.f32(float, metadata)
declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
@@ -1515,6 +1530,7 @@ declare double @llvm.experimental.constrained.floor.f64(double, metadata)
declare i32 @llvm.experimental.constrained.lround.f64(double, metadata)
declare i64 @llvm.experimental.constrained.llround.f64(double, metadata)
declare double @llvm.experimental.constrained.round.f64(double, metadata)
+declare double @llvm.experimental.constrained.roundeven.f64(double, metadata)
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
diff --git a/llvm/test/CodeGen/AArch64/frintn.ll b/llvm/test/CodeGen/AArch64/frintn.ll
new file mode 100644
index 000000000000..2dc03db39a1a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/frintn.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=aarch64-eabi -mattr=+fullfp16 %s -o - | FileCheck %s
+
+; The llvm.aarch64.neon.frintn intrinsic should be auto-upgraded to the
+; target-independent roundeven intrinsic.
+
+define <4 x half> @frintn_4h(<4 x half> %A) nounwind {
+;CHECK-LABEL: frintn_4h:
+;CHECK: frintn v0.4h, v0.4h
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half> %A)
+ ret <4 x half> %tmp3
+}
+
+define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: frintn_2s:
+;CHECK: frintn v0.2s, v0.2s
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: frintn_4s:
+;CHECK: frintn v0.4s, v0.4s
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frintn_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: frintn_2d:
+;CHECK: frintn v0.2d, v0.2d
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A)
+ ret <2 x double> %tmp3
+}
+
+declare <4 x half> @llvm.aarch64.neon.frintn.v4f16(<4 x half>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
index a3a677a5d9f8..1d7472707b0a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@@ -1255,6 +1255,253 @@ define void @frinta_v32f64(<32 x double>* %a) #0 {
ret void
}
+;
+; ROUNDEVEN -> FRINTN
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
+; CHECK-LABEL: frintn_v4f16:
+; CHECK: frintn v0.4h, v0.4h
+; CHECK-NEXT: ret
+ %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
+ ret <4 x half> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
+; CHECK-LABEL: frintn_v8f16:
+; CHECK: frintn v0.8h, v0.8h
+; CHECK-NEXT: ret
+ %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
+ ret <8 x half> %res
+}
+
+define void @frintn_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+ %op = load <16 x half>, <16 x half>* %a
+ %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
+ store <16 x half> %res, <16 x half>* %a
+ ret void
+}
+
+define void @frintn_v32f16(<32 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
+; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-NEXT: ret
+ %op = load <32 x half>, <32 x half>* %a
+ %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
+ store <32 x half> %res, <32 x half>* %a
+ ret void
+}
+
+define void @frintn_v64f16(<64 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_1024-NEXT: ret
+ %op = load <64 x half>, <64 x half>* %a
+ %res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
+ store <64 x half> %res, <64 x half>* %a
+ ret void
+}
+
+define void @frintn_v128f16(<128 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v128f16:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
+; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_2048-NEXT: ret
+ %op = load <128 x half>, <128 x half>* %a
+ %res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
+ store <128 x half> %res, <128 x half>* %a
+ ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define <2 x float> @frintn_v2f32(<2 x float> %op) #0 {
+; CHECK-LABEL: frintn_v2f32:
+; CHECK: frintn v0.2s, v0.2s
+; CHECK-NEXT: ret
+ %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
+ ret <2 x float> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <4 x float> @frintn_v4f32(<4 x float> %op) #0 {
+; CHECK-LABEL: frintn_v4f32:
+; CHECK: frintn v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
+ ret <4 x float> %res
+}
+
+define void @frintn_v8f32(<8 x float>* %a) #0 {
+; CHECK-LABEL: frintn_v8f32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
+; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
+; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
+; CHECK-NEXT: ret
+ %op = load <8 x float>, <8 x float>* %a
+ %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
+ store <8 x float> %res, <8 x float>* %a
+ ret void
+}
+
+define void @frintn_v16f32(<16 x float>* %a) #0 {
+; CHECK-LABEL: frintn_v16f32:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
+; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
+; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
+; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-NEXT: ret
+ %op = load <16 x float>, <16 x float>* %a
+ %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
+ store <16 x float> %res, <16 x float>* %a
+ ret void
+}
+
+define void @frintn_v32f32(<32 x float>* %a) #0 { ; 32 x f32 = 1024 bits, rounded in place through %a
+; CHECK-LABEL: frintn_v32f32:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
+; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
+; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
+; VBITS_GE_1024-NEXT: ret
+ %op = load <32 x float>, <32 x float>* %a
+ %res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op) ; only the single-register (vl32) lowering is checked; no split case
+ store <32 x float> %res, <32 x float>* %a
+ ret void
+}
+
+define void @frintn_v64f32(<64 x float>* %a) #0 { ; 64 x f32 = 2048 bits, rounded in place through %a
+; CHECK-LABEL: frintn_v64f32:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
+; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
+; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
+; VBITS_GE_2048-NEXT: ret
+ %op = load <64 x float>, <64 x float>* %a
+ %res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op) ; only the single-register (vl64) lowering is checked; no split case
+ store <64 x float> %res, <64 x float>* %a
+ ret void
+}
+
+; Don't use SVE for 64-bit vectors: roundeven on <1 x double> should use the D-register form (frintn d0, d0).
+define <1 x double> @frintn_v1f64(<1 x double> %op) #0 {
+; CHECK-LABEL: frintn_v1f64:
+; CHECK: frintn d0, d0
+; CHECK-NEXT: ret
+ %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op) ; operation under test
+ ret <1 x double> %res
+}
+
+; Don't use SVE for 128-bit vectors: roundeven on <2 x double> should select the NEON form (frintn v0.2d).
+define <2 x double> @frintn_v2f64(<2 x double> %op) #0 {
+; CHECK-LABEL: frintn_v2f64:
+; CHECK: frintn v0.2d, v0.2d
+; CHECK-NEXT: ret
+ %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op) ; operation under test
+ ret <2 x double> %res
+}
+
+define void @frintn_v4f64(<4 x double>* %a) #0 { ; 4 x f64 = 256 bits, rounded in place through %a
+; CHECK-LABEL: frintn_v4f64:
+; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
+; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
+; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
+; CHECK-NEXT: ret
+ %op = load <4 x double>, <4 x double>* %a
+ %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op) ; expected to become one predicated SVE frintn (vl4)
+ store <4 x double> %res, <4 x double>* %a
+ ret void
+}
+
+define void @frintn_v8f64(<8 x double>* %a) #0 { ; 8 x f64 = 512 bits, rounded in place through %a
+; CHECK-LABEL: frintn_v8f64:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
+; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
+; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation: the vector is split into a low half at x0 and a high half at x0 + 32 bytes.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
+; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]]
+; VBITS_EQ_256-NEXT: ret
+ %op = load <8 x double>, <8 x double>* %a
+ %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op) ; operation under test
+ store <8 x double> %res, <8 x double>* %a
+ ret void
+}
+
+define void @frintn_v16f64(<16 x double>* %a) #0 { ; 16 x f64 = 1024 bits, rounded in place through %a
+; CHECK-LABEL: frintn_v16f64:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
+; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
+; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
+; VBITS_GE_1024-NEXT: ret
+ %op = load <16 x double>, <16 x double>* %a
+ %res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op) ; only the single-register (vl16) lowering is checked; no split case
+ store <16 x double> %res, <16 x double>* %a
+ ret void
+}
+
+define void @frintn_v32f64(<32 x double>* %a) #0 { ; 32 x f64 = 2048 bits, rounded in place through %a
+; CHECK-LABEL: frintn_v32f64:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
+; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
+; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
+; VBITS_GE_2048-NEXT: ret
+ %op = load <32 x double>, <32 x double>* %a
+ %res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op) ; only the single-register (vl32) lowering is checked; no split case
+ store <32 x double> %res, <32 x double>* %a
+ ret void
+}
+
;
; TRUNC -> FRINTZ
;
@@ -1599,6 +1846,25 @@ declare <8 x double> @llvm.round.v8f64(<8 x double>)
declare <16 x double> @llvm.round.v16f64(<16 x double>)
declare <32 x double> @llvm.round.v32f64(<32 x double>)
+declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) ; roundeven overloads, grouped by element type: half ...
+declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
+declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
+declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
+declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
+declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
+declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) ; ... float ...
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
+declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
+declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
+declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
+declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
+declare <1 x double> @llvm.roundeven.v1f64(<1 x double>) ; ... double
+declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
+declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
+declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
+declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
+declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)
+
declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
index 80ec45ca49cb..e96a4b815d6b 100644
--- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -29,6 +29,7 @@ declare <3 x float> @llvm.log2.v3f32(<3 x float>)
declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
declare <3 x float> @llvm.rint.v3f32(<3 x float>)
declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
@@ -478,6 +479,15 @@ define <3 x float> @round_v3f32(<3 x float> %x) nounwind {
ret <3 x float> %r
}
+define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind { ; roundeven on a 3-element float vector
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintn v0.4s, v0.4s
+; CHECK-NEXT: ret
+ %r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x) ; expected to become one 4-lane frintn (v0.4s) followed directly by ret
+ ret <3 x float> %r
+}
+
define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: sqrt_v3f32:
; CHECK: // %bb.0:
More information about the cfe-commits
mailing list