[llvm] 9fcea2e - [ARM] Add neon vector support for roundeven
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 07:27:40 PDT 2025
Author: David Green
Date: 2025-07-04T15:27:33+01:00
New Revision: 9fcea2e4656acbf6025e73da641e619ff12ee3d0
URL: https://github.com/llvm/llvm-project/commit/9fcea2e4656acbf6025e73da641e619ff12ee3d0
DIFF: https://github.com/llvm/llvm-project/commit/9fcea2e4656acbf6025e73da641e619ff12ee3d0.diff
LOG: [ARM] Add neon vector support for roundeven
As per #142559, this marks froundeven as legal for the Neon vector types and
auto-upgrades the existing arm.neon.vrintn intrinsics to llvm.roundeven.
Added:
Modified:
clang/lib/CodeGen/TargetBuiltins/ARM.cpp
clang/test/CodeGen/arm-neon-directed-rounding.c
clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/IR/AutoUpgrade.cpp
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMInstrNEON.td
llvm/lib/Target/ARM/ARMInstrVFP.td
llvm/test/CodeGen/ARM/vrint.ll
llvm/test/CodeGen/ARM/vrintn.ll
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index fcfb92d65958e..7e6a47fd7c103 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -845,8 +845,8 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP0(vrndiq_v),
NEONMAP1(vrndm_v, floor, Add1ArgType),
NEONMAP1(vrndmq_v, floor, Add1ArgType),
- NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
- NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
+ NEONMAP1(vrndn_v, roundeven, Add1ArgType),
+ NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
NEONMAP1(vrndp_v, ceil, Add1ArgType),
NEONMAP1(vrndpq_v, ceil, Add1ArgType),
NEONMAP1(vrndq_v, trunc, Add1ArgType),
@@ -3132,7 +3132,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vrndns_f32: {
Value *Arg = EmitScalarExpr(E->getArg(0));
llvm::Type *Tys[] = {Arg->getType()};
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
+ Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
return Builder.CreateCall(F, {Arg}, "vrndn"); }
case NEON::BI__builtin_neon_vset_lane_i8:
diff --git a/clang/test/CodeGen/arm-neon-directed-rounding.c b/clang/test/CodeGen/arm-neon-directed-rounding.c
index 47fa6ade44830..08667314e37ce 100644
--- a/clang/test/CodeGen/arm-neon-directed-rounding.c
+++ b/clang/test/CodeGen/arm-neon-directed-rounding.c
@@ -116,7 +116,7 @@ float32x4_t test_vrndmq_f32(float32x4_t a) {
// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK-A32-NEXT: [[VRNDN_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-A32-NEXT: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> [[VRNDN_V_I]])
+// CHECK-A32-NEXT: [[VRNDN_V1_I:%.*]] = call <2 x float> @llvm.roundeven.v2f32(<2 x float> [[VRNDN_V_I]])
// CHECK-A32-NEXT: [[VRNDN_V2_I:%.*]] = bitcast <2 x float> [[VRNDN_V1_I]] to <8 x i8>
// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDN_V2_I]] to <2 x i32>
// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
@@ -141,7 +141,7 @@ float32x2_t test_vrndn_f32(float32x2_t a) {
// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
// CHECK-A32-NEXT: [[VRNDNQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-A32-NEXT: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> [[VRNDNQ_V_I]])
+// CHECK-A32-NEXT: [[VRNDNQ_V1_I:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[VRNDNQ_V_I]])
// CHECK-A32-NEXT: [[VRNDNQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDNQ_V1_I]] to <16 x i8>
// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDNQ_V2_I]] to <4 x i32>
// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
@@ -310,24 +310,18 @@ float32x4_t test_vrndq_f32(float32x4_t a) {
return vrndq_f32(a);
}
-// CHECK-A32-LABEL: define dso_local float @test_vrndns_f32(
-// CHECK-A32-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-A32-NEXT: [[ENTRY:.*:]]
-// CHECK-A32-NEXT: [[VRNDN_I:%.*]] = call float @llvm.arm.neon.vrintn.f32(float [[A]])
-// CHECK-A32-NEXT: ret float [[VRNDN_I]]
-//
-// CHECK-A64-LABEL: define dso_local float @test_vrndns_f32(
-// CHECK-A64-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-A64-NEXT: [[ENTRY:.*:]]
-// CHECK-A64-NEXT: [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float [[A]])
-// CHECK-A64-NEXT: ret float [[VRNDN_I]]
+// CHECK-LABEL: define dso_local float @test_vrndns_f32(
+// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VRNDN_I:%.*]] = call float @llvm.roundeven.f32(float [[A]])
+// CHECK-NEXT: ret float [[VRNDN_I]]
//
float32_t test_vrndns_f32(float32_t a) {
return vrndns_f32(a);
}
// CHECK-LABEL: define dso_local <2 x float> @test_vrndi_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
diff --git a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
index c55bb9ca0d78c..17d4eef1f7631 100644
--- a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
@@ -618,7 +618,7 @@ float16x8_t test_vrndmq_f16(float16x8_t a) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-NEXT: [[VRNDN_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[VRNDN_V1_I:%.*]] = call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> [[VRNDN_V_I]])
+// CHECK-NEXT: [[VRNDN_V1_I:%.*]] = call <4 x half> @llvm.roundeven.v4f16(<4 x half> [[VRNDN_V_I]])
// CHECK-NEXT: [[VRNDN_V2_I:%.*]] = bitcast <4 x half> [[VRNDN_V1_I]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDN_V2_I]] to <4 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
@@ -634,7 +634,7 @@ float16x4_t test_vrndn_f16(float16x4_t a) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-NEXT: [[VRNDNQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[VRNDNQ_V1_I:%.*]] = call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> [[VRNDNQ_V_I]])
+// CHECK-NEXT: [[VRNDNQ_V1_I:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[VRNDNQ_V_I]])
// CHECK-NEXT: [[VRNDNQ_V2_I:%.*]] = bitcast <8 x half> [[VRNDNQ_V1_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDNQ_V2_I]] to <8 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index c7929e78a5fda..1219ce2f86da8 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -451,9 +451,6 @@ class Neon_3Arg_Long_Intrinsic
LLVMTruncatedType<0>],
[IntrNoMem]>;
-class Neon_1FloatArg_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-
class Neon_CvtFxToFP_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
[IntrNoMem]>;
@@ -677,9 +674,6 @@ def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
-// Vector and Scalar Rounding.
-def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
-
// De-interleaving vector loads from N-element structures.
// Source operands are the address and alignment.
def int_arm_neon_vld1 : DefaultAttrsIntrinsic<
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 1758b63a76dcd..86285a03c66bb 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -720,6 +720,7 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
.StartsWith("vqsubs.", Intrinsic::ssub_sat)
.StartsWith("vqsubu.", Intrinsic::usub_sat)
.StartsWith("vrinta.", Intrinsic::round)
+ .StartsWith("vrintn.", Intrinsic::roundeven)
.StartsWith("vrintm.", Intrinsic::floor)
.StartsWith("vrintp.", Intrinsic::ceil)
.StartsWith("vrintx.", Intrinsic::rint)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d107fb3884079..b073c8651dcdb 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1548,6 +1548,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUNDEVEN, MVT::v2f32, Legal);
+ setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
@@ -1571,6 +1573,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FFLOOR, MVT::v8f16, Legal);
setOperationAction(ISD::FROUND, MVT::v4f16, Legal);
setOperationAction(ISD::FROUND, MVT::v8f16, Legal);
+ setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Legal);
+ setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f16, Legal);
setOperationAction(ISD::FCEIL, MVT::v8f16, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f16, Legal);
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index d7324014ab4ba..7485ef569445a 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -7312,7 +7312,7 @@ multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
}
}
-defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
+defm VRINTNN : VRINT_FPI<"n", 0b000, froundeven>;
defm VRINTXN : VRINT_FPI<"x", 0b001, frint>;
defm VRINTAN : VRINT_FPI<"a", 0b010, fround>;
defm VRINTZN : VRINT_FPI<"z", 0b011, ftrunc>;
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 1d5c12fabf093..31650e0137beb 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1135,13 +1135,8 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm,
Requires<[HasFPARMv8,HasDPVFP]>;
}
-// Match either froundeven or int_arm_neon_vrintn
-def vrintn_or_froundeven : PatFrags<(ops node:$src),
- [(int_arm_neon_vrintn node:$src),
- (froundeven node:$src)]>;
-
defm VRINTA : vrint_inst_anpm<"a", 0b00, fround>;
-defm VRINTN : vrint_inst_anpm<"n", 0b01, vrintn_or_froundeven>;
+defm VRINTN : vrint_inst_anpm<"n", 0b01, froundeven>;
defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
diff --git a/llvm/test/CodeGen/ARM/vrint.ll b/llvm/test/CodeGen/ARM/vrint.ll
index 1766af1486b9c..52107aac65187 100644
--- a/llvm/test/CodeGen/ARM/vrint.ll
+++ b/llvm/test/CodeGen/ARM/vrint.ll
@@ -1084,21 +1084,7 @@ define <4 x half> @frintn_4h(<4 x half> %A) nounwind {
;
; CHECK-FP16-LABEL: frintn_4h:
; CHECK-FP16: @ %bb.0:
-; CHECK-FP16-NEXT: vmovx.f16 s2, s0
-; CHECK-FP16-NEXT: vrintn.f16 s2, s2
-; CHECK-FP16-NEXT: vmov r0, s2
-; CHECK-FP16-NEXT: vrintn.f16 s2, s0
-; CHECK-FP16-NEXT: vmov r1, s2
-; CHECK-FP16-NEXT: vrintn.f16 s2, s1
-; CHECK-FP16-NEXT: vmovx.f16 s0, s1
-; CHECK-FP16-NEXT: vrintn.f16 s0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[0], r1
-; CHECK-FP16-NEXT: vmov.16 d16[1], r0
-; CHECK-FP16-NEXT: vmov r0, s2
-; CHECK-FP16-NEXT: vmov.16 d16[2], r0
-; CHECK-FP16-NEXT: vmov r0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[3], r0
-; CHECK-FP16-NEXT: vorr d0, d16, d16
+; CHECK-FP16-NEXT: vrintn.f16 d0, d0
; CHECK-FP16-NEXT: bx lr
%tmp3 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A)
ret <4 x half> %tmp3
@@ -1248,35 +1234,7 @@ define <8 x half> @frintn_8h(<8 x half> %A) nounwind {
;
; CHECK-FP16-LABEL: frintn_8h:
; CHECK-FP16: @ %bb.0:
-; CHECK-FP16-NEXT: vmovx.f16 s4, s2
-; CHECK-FP16-NEXT: vrintn.f16 s4, s4
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vrintn.f16 s4, s2
-; CHECK-FP16-NEXT: vmov r1, s4
-; CHECK-FP16-NEXT: vrintn.f16 s4, s3
-; CHECK-FP16-NEXT: vmov.16 d17[0], r1
-; CHECK-FP16-NEXT: vmov.16 d17[1], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmovx.f16 s4, s3
-; CHECK-FP16-NEXT: vrintn.f16 s4, s4
-; CHECK-FP16-NEXT: vmov.16 d17[2], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmovx.f16 s4, s0
-; CHECK-FP16-NEXT: vrintn.f16 s4, s4
-; CHECK-FP16-NEXT: vmov.16 d17[3], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vrintn.f16 s4, s0
-; CHECK-FP16-NEXT: vmovx.f16 s0, s1
-; CHECK-FP16-NEXT: vmov r1, s4
-; CHECK-FP16-NEXT: vrintn.f16 s4, s1
-; CHECK-FP16-NEXT: vrintn.f16 s0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[0], r1
-; CHECK-FP16-NEXT: vmov.16 d16[1], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmov.16 d16[2], r0
-; CHECK-FP16-NEXT: vmov r0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[3], r0
-; CHECK-FP16-NEXT: vorr q0, q8, q8
+; CHECK-FP16-NEXT: vrintn.f16 q0, q0
; CHECK-FP16-NEXT: bx lr
%tmp3 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A)
ret <8 x half> %tmp3
@@ -1302,9 +1260,7 @@ define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
;
; CHECK-LABEL: frintn_2s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vrintn.f32 s3, s1
-; CHECK-NEXT: vrintn.f32 s2, s0
-; CHECK-NEXT: vmov.f64 d0, d1
+; CHECK-NEXT: vrintn.f32 d0, d0
; CHECK-NEXT: bx lr
%tmp3 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
ret <2 x float> %tmp3
@@ -1336,11 +1292,7 @@ define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
;
; CHECK-LABEL: frintn_4s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vrintn.f32 s7, s3
-; CHECK-NEXT: vrintn.f32 s6, s2
-; CHECK-NEXT: vrintn.f32 s5, s1
-; CHECK-NEXT: vrintn.f32 s4, s0
-; CHECK-NEXT: vorr q0, q1, q1
+; CHECK-NEXT: vrintn.f32 q0, q0
; CHECK-NEXT: bx lr
%tmp3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
ret <4 x float> %tmp3
diff --git a/llvm/test/CodeGen/ARM/vrintn.ll b/llvm/test/CodeGen/ARM/vrintn.ll
index 40f806ba55d36..077007b22fc4b 100644
--- a/llvm/test/CodeGen/ARM/vrintn.ll
+++ b/llvm/test/CodeGen/ARM/vrintn.ll
@@ -1,12 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv8 -mattr=+neon %s -o - | FileCheck %s
declare float @llvm.arm.neon.vrintn.f32(float) nounwind readnone
declare <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float>) nounwind readnone
-; CHECK-LABEL: vrintn_f32:
-; CHECK: vrintn.f32
define float @vrintn_f32(ptr %A) nounwind {
+; CHECK-LABEL: vrintn_f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr s0, [r0]
+; CHECK-NEXT: vrintn.f32 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%tmp1 = load float, ptr %A
%tmp2 = call float @llvm.arm.neon.vrintn.f32(float %tmp1)
ret float %tmp2
@@ -74,10 +79,9 @@ define <4 x half> @roundeven_4h(<4 x half> %A) nounwind {
define <2 x float> @roundeven_2s(<2 x float> %A) nounwind {
; CHECK-LABEL: roundeven_2s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d0, r0, r1
-; CHECK-NEXT: vrintn.f32 s3, s1
-; CHECK-NEXT: vrintn.f32 s2, s0
-; CHECK-NEXT: vmov r0, r1, d1
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vrintn.f32 d16, d16
+; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
%tmp3 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A)
ret <2 x float> %tmp3
@@ -86,14 +90,11 @@ define <2 x float> @roundeven_2s(<2 x float> %A) nounwind {
define <4 x float> @roundeven_4s(<4 x float> %A) nounwind {
; CHECK-LABEL: roundeven_4s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d1, r2, r3
-; CHECK-NEXT: vmov d0, r0, r1
-; CHECK-NEXT: vrintn.f32 s7, s3
-; CHECK-NEXT: vrintn.f32 s6, s2
-; CHECK-NEXT: vrintn.f32 s5, s1
-; CHECK-NEXT: vrintn.f32 s4, s0
-; CHECK-NEXT: vmov r2, r3, d3
-; CHECK-NEXT: vmov r0, r1, d2
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vrintn.f32 q8, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: bx lr
%tmp3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A)
ret <4 x float> %tmp3
More information about the llvm-commits
mailing list