[clang] [llvm] [ARM] Handle roundeven for MVE. (PR #142557)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 00:53:02 PDT 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/142557
Now that #141786 handles scalar and Neon types, this adds MVE definitions and legalization for the llvm.roundeven intrinsics. The existing llvm.arm.mve.vrintn intrinsics are auto-upgraded to llvm.roundeven like the other vrint intrinsics, so they should continue to work.
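For illustration, a minimal sketch of what the auto-upgrade does (value names here are placeholders; the updated clang and llc tests below show the same mapping): bitcode that calls the old intrinsic, e.g.

  %r = call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> %a)

is rewritten on load to the target-independent intrinsic

  %r = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)

which the MVE patterns then select to vrintn.f32.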
From b187e7baca5ba141f3524b11533a6f201b05b5bc Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 3 Jun 2025 08:18:51 +0100
Subject: [PATCH] [ARM] Handle roundeven for MVE.
Now that #141786 handles scalar and Neon types, this adds MVE definitions
and legalization for the llvm.roundeven intrinsics. The existing
llvm.arm.mve.vrintn intrinsics are auto-upgraded to llvm.roundeven like the
other vrint intrinsics, so they should continue to work.
---
clang/include/clang/Basic/arm_mve.td | 2 +-
clang/test/CodeGen/arm-mve-intrinsics/vrnd.c | 4 +-
llvm/include/llvm/IR/IntrinsicsARM.td | 2 -
llvm/lib/IR/AutoUpgrade.cpp | 6 ++
llvm/lib/Target/ARM/ARMInstrMVE.td | 2 +-
llvm/test/CodeGen/Thumb2/mve-frint.ll | 89 +++++++++++++++----
.../CodeGen/Thumb2/mve-intrinsics/vrintn.ll | 2 +
7 files changed, 83 insertions(+), 24 deletions(-)
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index e33c065059c44..412ef9abac1bc 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -710,7 +710,7 @@ defm vrndmq: vrnd<IRIntBase<"floor", [Vector]>, "m">;
defm vrndpq: vrnd<IRIntBase<"ceil", [Vector]>, "p">;
defm vrndaq: vrnd<IRIntBase<"round", [Vector]>, "a">;
defm vrndxq: vrnd<IRIntBase<"rint", [Vector]>, "x">;
-defm vrndnq: vrnd<IRInt<"vrintn", [Vector]>, "n">;
+defm vrndnq: vrnd<IRIntBase<"roundeven", [Vector]>, "n">;
multiclass compare_with_pred<string condname, dag arguments,
dag cmp, string suffix> {
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
index 3e625c739bde9..4888bc8c5e98f 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
@@ -148,7 +148,7 @@ float32x4_t test_vrndxq_f32(float32x4_t a)
// CHECK-LABEL: @test_vrndnq_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[A:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vrndnq_f16(float16x8_t a)
@@ -162,7 +162,7 @@ float16x8_t test_vrndnq_f16(float16x8_t a)
// CHECK-LABEL: @test_vrndnq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A:%.*]])
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vrndnq_f32(float32x4_t a)
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 9b7dd8099368d..3ee69b72cc5cd 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1306,8 +1306,6 @@ foreach suffix = ["a","n","p","m"] in {
[llvm_anyvector_ty /* input */], LLVMMatchType<0>, llvm_anyvector_ty>;
}
-def int_arm_mve_vrintn: DefaultAttrsIntrinsic<
- [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vcls: DefaultAttrsIntrinsic<
[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7ba6d411bc7b5..814c00c669cb3 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -767,6 +767,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
return false; // Not 'arm.mve.vctp64'.
}
+ if (Name.starts_with("vrintn.v")) {
+ NewFn = Intrinsic::getOrInsertDeclaration(
+ F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
+ return true;
+ }
+
// These too are changed to accept a v2i1 instead of the old v4i1.
if (Name.consume_back(".v4i1")) {
// 'arm.mve.*.v4i1'.
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 6dd8a374a92af..9dffd945d5baa 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3527,7 +3527,7 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
}
multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
- defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
+ defm N : MVE_VRINT_m<VTI, "n", 0b000, froundeven>;
defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll
index 1d7dcc8bf8440..6946ec37ddf33 100644
--- a/llvm/test/CodeGen/Thumb2/mve-frint.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-frint.ll
@@ -424,21 +424,74 @@ entry:
ret <2 x double> %0
}
-declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
-declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
-declare <4 x float> @llvm.rint.v4f32(<4 x float>)
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
-declare <4 x float> @llvm.floor.v4f32(<4 x float>)
-declare <4 x float> @llvm.round.v4f32(<4 x float>)
-declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
-declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
-declare <8 x half> @llvm.rint.v8f16(<8 x half>)
-declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
-declare <8 x half> @llvm.floor.v8f16(<8 x half>)
-declare <8 x half> @llvm.round.v8f16(<8 x half>)
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
-declare <2 x double> @llvm.rint.v2f64(<2 x double>)
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
-declare <2 x double> @llvm.floor.v2f64(<2 x double>)
-declare <2 x double> @llvm.round.v2f64(<2 x double>)
+define arm_aapcs_vfpcc <4 x float> @froundeven_float32_t(<4 x float> %src) {
+; CHECK-MVE-LABEL: froundeven_float32_t:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vrintn.f32 s3, s3
+; CHECK-MVE-NEXT: vrintn.f32 s2, s2
+; CHECK-MVE-NEXT: vrintn.f32 s1, s1
+; CHECK-MVE-NEXT: vrintn.f32 s0, s0
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: froundeven_float32_t:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vrintn.f32 q0, q0
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %0 = call fast <4 x float> @llvm.roundeven.v4f32(<4 x float> %src)
+ ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @froundeven_float16_t(<8 x half> %src) {
+; CHECK-MVE-LABEL: froundeven_float16_t:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: vmovx.f16 s4, s0
+; CHECK-MVE-NEXT: vrintn.f16 s0, s0
+; CHECK-MVE-NEXT: vrintn.f16 s4, s4
+; CHECK-MVE-NEXT: vins.f16 s0, s4
+; CHECK-MVE-NEXT: vmovx.f16 s4, s1
+; CHECK-MVE-NEXT: vrintn.f16 s4, s4
+; CHECK-MVE-NEXT: vrintn.f16 s1, s1
+; CHECK-MVE-NEXT: vins.f16 s1, s4
+; CHECK-MVE-NEXT: vmovx.f16 s4, s2
+; CHECK-MVE-NEXT: vrintn.f16 s4, s4
+; CHECK-MVE-NEXT: vrintn.f16 s2, s2
+; CHECK-MVE-NEXT: vins.f16 s2, s4
+; CHECK-MVE-NEXT: vmovx.f16 s4, s3
+; CHECK-MVE-NEXT: vrintn.f16 s4, s4
+; CHECK-MVE-NEXT: vrintn.f16 s3, s3
+; CHECK-MVE-NEXT: vins.f16 s3, s4
+; CHECK-MVE-NEXT: bx lr
+;
+; CHECK-MVEFP-LABEL: froundeven_float16_t:
+; CHECK-MVEFP: @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT: vrintn.f16 q0, q0
+; CHECK-MVEFP-NEXT: bx lr
+entry:
+ %0 = call fast <8 x half> @llvm.roundeven.v8f16(<8 x half> %src)
+ ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <2 x double> @froundeven_float64_t(<2 x double> %src) {
+; CHECK-LABEL: froundeven_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.roundeven.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
index a70975e1e7318..b30bb2e3ad3ff 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+; The llvm.arm.mve.vrintn intrinsics should be auto-upgraded to llvm.roundeven, which is then selected to vrintn.
+
define arm_aapcs_vfpcc <8 x half> @test_vrndnq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndnq_f16:
; CHECK: @ %bb.0: @ %entry