[llvm] [AArch64][NEON] Add famax/famin codegen patterns (PR #103027)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 13 06:35:11 PDT 2024
https://github.com/SpencerAbson updated https://github.com/llvm/llvm-project/pull/103027
>From 82d44ce1aa36f39f4b7dc9c5ec486faf494ff440 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Mon, 12 Aug 2024 14:23:54 +0000
Subject: [PATCH 1/2] Add famax/famin codegen patterns for NEON
- Replace min(abs(a), abs(b)) with famin(a, b)
- Replace max(abs(a), abs(b)) with famax(a, b)
- llvm/lib/Target/AArch64/AArch64InstrInfo.td
- Add pattern for NEON types
- llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
- Add tests with and without +faminmax flag.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 14 +-
.../CodeGen/AArch64/aarch64-neon-faminmax.ll | 207 ++++++++++++++++++
2 files changed, 220 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1e5c5e2657e65d..2ca11310019122 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -10155,7 +10155,19 @@ let Predicates = [HasFP8] in {
let Predicates = [HasFAMINMAX] in {
defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
-} // End let Predicates = [HasFAMAXMIN]
+} // End let Predicates = [HasFAMINMAX]
+
+let Predicates = [HasNEON, HasFAMINMAX] in {
+ foreach Ty = [v4f16, v8f16, v2f32, v4f32, v2f64] in {
+ // Replace min(abs(a), abs(b)) with famin(a, b)
+ def : Pat<(Ty (fminimum (fabs Ty:$Rn), (fabs Ty:$Rm))),
+ (!cast<Instruction>("FAMIN"#Ty) Ty:$Rn, Ty:$Rm)>;
+
+ // Replace max(abs(a), abs(b)) with famax(a, b)
+ def : Pat<(Ty (fmaximum (fabs Ty:$Rn), (fabs Ty:$Rm))),
+ (!cast<Instruction>("FAMAX"#Ty) Ty:$Rn, Ty:$Rm)>;
+ }
+} // End let Predicates = [HasNEON, HasFAMINMAX]
let Predicates = [HasFP8FMA] in {
defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
new file mode 100644
index 00000000000000..a8ec25565f59dc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -mattr=+faminmax -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FAMINMAX
+
+; Replace min(abs(a), abs(b)) with famin(a, b)
+; Replace max(abs(a), abs(b)) with famax(a, b)
+
+define <4 x half> @test_max_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_max_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call <4 x half> @llvm.maximum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
+define <4 x half> @test_min_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_min_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call <4 x half> @llvm.minimum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
+define <8 x half> @test_max_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_max_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
+define <8 x half> @test_min_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_min_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
+define <2 x float> @test_max_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_max_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call <2 x float> @llvm.maximum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
+define <2 x float> @test_min_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_min_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call <2 x float> @llvm.minimum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
+define <4 x float> @test_max_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_max_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
+define <4 x float> @test_min_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_min_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call <4 x float> @llvm.minimum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
+define <2 x double> @test_max_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_max_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
+define <2 x double> @test_min_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_min_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
+
+declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+
+declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
+declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
+declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
+
+declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
+declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
+declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
+
+attributes #0 = { nounwind "target-features"="+fullfp16" }
>From e87bfcac9a14c8614a71ffc0d36567ed77f4300f Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 13 Aug 2024 13:31:10 +0000
Subject: [PATCH 2/2] Update predicates of NEON famin/famax
- Change the predicates of the NEON famin/famax instructions to
[HasNEON, HasFAMINMAX]
- Use a 'target triple' declaration in the faminmax llc test instead
of the '-mtriple' flag.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 +---
llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll | 6 ++++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2ca11310019122..9928828a6767be 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -10152,12 +10152,10 @@ let Predicates = [HasFP8] in {
defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
} // End let Predicates = [HasFP8]
-let Predicates = [HasFAMINMAX] in {
+let Predicates = [HasNEON, HasFAMINMAX] in {
defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
-} // End let Predicates = [HasFAMINMAX]
-let Predicates = [HasNEON, HasFAMINMAX] in {
foreach Ty = [v4f16, v8f16, v2f32, v4f32, v2f64] in {
// Replace min(abs(a), abs(b)) with famin(a, b)
def : Pat<(Ty (fminimum (fabs Ty:$Rn), (fabs Ty:$Rm))),
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
index a8ec25565f59dc..dd2deda9839a0d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64 -mattr=+faminmax -verify-machineinstrs %s -o - | FileCheck %s
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FAMINMAX
+; RUN: llc -mattr=+faminmax -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FAMINMAX
+
+target triple = "aarch64-unknown-linux-gnu"
; Replace min(abs(a), abs(b)) with famin(a, b)
; Replace max(abs(a), abs(b)) with famax(a, b)
More information about the llvm-commits
mailing list