[llvm] fa9f6b5 - [AArch64][NEON] Add famax/famin codegen patterns (#103027)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 13 09:06:05 PDT 2024
Author: SpencerAbson
Date: 2024-08-13T17:06:02+01:00
New Revision: fa9f6b58285b86de794f956daa7f4027e2fc6baa
URL: https://github.com/llvm/llvm-project/commit/fa9f6b58285b86de794f956daa7f4027e2fc6baa
DIFF: https://github.com/llvm/llvm-project/commit/fa9f6b58285b86de794f956daa7f4027e2fc6baa.diff
LOG: [AArch64][NEON] Add famax/famin codegen patterns (#103027)
- min(abs(a), abs(b)) -> famin(a, b), max(abs(a), abs(b)) -> famax(a, b)
- Changes to LLVM
- llvm/lib/Target/AArch64/AArch64InstrInfo.td
- Add pattern for NEON types
- llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll (new file)
- Add tests with and without +faminmax flag.
Added:
llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1b914b52ad2f8..bb05dc85d29a1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -10147,10 +10147,20 @@ let Predicates = [HasFP8] in {
defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
} // End let Predicates = [HasFP8]
-let Predicates = [HasFAMINMAX] in {
+let Predicates = [HasNEON, HasFAMINMAX] in {
defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
-} // End let Predicates = [HasFAMAXMIN]
+
+ foreach Ty = [v4f16, v8f16, v2f32, v4f32, v2f64] in {
+ // Replace min(abs(a), abs(b)) with famin(a, b)
+ def : Pat<(Ty (fminimum (fabs Ty:$Rn), (fabs Ty:$Rm))),
+ (!cast<Instruction>("FAMIN"#Ty) Ty:$Rn, Ty:$Rm)>;
+
+ // Replace max(abs(a), abs(b)) with famax(a, b)
+ def : Pat<(Ty (fmaximum (fabs Ty:$Rn), (fabs Ty:$Rm))),
+ (!cast<Instruction>("FAMAX"#Ty) Ty:$Rn, Ty:$Rm)>;
+ }
+} // End let Predicates = [HasNEON, HasFAMINMAX]
let Predicates = [HasFP8FMA] in {
defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
new file mode 100644
index 0000000000000..dd2deda9839a0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
@@ -0,0 +1,209 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+faminmax -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FAMINMAX
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Replace min(abs(a), abs(b)) with famin(a, b)
+; Replace max(abs(a), abs(b)) with famax(a, b)
+
+define <4 x half> @test_max_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_max_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call <4 x half> @llvm.maximum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
+define <4 x half> @test_min_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_min_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call <4 x half> @llvm.minimum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
+define <8 x half> @test_max_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_max_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
+define <8 x half> @test_min_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_min_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
+define <2 x float> @test_max_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_max_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call <2 x float> @llvm.maximum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
+define <2 x float> @test_min_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_min_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call <2 x float> @llvm.minimum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
+define <4 x float> @test_max_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_max_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
+define <4 x float> @test_min_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_min_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call <4 x float> @llvm.minimum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
+define <2 x double> @test_max_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_max_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_max_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fmax v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
+define <2 x double> @test_min_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_min_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fmin v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
+
+declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+
+declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
+declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
+declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
+
+declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
+declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
+declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
+
+attributes #0 = { nounwind "target-features"="+fullfp16" }
More information about the llvm-commits mailing list