[llvm] [GlobalISel][AArch64] Legalize G_FABS and G_FNEG for SVE (PR #114784)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 22:16:53 PST 2024
https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/114784
>From 424ff805c67663dc1d04b0d1f36da8f005def088 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sun, 3 Nov 2024 13:50:55 +0100
Subject: [PATCH 1/4] [GlobalISel][AArch64] Legalize G_FABS and G_FNEG
Add patterns to unpredicate instructions.
FNEG_ZPmZ and FABS_ZPmZ are merging predicated instructions.
See https://discourse.llvm.org/t/pat-s-with-destinations/82918
---
llvm/lib/Target/AArch64/AArch64.td | 18 +++++
.../AArch64/AArch64GlobalISelPatterns.td | 36 ++++++++++
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 9 ---
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 1 +
llvm/test/CodeGen/AArch64/sve-float.ll | 71 +++++++++++++++++++
5 files changed, 126 insertions(+), 9 deletions(-)
create mode 100644 llvm/lib/Target/AArch64/AArch64GlobalISelPatterns.td
create mode 100644 llvm/test/CodeGen/AArch64/sve-float.ll
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index e3dd334e7b098b..1517cb8a1ae5d1 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -179,3 +179,21 @@ def AArch64 : Target {
//===----------------------------------------------------------------------===//
include "AArch64PfmCounters.td"
+
+
+//===----------------------------------------------------------------------===//
+// GlobalISel patterns
+//===----------------------------------------------------------------------===//
+
+include "AArch64GlobalISelPatterns.td"
+
+// We want to first hit the instruction patterns.
+foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
+ // No dedicated instruction, so just clear the sign bit.
+ def : Pat<(VT (fabs VT:$op)),
+ (AND_ZI $op, (i64 (logical_imm64_XFORM(i64 0x7fff7fff7fff7fff))))>;
+ // No dedicated instruction, so just invert the sign bit.
+ def : Pat<(VT (fneg VT:$op)),
+ (EOR_ZI $op, (i64 (logical_imm64_XFORM(i64 0x8000800080008000))))>;
+}
+
diff --git a/llvm/lib/Target/AArch64/AArch64GlobalISelPatterns.td b/llvm/lib/Target/AArch64/AArch64GlobalISelPatterns.td
new file mode 100644
index 00000000000000..f4424ed3499ab6
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64GlobalISelPatterns.td
@@ -0,0 +1,36 @@
+//===-- AArch64GlobalISelPatterns.td - GlobalISel patterns -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Selection and combine patterns for GlobalISel.
+//
+//===----------------------------------------------------------------------===//
+
+
+//unpredicate patterns
+
+
+
+// fneg
+def : Pat<(nxv2f64 (fneg nxv2f64:$src)),
+ (FNEG_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$src)>;
+
+def : Pat<(nxv4f32 (fneg nxv4f32:$src)),
+ (FNEG_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$src)>;
+
+def : Pat<(nxv8f16 (fneg nxv8f16:$src)),
+ (FNEG_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$src)>;
+
+// fabs
+def : Pat<(nxv2f64 (fabs nxv2f64:$src)),
+ (FABS_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$src)>;
+
+def : Pat<(nxv4f32 (fabs nxv4f32:$src)),
+ (FABS_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$src)>;
+
+def : Pat<(nxv8f16 (fabs nxv8f16:$src)),
+ (FABS_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$src)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 574432869471ad..ecf1cc24e0a500 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -683,15 +683,6 @@ let Predicates = [HasSVEorSME] in {
defm NEG_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64neg_mt>;
}
- foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
- // No dedicated instruction, so just clear the sign bit.
- def : Pat<(VT (fabs VT:$op)),
- (AND_ZI $op, (i64 (logical_imm64_XFORM(i64 0x7fff7fff7fff7fff))))>;
- // No dedicated instruction, so just invert the sign bit.
- def : Pat<(VT (fneg VT:$op)),
- (EOR_ZI $op, (i64 (logical_imm64_XFORM(i64 0x8000800080008000))))>;
- }
-
// zext(cmpeq(x, splat(0))) -> cnot(x)
def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive):$Pg), nxv16i8:$Op2, (SVEDup0), SETEQ)))),
(CNOT_ZPmZ_B $Op2, $Pg, $Op2)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 619a041c273cd8..c7130696a614d1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -279,6 +279,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_FABS, G_FNEG})
.legalFor({s32, s64, v2s32, v4s32, v2s64})
.legalFor(HasFP16, {s16, v4s16, v8s16})
+ .legalFor(HasSVE, {nxv2s64, nxv4s32, nxv8s16})
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.lowerIf(scalarOrEltWiderThan(0, 64))
.clampNumElements(0, v4s16, v8s16)
diff --git a/llvm/test/CodeGen/AArch64/sve-float.ll b/llvm/test/CodeGen/AArch64/sve-float.ll
new file mode 100644
index 00000000000000..186d4e39226dda
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-float.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
+
+;; fneg
+define <vscale x 2 x double> @fnegnxv2double(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fnegnxv2double:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fneg z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+entry:
+ %c = fneg <vscale x 2 x double> %a
+ ret <vscale x 2 x double> %c
+}
+
+define <vscale x 4 x float> @fnegnxv4float(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fnegnxv4float:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fneg z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+entry:
+ %c = fneg <vscale x 4 x float> %a
+ ret <vscale x 4 x float> %c
+}
+
+define <vscale x 8 x half> @fnegnxv8half(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fnegnxv8half:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fneg z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+entry:
+ %c = fneg <vscale x 8 x half> %a
+ ret <vscale x 8 x half> %c
+}
+
+;; fabs
+define <vscale x 2 x double> @fabsnxv2double(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fabsnxv2double:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fabs z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+entry:
+ %c = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %c
+}
+
+define <vscale x 4 x float> @fabsnxv4float(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fabsnxv4float:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fabs z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+entry:
+ %c = tail call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %c
+}
+
+define <vscale x 8 x half> @fabsnxv8half(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fabsnxv8half:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+entry:
+ %c = tail call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %a)
+ ret <vscale x 8 x half> %c
+}
>From 07bdbc4502297108787850b827b1bd11c1e83182 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Mon, 4 Nov 2024 19:17:54 +0100
Subject: [PATCH 2/4] address review comments
---
llvm/test/CodeGen/AArch64/sve-float.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/sve-float.ll b/llvm/test/CodeGen/AArch64/sve-float.ll
index 186d4e39226dda..64d018c79a7e7f 100644
--- a/llvm/test/CodeGen/AArch64/sve-float.ll
+++ b/llvm/test/CodeGen/AArch64/sve-float.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -mtriple aarch64 -mattr=+sve | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel=0 | FileCheck %s
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
;; fneg
>From a313265cca03029200491296d8f0361d62b2f96c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Tue, 5 Nov 2024 06:44:45 +0100
Subject: [PATCH 3/4] address review comments
---
llvm/test/CodeGen/AArch64/sve-float.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/sve-float.ll b/llvm/test/CodeGen/AArch64/sve-float.ll
index 64d018c79a7e7f..665bee964ce8bd 100644
--- a/llvm/test/CodeGen/AArch64/sve-float.ll
+++ b/llvm/test/CodeGen/AArch64/sve-float.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel=0 | FileCheck %s
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
>From bd87c0389c9d316c7d432d479e544611cc484105 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 12 Dec 2024 07:16:17 +0100
Subject: [PATCH 4/4] switch to undef
---
llvm/lib/Target/AArch64/AArch64GlobalISelPatterns.td | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64GlobalISelPatterns.td b/llvm/lib/Target/AArch64/AArch64GlobalISelPatterns.td
index f4424ed3499ab6..5d1d5816ed4fc9 100644
--- a/llvm/lib/Target/AArch64/AArch64GlobalISelPatterns.td
+++ b/llvm/lib/Target/AArch64/AArch64GlobalISelPatterns.td
@@ -17,20 +17,20 @@
// fneg
def : Pat<(nxv2f64 (fneg nxv2f64:$src)),
- (FNEG_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$src)>;
+ (FNEG_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$src)>;
def : Pat<(nxv4f32 (fneg nxv4f32:$src)),
- (FNEG_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$src)>;
+ (FNEG_ZPmZ_S_UNDEF (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$src)>;
def : Pat<(nxv8f16 (fneg nxv8f16:$src)),
- (FNEG_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$src)>;
+ (FNEG_ZPmZ_H_UNDEF (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$src)>;
// fabs
def : Pat<(nxv2f64 (fabs nxv2f64:$src)),
- (FABS_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$src)>;
+ (FABS_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$src)>;
def : Pat<(nxv4f32 (fabs nxv4f32:$src)),
- (FABS_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$src)>;
+ (FABS_ZPmZ_S_UNDEF (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$src)>;
def : Pat<(nxv8f16 (fabs nxv8f16:$src)),
- (FABS_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$src)>;
+ (FABS_ZPmZ_H_UNDEF (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$src)>;
More information about the llvm-commits mailing list