[llvm] [AArch64][SVE2] SVE2 NBSL instruction lowering. (PR #89732)
Dinar Temirbulatov via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 26 02:10:38 PDT 2024
https://github.com/dtemirbulatov updated https://github.com/llvm/llvm-project/pull/89732
>From 9705cee0fa583597a88a98fcdbed17ecb5b5abce Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Tue, 23 Apr 2024 10:09:43 +0000
Subject: [PATCH 1/3] [AArch64][SVE2] SVE2 NBSL instruction lowering.
Allow folding of BSL/CNOT instructions into the NBSL instruction for scalable vectors.
---
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 17 +++++++++++++++++
llvm/test/CodeGen/AArch64/sve2-bsl.ll | 15 +++++++++++++++
2 files changed, 32 insertions(+)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 525ae79da99624..00a1e5f3efadda 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3767,6 +3767,23 @@ let Predicates = [HasSVE2orSME] in {
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
+
+ // zext(cmpeq(bsl(x, y, z), splat(0))) -> nbsl(x, y, z)
+ def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive)),
+ (nxv16i8 (AArch64bsp nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)), (SVEDup0), SETEQ)))),
+ (NBSL_ZZZZ nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)>;
+
+ def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive)),
+ (nxv8i16 (AArch64bsp nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)), (SVEDup0), SETEQ)))),
+ (NBSL_ZZZZ nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)>;
+
+ def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive)),
+ (nxv4i32 (AArch64bsp nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)), (SVEDup0), SETEQ)))),
+ (NBSL_ZZZZ nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)>;
+
+ def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive)),
+ (nxv2i64 (AArch64bsp nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)), (SVEDup0), SETEQ)))),
+ (NBSL_ZZZZ nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)>;
} // End HasSVE2orSME
let Predicates = [HasSVE2] in {
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 23b2622f5f5863..a7edd944e399fe 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -41,3 +41,18 @@ define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32>
%c = or <vscale x 4 x i32> %1, %2
ret <vscale x 4 x i32> %c
}
+
+define <vscale x 4 x i32> @nbsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: nbsl:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.s, #0x7fffffff
+; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z1.d
+; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: ret
+ %1 = and <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2147483647, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %2 = and <vscale x 4 x i32> %b, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -2147483648, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %3 = or <vscale x 4 x i32> %1, %2
+ %4 = icmp eq <vscale x 4 x i32> %3, zeroinitializer
+ %5 = zext <vscale x 4 x i1> %4 to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %5
+}
>From 1889d8dca6d73ce5409a5e998db23cde6d33d69b Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Thu, 25 Apr 2024 00:53:04 +0000
Subject: [PATCH 2/3] Resolved remarks.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 19 +------
llvm/test/CodeGen/AArch64/sve2-bsl.ll | 55 ++++++++++++++++---
3 files changed, 49 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a7abb58064a535..9c585e15cef27c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -740,6 +740,8 @@ def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
+def AArch64nbsl: PatFrag<(ops node:$Op1, node:$Op2, node:$Op3),
+ (vnot (AArch64bsp node:$Op1, node:$Op2, node:$Op3))>;
def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 00a1e5f3efadda..06b943cf951460 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3760,30 +3760,13 @@ let Predicates = [HasSVE2orSME] in {
defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl, AArch64bsp>;
defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
- defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl>;
+ defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl, AArch64nbsl>;
// SVE2 bitwise xor and rotate right by immediate
defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>;
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
-
- // zext(cmpeq(bsl(x, y, z), splat(0))) -> nbsl(x, y, z)
- def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive)),
- (nxv16i8 (AArch64bsp nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)), (SVEDup0), SETEQ)))),
- (NBSL_ZZZZ nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)>;
-
- def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive)),
- (nxv8i16 (AArch64bsp nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)), (SVEDup0), SETEQ)))),
- (NBSL_ZZZZ nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)>;
-
- def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive)),
- (nxv4i32 (AArch64bsp nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)), (SVEDup0), SETEQ)))),
- (NBSL_ZZZZ nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)>;
-
- def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive)),
- (nxv2i64 (AArch64bsp nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)), (SVEDup0), SETEQ)))),
- (NBSL_ZZZZ nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)>;
} // End HasSVE2orSME
let Predicates = [HasSVE2] in {
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index a7edd944e399fe..ef7d4abe5c5f4e 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -42,17 +42,54 @@ define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32>
ret <vscale x 4 x i32> %c
}
-define <vscale x 4 x i32> @nbsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: nbsl:
+define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: nbsl_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.b, #127 // =0x7f
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %1 = and <vscale x 16 x i8> %a, splat(i8 127)
+ %2 = and <vscale x 16 x i8> %b, splat(i8 -128)
+ %3 = or <vscale x 16 x i8> %1, %2
+ %4 = xor <vscale x 16 x i8> %3, splat(i8 -1)
+ ret <vscale x 16 x i8> %4
+}
+
+define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: nbsl_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.h, #32767 // =0x7fff
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %1 = and <vscale x 8 x i16> %a, splat(i16 32767)
+ %2 = and <vscale x 8 x i16> %b, splat(i16 -32768)
+ %3 = or <vscale x 8 x i16> %1, %2
+ %4 = xor <vscale x 8 x i16> %3, splat(i16 -1)
+ ret <vscale x 8 x i16> %4
+}
+
+define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: nbsl_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.s, #0x7fffffff
-; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z1.d
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
- %1 = and <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2147483647, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
- %2 = and <vscale x 4 x i32> %b, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -2147483648, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)
+ %2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648)
%3 = or <vscale x 4 x i32> %1, %2
- %4 = icmp eq <vscale x 4 x i32> %3, zeroinitializer
- %5 = zext <vscale x 4 x i1> %4 to <vscale x 4 x i32>
- ret <vscale x 4 x i32> %5
+ %4 = xor <vscale x 4 x i32> %3, splat(i32 -1)
+ ret <vscale x 4 x i32> %4
+}
+
+define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: nbsl_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %1 = and <vscale x 2 x i64> %a, splat(i64 9223372036854775807)
+ %2 = and <vscale x 2 x i64> %b, splat(i64 -9223372036854775808)
+ %3 = or <vscale x 2 x i64> %1, %2
+ %4 = xor <vscale x 2 x i64> %3, splat(i64 -1)
+ ret <vscale x 2 x i64> %4
}
>From 45ad4b195a7fda04b7a7b081ac01421b5939d808 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Fri, 26 Apr 2024 09:03:16 +0000
Subject: [PATCH 3/3] Formatting.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9c585e15cef27c..59cdd7f071c09a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -741,7 +741,7 @@ def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
def AArch64nbsl: PatFrag<(ops node:$Op1, node:$Op2, node:$Op3),
- (vnot (AArch64bsp node:$Op1, node:$Op2, node:$Op3))>;
+ (vnot (AArch64bsp node:$Op1, node:$Op2, node:$Op3))>;
def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
More information about the llvm-commits
mailing list