[llvm] [AArch64][SVE2] SVE2 NBSL instruction lowering. (PR #89732)

Tue Apr 23 03:15:24 PDT 2024

https://github.com/dtemirbulatov created https://github.com/llvm/llvm-project/pull/89732

Allow to fold BSL/CNOT instuctions to NBSL instruction for scalable vectors.

>From cb39861c2f1f51d2ea1d37550bd37714d2d61378 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Tue, 23 Apr 2024 10:09:43 +0000
Subject: [PATCH] [AArch64][SVE2] SVE2 NBSL instruction lowering.

Allow to fold BSL/CNOT instuctions to NBSL instruction for scalable vectors.
---
 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 17 +++++++++++++++++
 llvm/test/CodeGen/AArch64/sve2-bsl.ll          | 15 +++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 6972acd985cb9a..e291d8857bdcd5 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3750,6 +3750,23 @@ let Predicates = [HasSVE2orSME] in {
 
   // SVE2 extract vector (immediate offset, constructive)
   def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
+
+  // zext(cmpeq(bsl(x, y, z), splat(0))) -> nbsl(x, y, z)
+  def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive)),
+            (nxv16i8 (AArch64bsp nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)), (SVEDup0), SETEQ)))),
+            (NBSL_ZZZZ nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)>;
+
+  def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive)),
+            (nxv8i16 (AArch64bsp nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)), (SVEDup0), SETEQ)))),
+            (NBSL_ZZZZ nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)>;
+
+  def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive)),
+            (nxv4i32 (AArch64bsp nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)), (SVEDup0), SETEQ)))),
+            (NBSL_ZZZZ nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)>;
+
+  def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive)),
+            (nxv2i64 (AArch64bsp nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)), (SVEDup0), SETEQ)))),
+            (NBSL_ZZZZ nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)>;
 } // End HasSVE2orSME
 
 let Predicates = [HasSVE2] in {
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 23b2622f5f5863..a7edd944e399fe 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -41,3 +41,18 @@ define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32>
   %c = or <vscale x 4 x i32> %1, %2
   ret <vscale x 4 x i32> %c
 }
+
+define <vscale x 4 x i32> @nbsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: nbsl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.s, #0x7fffffff
+; CHECK-NEXT:    nbsl z2.d, z2.d, z0.d, z1.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %1 = and <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2147483647, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %2 = and <vscale x 4 x i32> %b, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -2147483648, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %3 = or <vscale x 4 x i32> %1, %2
+  %4 = icmp eq <vscale x 4 x i32> %3, zeroinitializer
+  %5 = zext <vscale x 4 x i1> %4 to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %5
+}