[llvm] [AArch64] add optimisation for MATCH/NMATCH instruction (PR #108179)

Wed Sep 11 02:57:58 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: None (Lukacma)

<details>
<summary>Changes</summary>

This patch folds calls to the SVE `match`/`nmatch` intrinsics to an all-zero (`zeroinitializer`) result when the governing predicate has no active lanes.

---
Full diff: https://github.com/llvm/llvm-project/pull/108179.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+2) 
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-zero.ll (+23) 


``````````diff

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 649ba1ac8749f5..079f1e7c08058c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2275,6 +2275,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_ldnt1_gather_index:
   case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
   case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
+  case Intrinsic::aarch64_sve_match:
+  case Intrinsic::aarch64_sve_nmatch:
     return instCombineSVENoActiveZero(IC, II);
   case Intrinsic::aarch64_sve_prf:
   case Intrinsic::aarch64_sve_prfb_gather_index:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-zero.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-zero.ll
index e58aa2eeefa8d3..0035763d0812a5 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-zero.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-zero.ll
@@ -230,3 +230,26 @@ define <8 x i16> @umaxqv_i16(<vscale x 8 x i16> %a) {
   %res = call <8 x i16> @llvm.aarch64.sve.umaxqv.v8i16.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a);
   ret <8 x i16> %res
 }
+
+define <vscale x 8 x i1> @match_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: define <vscale x 8 x i1> @match_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x i1> zeroinitializer
+;
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.match.nxv8i16(<vscale x 8 x i1> zeroinitializer,
+  <vscale x 8 x i16> %a,
+  <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i1> %out
+}
+
+
+define <vscale x 8 x i1> @nmatch_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: define <vscale x 8 x i1> @nmatch_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x i1> zeroinitializer
+;
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.nmatch.nxv8i16(<vscale x 8 x i1> zeroinitializer,
+  <vscale x 8 x i16> %a,
+  <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i1> %out
+}
\ No newline at end of file

``````````

</details>


https://github.com/llvm/llvm-project/pull/108179