[llvm] [AArch64][SVE] Optimize svand_z/svorr_z with all-true predicates. (PR #160408)
Vladimir Miloserdov via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 28 03:30:25 PDT 2025
https://github.com/miloserdow updated https://github.com/llvm/llvm-project/pull/160408
From da65767749017c0d2e3cf6dd3996ee317a490e74 Mon Sep 17 00:00:00 2001
From: Vladimir Miloserdov <milosvova at gmail.com>
Date: Tue, 23 Sep 2025 23:07:05 +0100
Subject: [PATCH] [AArch64][SVE] Optimize logical ops with convert.to.svbool
When the governing predicate is all-true and both operands of a logical
operation (and/or/xor) are convert.to.svbool calls from the same narrower
predicate type, unwrap the operands to that type, simplify the operation with
simplifyBinOp, and rewrap the result. This eliminates redundant instructions
in cases like:
svand_z(svptrue_b8(), svpnext_b16(prev, pg), svptrue_b16());
Fixes #160279.
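A rough before/after IR sketch of the fold, mirroring the new tests (value
names are illustrative):

  ; Before: both and.z operands are convert.to.svbool from nxv8i1, and the
  ; governing predicate is all-true.
  %cx = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
  %ct = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %r  = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %cx, <vscale x 16 x i1> %ct)

  ; After: simplifyBinOp folds (and %x, true) at nxv8i1, so the whole
  ; sequence collapses to a single conversion.
  %r = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)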
---
.../AArch64/AArch64TargetTransformInfo.cpp | 40 +++++-
.../sve-intrinsic-and-or-with-all-true.ll | 123 ++++++++++++++++++
2 files changed, 160 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index fede586cf35bc..24bad469d251e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1469,8 +1469,17 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
Instruction::UDiv);
- case Intrinsic::aarch64_sve_addqv:
case Intrinsic::aarch64_sve_and_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::And);
+ case Intrinsic::aarch64_sve_orr_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::Or);
+ case Intrinsic::aarch64_sve_eor_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::Xor);
+
+ case Intrinsic::aarch64_sve_addqv:
case Intrinsic::aarch64_sve_bic_z:
case Intrinsic::aarch64_sve_brka_z:
case Intrinsic::aarch64_sve_brkb_z:
@@ -1479,13 +1488,11 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
case Intrinsic::aarch64_sve_brkpb_z:
case Intrinsic::aarch64_sve_cntp:
case Intrinsic::aarch64_sve_compact:
- case Intrinsic::aarch64_sve_eor_z:
case Intrinsic::aarch64_sve_eorv:
case Intrinsic::aarch64_sve_eorqv:
case Intrinsic::aarch64_sve_nand_z:
case Intrinsic::aarch64_sve_nor_z:
case Intrinsic::aarch64_sve_orn_z:
- case Intrinsic::aarch64_sve_orr_z:
case Intrinsic::aarch64_sve_orv:
case Intrinsic::aarch64_sve_orqv:
case Intrinsic::aarch64_sve_pnext:
@@ -1659,6 +1666,30 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
return &II;
}
+ // If both operands are convert.to.svbool from the same narrower type, try to
+ // simplify the operation at that narrower type first.
+ if (isAllActivePredicate(Pg)) {
+ auto *ConvIntr1 = dyn_cast<IntrinsicInst>(Op1);
+ auto *ConvIntr2 = dyn_cast<IntrinsicInst>(Op2);
+ if (ConvIntr1 && ConvIntr2 &&
+ ConvIntr1->getIntrinsicID() ==
+ Intrinsic::aarch64_sve_convert_to_svbool &&
+ ConvIntr2->getIntrinsicID() ==
+ Intrinsic::aarch64_sve_convert_to_svbool) {
+ Value *NarrowOp1 = ConvIntr1->getArgOperand(0);
+ Value *NarrowOp2 = ConvIntr2->getArgOperand(0);
+ if (NarrowOp1->getType() == NarrowOp2->getType()) {
+ if (Value *SimplifiedNarrow =
+ simplifyBinOp(Opc, NarrowOp1, NarrowOp2, DL)) {
+ Value *NewConv = IC.Builder.CreateIntrinsic(
+ Intrinsic::aarch64_sve_convert_to_svbool,
+ {SimplifiedNarrow->getType()}, {SimplifiedNarrow});
+ return IC.replaceInstUsesWith(II, NewConv);
+ }
+ }
+ }
+ }
+
// Only active lanes matter when simplifying the operation.
Op1 = stripInactiveLanes(Op1, Pg);
Op2 = stripInactiveLanes(Op2, Pg);
@@ -1679,6 +1710,9 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
if (IInfo.inactiveLanesAreNotDefined())
return IC.replaceInstUsesWith(II, SimpleII);
+ if (!IInfo.inactiveLanesTakenFromOperand())
+ return std::nullopt;
+
Value *Inactive = II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom());
// The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
new file mode 100644
index 0000000000000..f214fa5872b9e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool ../../llvm-build/bin/opt
+; RUN: opt -passes=instcombine -mtriple aarch64 -mattr=+sve -S < %s | FileCheck %s
+;
+; Test AArch64-specific InstCombine optimizations for SVE logical operations
+; with all-true predicates.
+; - a AND true = a
+; - a OR true = true
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+
+define <vscale x 16 x i1> @test_and_convert_all_true_right_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_right_b16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_and_convert_all_true_left_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_left_b16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_true, <vscale x 16 x i1> %conv_x)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_or_convert_all_true_right_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_or_convert_all_true_right_b16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_or_convert_all_true_left_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_or_convert_all_true_left_b16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_true, <vscale x 16 x i1> %conv_x)
+ ret <vscale x 16 x i1> %result
+}
+define <vscale x 16 x i1> @test_and_convert_all_true_b32(<vscale x 4 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_b32(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[X:%.*]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_and_convert_all_true_b64(<vscale x 2 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_b64(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[X:%.*]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+ ret <vscale x 16 x i1> %result
+}
+
+; Negative test
+define <vscale x 16 x i1> @test_and_convert_different_granularities(<vscale x 8 x i1> %x, <vscale x 4 x i1> %y) {
+; CHECK-LABEL: @test_and_convert_different_granularities(
+; CHECK-NEXT: [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT: [[CONV_Y:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[Y:%.*]])
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_Y]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_y = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %y)
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_y)
+ ret <vscale x 16 x i1> %result
+}
+
+; Negative test
+define <vscale x 16 x i1> @test_and_convert_non_all_true_predicate(<vscale x 16 x i1> %pred, <vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_non_all_true_predicate(
+; CHECK-NEXT: [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT: [[CONV_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_TRUE]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+ ret <vscale x 16 x i1> %result
+}
+
+; Negative test
+define <vscale x 16 x i1> @test_and_convert_no_all_true(<vscale x 8 x i1> %x, <vscale x 8 x i1> %y) {
+; CHECK-LABEL: @test_and_convert_no_all_true(
+; CHECK-NEXT: [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT: [[CONV_Y:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[Y:%.*]])
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_Y]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_y = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %y)
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_y)
+ ret <vscale x 16 x i1> %result
+}