[llvm] [AArch64][SVE] Optimize svand_z/svorr_z with all-true predicates. (PR #160408)
Vladimir Miloserdov via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 24 04:53:39 PDT 2025
https://github.com/miloserdow updated https://github.com/llvm/llvm-project/pull/160408
From c61ae4bba8710e76e71bad2431d6ec59371bafdc Mon Sep 17 00:00:00 2001
From: Vladimir Miloserdov <milosvova at gmail.com>
Date: Tue, 23 Sep 2025 23:07:05 +0100
Subject: [PATCH] [AArch64][SVE] Optimize svand_z/svorr_z with all-true
predicates.
Eliminate redundant AND/OR operations with all-true SVE predicates that
were previously not being optimized away.
Modify isAllActivePredicate to detect splat(i1 true) patterns and
add IR opcode mappings for and_z/orr_z/eor_z intrinsics to enable
simplification through simplifySVEIntrinsicBinOp.
Fixes #160279.
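For illustration, a minimal source-level sketch of the kind of code this
folds (assuming standard ACLE intrinsics from <arm_sve.h>; the function
name is hypothetical and not taken from the issue):

  #include <arm_sve.h>

  // The governing predicate and the second operand are both all-true, so
  // the predicate AND is redundant and the call should fold to plain `p`.
  svbool_t keep_active_lanes(svbool_t p) {
    return svand_z(svptrue_b8(), p, svptrue_b8());
  }

The corresponding svorr_z form folds the other way: with an all-true
operand, the result is simply the all-true predicate.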
---
.../AArch64/AArch64TargetTransformInfo.cpp | 55 +++++++++++--
.../sve-intrinsic-and-or-with-all-true.ll | 80 +++++++++++++++++++
2 files changed, 130 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 8c4b4f6e4d6de..c96e4e43e01bc 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1441,8 +1441,17 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
Instruction::UDiv);
- case Intrinsic::aarch64_sve_addqv:
case Intrinsic::aarch64_sve_and_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::And);
+ case Intrinsic::aarch64_sve_orr_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::Or);
+ case Intrinsic::aarch64_sve_eor_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::Xor);
+
+ case Intrinsic::aarch64_sve_addqv:
case Intrinsic::aarch64_sve_bic_z:
case Intrinsic::aarch64_sve_brka_z:
case Intrinsic::aarch64_sve_brkb_z:
@@ -1451,13 +1460,11 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
case Intrinsic::aarch64_sve_brkpb_z:
case Intrinsic::aarch64_sve_cntp:
case Intrinsic::aarch64_sve_compact:
- case Intrinsic::aarch64_sve_eor_z:
case Intrinsic::aarch64_sve_eorv:
case Intrinsic::aarch64_sve_eorqv:
case Intrinsic::aarch64_sve_nand_z:
case Intrinsic::aarch64_sve_nor_z:
case Intrinsic::aarch64_sve_orn_z:
- case Intrinsic::aarch64_sve_orr_z:
case Intrinsic::aarch64_sve_orv:
case Intrinsic::aarch64_sve_orqv:
case Intrinsic::aarch64_sve_pnext:
@@ -1587,8 +1594,21 @@ static bool isAllActivePredicate(Value *Pred) {
if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
Pred = UncastedPred;
- auto *C = dyn_cast<Constant>(Pred);
- return (C && C->isAllOnesValue());
+
+ // Also look through a bare convert.to.svbool of an all-true splat.
+ Value *ConvertArg;
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
+ m_Value(ConvertArg))))
+ Pred = ConvertArg;
+ // Check for splat(i1 true) pattern used by svptrue intrinsics
+ if (auto *C = dyn_cast<Constant>(Pred)) {
+ if (C->isAllOnesValue())
+ return true;
+ if (auto *SplatVal = C->getSplatValue())
+ if (auto *CI = dyn_cast<ConstantInt>(SplatVal))
+ return CI->isOne();
+ }
+ return false;
}
// Simplify `V` by only considering the operations that affect active lanes.
@@ -1623,6 +1643,22 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
return &II;
}
+ // For logical operations with all-true predicates, apply simplifications.
+ if (isAllActivePredicate(Pg)) {
+ if (Opc == Instruction::And) {
+ if (isAllActivePredicate(Op1))
+ return IC.replaceInstUsesWith(II, Op2);
+ if (isAllActivePredicate(Op2))
+ return IC.replaceInstUsesWith(II, Op1);
+ }
+ if (Opc == Instruction::Or) {
+ if (isAllActivePredicate(Op1))
+ return IC.replaceInstUsesWith(II, Op1);
+ if (isAllActivePredicate(Op2))
+ return IC.replaceInstUsesWith(II, Op2);
+ }
+ }
+
// Only active lanes matter when simplifying the operation.
Op1 = stripInactiveLanes(Op1, Pg);
Op2 = stripInactiveLanes(Op2, Pg);
@@ -1643,6 +1679,15 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
if (IInfo.inactiveLanesAreNotDefined())
return IC.replaceInstUsesWith(II, SimpleII);
+ // For zeroing operations, if we have an all-true predicate and the result
+ // simplifies, we can just use the simplified result directly since there
+ // are no inactive lanes to worry about.
+ if (IInfo.inactiveLanesAreUnused() && isAllActivePredicate(Pg))
+ return IC.replaceInstUsesWith(II, SimpleII);
+
+ if (!IInfo.inactiveLanesTakenFromOperand())
+ return std::nullopt;
+
Value *Inactive = II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom());
// The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
new file mode 100644
index 0000000000000..9eff3acc12c99
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
@@ -0,0 +1,80 @@
+; RUN: opt -passes=instcombine -mtriple aarch64 -mattr=+sve -S -o - < %s | FileCheck %s
+;
+; Test AArch64-specific InstCombine optimizations for SVE logical operations
+; with all-true predicates.
+; - a AND true = a
+; - a OR true = true
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+
+define <vscale x 16 x i1> @test_sve_and_z_all_true_right(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_and_z_all_true_right(
+; CHECK-NEXT: ret <vscale x 16 x i1> [[A:%.*]]
+ %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_and_z_all_true_left(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_and_z_all_true_left(
+; CHECK-NEXT: ret <vscale x 16 x i1> [[A:%.*]]
+ %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %all_true, <vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_orr_z_all_true_right(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_orr_z_all_true_right(
+; CHECK-NEXT: [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: ret <vscale x 16 x i1> [[ALL_TRUE]]
+ %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_orr_z_all_true_left(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_orr_z_all_true_left(
+; CHECK-NEXT: [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: ret <vscale x 16 x i1> [[ALL_TRUE]]
+ %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %all_true, <vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_original_bug_case(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %prev) {
+; CHECK-LABEL: @test_original_bug_case(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PREV:%.*]])
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP3]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]]
+ %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %prev)
+ %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %2)
+ %4 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %3)
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %6 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %4, <vscale x 16 x i1> %5)
+ ret <vscale x 16 x i1> %6
+}
+
+define <vscale x 16 x i1> @test_sve_and_z_not_all_true_predicate(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_and_z_not_all_true_predicate(
+; CHECK-NEXT: [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[ALL_TRUE]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
+ %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_and_z_no_all_true_operands(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @test_sve_and_z_no_all_true_operands(
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %result
+}