[llvm] [AArch64][SVE] Optimize svand_z/svorr_z with all-true predicates. (PR #160408)
Vladimir Miloserdov via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 24 04:53:39 PDT 2025
https://github.com/miloserdow updated https://github.com/llvm/llvm-project/pull/160408
From c61ae4bba8710e76e71bad2431d6ec59371bafdc Mon Sep 17 00:00:00 2001
From: Vladimir Miloserdov <milosvova at gmail.com>
Date: Tue, 23 Sep 2025 23:07:05 +0100
Subject: [PATCH] [AArch64][SVE] Optimize svand_z/svorr_z with all-true
predicates.
Eliminate redundant AND/OR operations with all-true SVE predicates that
were previously not being optimized away.
Modify isAllActivePredicate to detect splat(i1 true) patterns and
add IR opcode mappings for and_z/orr_z/eor_z intrinsics to enable
simplification through simplifySVEIntrinsicBinOp.
Fixes #160279.
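For illustration, a minimal source-level sketch of the kind of code this
folds (assuming standard ACLE intrinsics from <arm_sve.h>; the function
name is hypothetical and not taken from the issue):

  #include <arm_sve.h>

  // The governing predicate and the second operand are both all-true, so
  // the predicate AND is redundant and the call should fold to plain `p`.
  svbool_t keep_active_lanes(svbool_t p) {
    return svand_z(svptrue_b8(), p, svptrue_b8());
  }

The corresponding svorr_z form folds the other way: with an all-true
operand, the result is simply the all-true predicate.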
---
.../AArch64/AArch64TargetTransformInfo.cpp | 55 +++++++++++--
.../sve-intrinsic-and-or-with-all-true.ll | 80 +++++++++++++++++++
2 files changed, 130 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 8c4b4f6e4d6de..c96e4e43e01bc 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1441,8 +1441,17 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
Instruction::UDiv);
- case Intrinsic::aarch64_sve_addqv:
case Intrinsic::aarch64_sve_and_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::And);
+ case Intrinsic::aarch64_sve_orr_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::Or);
+ case Intrinsic::aarch64_sve_eor_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::Xor);
+
+ case Intrinsic::aarch64_sve_addqv:
case Intrinsic::aarch64_sve_bic_z:
case Intrinsic::aarch64_sve_brka_z:
case Intrinsic::aarch64_sve_brkb_z:
@@ -1451,13 +1460,11 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
case Intrinsic::aarch64_sve_brkpb_z:
case Intrinsic::aarch64_sve_cntp:
case Intrinsic::aarch64_sve_compact:
- case Intrinsic::aarch64_sve_eor_z:
case Intrinsic::aarch64_sve_eorv:
case Intrinsic::aarch64_sve_eorqv:
case Intrinsic::aarch64_sve_nand_z:
case Intrinsic::aarch64_sve_nor_z:
case Intrinsic::aarch64_sve_orn_z:
- case Intrinsic::aarch64_sve_orr_z:
case Intrinsic::aarch64_sve_orv:
case Intrinsic::aarch64_sve_orqv:
case Intrinsic::aarch64_sve_pnext:
@@ -1587,8 +1594,21 @@ static bool isAllActivePredicate(Value *Pred) {
if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
Pred = UncastedPred;
- auto *C = dyn_cast<Constant>(Pred);
- return (C && C->isAllOnesValue());
+
+ // Also look through a bare convert.to.svbool of an all-true splat.
+ Value *ConvertArg;
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
+ m_Value(ConvertArg))))
+ Pred = ConvertArg;
+ // Check for splat(i1 true) pattern used by svptrue intrinsics
+ if (auto *C = dyn_cast<Constant>(Pred)) {
+ if (C->isAllOnesValue())
+ return true;
+ if (auto *SplatVal = C->getSplatValue())
+ if (auto *CI = dyn_cast<ConstantInt>(SplatVal))
+ return CI->isOne();
+ }
+ return false;
}
// Simplify `V` by only considering the operations that affect active lanes.
@@ -1623,6 +1643,22 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
return &II;
}
+ // For logical operations with all-true predicates, apply simplifications.
+ if (isAllActivePredicate(Pg)) {
+ if (Opc == Instruction::And) {
+ if (isAllActivePredicate(Op1))
+ return IC.replaceInstUsesWith(II, Op2);
+ if (isAllActivePredicate(Op2))
+ return IC.replaceInstUsesWith(II, Op1);
+ }
+ if (Opc == Instruction::Or) {
+ if (isAllActivePredicate(Op1))
+ return IC.replaceInstUsesWith(II, Op1);
+ if (isAllActivePredicate(Op2))
+ return IC.replaceInstUsesWith(II, Op2);
+ }
+ }
+
// Only active lanes matter when simplifying the operation.
Op1 = stripInactiveLanes(Op1, Pg);
Op2 = stripInactiveLanes(Op2, Pg);
@@ -1643,6 +1679,15 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
if (IInfo.inactiveLanesAreNotDefined())
return IC.replaceInstUsesWith(II, SimpleII);
+ // For zeroing operations, if we have an all-true predicate and the result
+ // simplifies, we can just use the simplified result directly since there
+ // are no inactive lanes to worry about.
+ if (IInfo.inactiveLanesAreUnused() && isAllActivePredicate(Pg))
+ return IC.replaceInstUsesWith(II, SimpleII);
+
+ if (!IInfo.inactiveLanesTakenFromOperand())
+ return std::nullopt;
+
Value *Inactive = II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom());
// The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
new file mode 100644
index 0000000000000..9eff3acc12c99
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
@@ -0,0 +1,80 @@
+; RUN: opt -passes=instcombine -mtriple aarch64 -mattr=+sve -S -o - < %s | FileCheck %s
+;
+; Test AArch64-specific InstCombine optimizations for SVE logical operations
+; with all-true predicates.
+; - a AND true = a
+; - a OR true = true
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+
+define <vscale x 16 x i1> @test_sve_and_z_all_true_right(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_and_z_all_true_right(
+; CHECK-NEXT: ret <vscale x 16 x i1> [[A:%.*]]
+ %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_and_z_all_true_left(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_and_z_all_true_left(
+; CHECK-NEXT: ret <vscale x 16 x i1> [[A:%.*]]
+ %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %all_true, <vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_orr_z_all_true_right(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_orr_z_all_true_right(
+; CHECK-NEXT: [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: ret <vscale x 16 x i1> [[ALL_TRUE]]
+ %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_orr_z_all_true_left(<vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_orr_z_all_true_left(
+; CHECK-NEXT: [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: ret <vscale x 16 x i1> [[ALL_TRUE]]
+ %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %all_true, <vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_original_bug_case(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %prev) {
+; CHECK-LABEL: @test_original_bug_case(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PREV:%.*]])
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP3]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]]
+ %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+ %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %prev)
+ %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %2)
+ %4 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %3)
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %6 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %4, <vscale x 16 x i1> %5)
+ ret <vscale x 16 x i1> %6
+}
+
+define <vscale x 16 x i1> @test_sve_and_z_not_all_true_predicate(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: @test_sve_and_z_not_all_true_predicate(
+; CHECK-NEXT: [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[ALL_TRUE]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
+ %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_sve_and_z_no_all_true_operands(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @test_sve_and_z_no_all_true_operands(
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ ret <vscale x 16 x i1> %result
+}