[llvm] [AArch64][SVE] Optimize svand_z/svorr_z with all-true predicates. (PR #160408)
Vladimir Miloserdov via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 28 03:30:25 PDT 2025
https://github.com/miloserdow updated https://github.com/llvm/llvm-project/pull/160408
From da65767749017c0d2e3cf6dd3996ee317a490e74 Mon Sep 17 00:00:00 2001
From: Vladimir Miloserdov <milosvova at gmail.com>
Date: Tue, 23 Sep 2025 23:07:05 +0100
Subject: [PATCH] [AArch64][SVE] Optimize logical ops with convert.to.svbool
When the governing predicate is all-true and both operands of a logical
operation (and/or/xor) are convert.to.svbool calls from the same narrower
predicate type, unwrap the operands to that type, simplify the operation with
simplifyBinOp, and rewrap the result. This eliminates redundant instructions
in cases like:
svand_z(svptrue_b8(), svpnext_b16(prev, pg), svptrue_b16());
Fixes #160279.
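A rough before/after IR sketch of the fold, mirroring the new tests (value
names are illustrative):

  ; Before: both and.z operands are convert.to.svbool from nxv8i1, and the
  ; governing predicate is all-true.
  %cx = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
  %ct = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %r  = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %cx, <vscale x 16 x i1> %ct)

  ; After: simplifyBinOp folds (and %x, true) at nxv8i1, so the whole
  ; sequence collapses to a single conversion.
  %r = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)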
---
.../AArch64/AArch64TargetTransformInfo.cpp | 40 +++++-
.../sve-intrinsic-and-or-with-all-true.ll | 123 ++++++++++++++++++
2 files changed, 160 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index fede586cf35bc..24bad469d251e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1469,8 +1469,17 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
Instruction::UDiv);
- case Intrinsic::aarch64_sve_addqv:
case Intrinsic::aarch64_sve_and_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::And);
+ case Intrinsic::aarch64_sve_orr_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::Or);
+ case Intrinsic::aarch64_sve_eor_z:
+ return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+ Instruction::Xor);
+
+ case Intrinsic::aarch64_sve_addqv:
case Intrinsic::aarch64_sve_bic_z:
case Intrinsic::aarch64_sve_brka_z:
case Intrinsic::aarch64_sve_brkb_z:
@@ -1479,13 +1488,11 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
case Intrinsic::aarch64_sve_brkpb_z:
case Intrinsic::aarch64_sve_cntp:
case Intrinsic::aarch64_sve_compact:
- case Intrinsic::aarch64_sve_eor_z:
case Intrinsic::aarch64_sve_eorv:
case Intrinsic::aarch64_sve_eorqv:
case Intrinsic::aarch64_sve_nand_z:
case Intrinsic::aarch64_sve_nor_z:
case Intrinsic::aarch64_sve_orn_z:
- case Intrinsic::aarch64_sve_orr_z:
case Intrinsic::aarch64_sve_orv:
case Intrinsic::aarch64_sve_orqv:
case Intrinsic::aarch64_sve_pnext:
@@ -1659,6 +1666,30 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
return &II;
}
+ // If both operands are convert.to.svbool from the same narrower type, try to
+ // simplify the operation at that narrower type first.
+ if (isAllActivePredicate(Pg)) {
+ auto *ConvIntr1 = dyn_cast<IntrinsicInst>(Op1);
+ auto *ConvIntr2 = dyn_cast<IntrinsicInst>(Op2);
+ if (ConvIntr1 && ConvIntr2 &&
+ ConvIntr1->getIntrinsicID() ==
+ Intrinsic::aarch64_sve_convert_to_svbool &&
+ ConvIntr2->getIntrinsicID() ==
+ Intrinsic::aarch64_sve_convert_to_svbool) {
+ Value *NarrowOp1 = ConvIntr1->getArgOperand(0);
+ Value *NarrowOp2 = ConvIntr2->getArgOperand(0);
+ if (NarrowOp1->getType() == NarrowOp2->getType()) {
+ if (Value *SimplifiedNarrow =
+ simplifyBinOp(Opc, NarrowOp1, NarrowOp2, DL)) {
+ Value *NewConv = IC.Builder.CreateIntrinsic(
+ Intrinsic::aarch64_sve_convert_to_svbool,
+ {SimplifiedNarrow->getType()}, {SimplifiedNarrow});
+ return IC.replaceInstUsesWith(II, NewConv);
+ }
+ }
+ }
+ }
+
// Only active lanes matter when simplifying the operation.
Op1 = stripInactiveLanes(Op1, Pg);
Op2 = stripInactiveLanes(Op2, Pg);
@@ -1679,6 +1710,9 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
if (IInfo.inactiveLanesAreNotDefined())
return IC.replaceInstUsesWith(II, SimpleII);
+ if (!IInfo.inactiveLanesTakenFromOperand())
+ return std::nullopt;
+
Value *Inactive = II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom());
// The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
new file mode 100644
index 0000000000000..f214fa5872b9e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool ../../llvm-build/bin/opt
+; RUN: opt -passes=instcombine -mtriple aarch64 -mattr=+sve -S < %s | FileCheck %s
+;
+; Test AArch64-specific InstCombine optimizations for SVE logical operations
+; with all-true predicates.
+; - a AND true = a
+; - a OR true = true
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+
+define <vscale x 16 x i1> @test_and_convert_all_true_right_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_right_b16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_and_convert_all_true_left_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_left_b16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_true, <vscale x 16 x i1> %conv_x)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_or_convert_all_true_right_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_or_convert_all_true_right_b16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_or_convert_all_true_left_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_or_convert_all_true_left_b16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_true, <vscale x 16 x i1> %conv_x)
+ ret <vscale x 16 x i1> %result
+}
+define <vscale x 16 x i1> @test_and_convert_all_true_b32(<vscale x 4 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_b32(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[X:%.*]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+ ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_and_convert_all_true_b64(<vscale x 2 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_b64(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[X:%.*]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+ ret <vscale x 16 x i1> %result
+}
+
+; Negative test
+define <vscale x 16 x i1> @test_and_convert_different_granularities(<vscale x 8 x i1> %x, <vscale x 4 x i1> %y) {
+; CHECK-LABEL: @test_and_convert_different_granularities(
+; CHECK-NEXT: [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT: [[CONV_Y:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[Y:%.*]])
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_Y]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_y = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %y)
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_y)
+ ret <vscale x 16 x i1> %result
+}
+
+; Negative test
+define <vscale x 16 x i1> @test_and_convert_non_all_true_predicate(<vscale x 16 x i1> %pred, <vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_non_all_true_predicate(
+; CHECK-NEXT: [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT: [[CONV_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_TRUE]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+ ret <vscale x 16 x i1> %result
+}
+
+; Negative test
+define <vscale x 16 x i1> @test_and_convert_no_all_true(<vscale x 8 x i1> %x, <vscale x 8 x i1> %y) {
+; CHECK-LABEL: @test_and_convert_no_all_true(
+; CHECK-NEXT: [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT: [[CONV_Y:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[Y:%.*]])
+; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_Y]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
+;
+ %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+ %conv_y = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %y)
+ %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_y)
+ ret <vscale x 16 x i1> %result
+}