[llvm] [AArch64][SVE] Upgrade PTRUE patterns to ALL when they match vector length. (PR #172993)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 19 03:47:18 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Ricardo Jesus (rj-jesus)
<details>
<summary>Changes</summary>
When the number of active elements of a PTRUE pattern matches the scalable vector length, we can upgrade the pattern to ALL. This enables CSE with similar PTRUEs as well as other simplifications.
Similar logic existed in `getPredicateForFixedLengthVector`; I've removed it, as it should no longer be needed with this change.
I believe this change also makes the VLS handling in `isAllActivePredicate` ([link](https://github.com/llvm/llvm-project/blob/f91657dbd51c83a75e1c8cc219ffcab0101eedab/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp#L15339)) redundant, since I'm not aware of any PTRUEs it may match that are not created through `getPTrue`. I left that code as is in case that assumption is wrong; if others agree it can be removed, I'm happy to do so as well.
---
Full diff: https://github.com/llvm/llvm-project/pull/172993.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+11-10)
- (modified) llvm/test/CodeGen/AArch64/active_lane_mask.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll (+4-4)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 837393b0cbdcd..5c3eb8ad20a20 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5894,6 +5894,17 @@ static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
int Pattern) {
if (Pattern == AArch64SVEPredPattern::all)
return DAG.getConstant(1, DL, VT);
+
+ // When the number of active elements of a pattern matches the scalable vector
+ // length, we can upgrade the pattern to ALL and emit a splat instead.
+ if (unsigned PatNumElts = getNumElementsFromSVEPredPattern(Pattern)) {
+ const AArch64Subtarget &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+ unsigned NumElts = VT.getVectorMinNumElements();
+ unsigned VScale = Subtarget.getSVEVectorSizeInBits() / 128;
+ if (PatNumElts == (NumElts * VScale))
+ return DAG.getConstant(1, DL, VT);
+ }
+
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
@@ -30326,16 +30337,6 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
getSVEPredPatternFromNumElements(VT.getVectorNumElements());
assert(PgPattern && "Unexpected element count for SVE predicate");
- // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
- // AArch64SVEPredPattern::all, which can enable the use of unpredicated
- // variants of instructions when available.
- const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
- unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
- unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
- if (MaxSVESize && MinSVESize == MaxSVESize &&
- MaxSVESize == VT.getSizeInBits())
- PgPattern = AArch64SVEPredPattern::all;
-
MVT MaskVT;
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index 879dd4c12c0ba..b77e90f6fdc45 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -453,7 +453,7 @@ entry:
define <vscale x 16 x i1> @lane_mask_nxv16i1_imm256() vscale_range(16, 16) {
; CHECK-LABEL: lane_mask_nxv16i1_imm256:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ret
entry:
%active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 256)
diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
index c3322ca38f9e5..d0026db0176e1 100644
--- a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
@@ -208,7 +208,7 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(ptr %
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p0.d, vl8
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x1]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
index 72d839a21a29f..2aef74a91c056 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -563,7 +563,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_neg128(<vscale x 16 x i8> %a, <vscale
define <vscale x 16 x i8> @splice_nxv16i8_neg256(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg256:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: rev p0.b, p0.b
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
index a82998473fe68..4005e7d99400d 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
@@ -110,7 +110,7 @@ define <vscale x 16 x i1> @whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
define <vscale x 16 x i1> @whilele_b_vl_maximum() vscale_range(16, 16) {
; CHECK-LABEL: whilele_b_vl_maximum:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 255)
ret <vscale x 16 x i1> %out
@@ -245,7 +245,7 @@ define <vscale x 16 x i1> @whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
define <vscale x 16 x i1> @whilelo_b_vl_maximum() vscale_range(16, 16) {
; CHECK-LABEL: whilelo_b_vl_maximum:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 256)
ret <vscale x 16 x i1> %out
@@ -370,7 +370,7 @@ define <vscale x 16 x i1> @whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
define <vscale x 16 x i1> @whilels_b_ii_vl_maximum() vscale_range(16, 16) {
; CHECK-LABEL: whilels_b_ii_vl_maximum:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 255)
ret <vscale x 16 x i1> %out
@@ -504,7 +504,7 @@ define <vscale x 16 x i1> @whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
define <vscale x 16 x i1> @whilelt_b_ii_vl_maximum() vscale_range(16, 16) {
; CHECK-LABEL: whilelt_b_ii_vl_maximum:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b, vl256
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 256)
ret <vscale x 16 x i1> %out
``````````
</details>
https://github.com/llvm/llvm-project/pull/172993
More information about the llvm-commits
mailing list