[llvm] [AArch64][SVE] Implement demanded bits for @llvm.aarch64.sve.cntp (PR #168714)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 06:38:23 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Benjamin Maxwell (MacDue)
<details>
<summary>Changes</summary>
This allows DemandedBits to see that the SVE CNTP intrinsic will only ever produce small positive integers. The maximum value you could get here is 256, which corresponds to CNTP on an nxv16i1 predicate on a machine with a 2048-bit vector size (the maximum for SVE).
Using this, various redundant operations (zexts, sexts, ands, ors, etc.) can be eliminated.
---
Full diff: https://github.com/llvm/llvm-project/pull/168714.diff
3 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+27-1)
- (modified) llvm/test/CodeGen/AArch64/sve-vector-compress.ll (+5-6)
- (modified) llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll (+56)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8f41f230b5521..809c2af499958 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19459,6 +19459,32 @@ static std::optional<unsigned> IsSVECntIntrinsic(SDValue S) {
return {};
}
+// Returns the element size associated with an SVE cnt[bhwdp] intrinsic. For
+// cntp (predicate), the element size corresponds to the legal (packed) SVE
+// vector type associated with the predicate. E.g. nxv4i1 returns 32.
+static std::optional<unsigned> GetSVECntElementSize(SDValue Op) {
+ if (auto ElementSize = IsSVECntIntrinsic(Op))
+ return ElementSize;
+ Intrinsic::ID IID = getIntrinsicID(Op.getNode());
+ if (IID != Intrinsic::aarch64_sve_cntp)
+ return {};
+ EVT PredVT = Op.getOperand(Op.getNumOperands() - 1).getValueType();
+ switch (PredVT.getSimpleVT().SimpleTy) {
+ case MVT::nxv1i1:
+ return 128;
+ case MVT::nxv2i1:
+ return 64;
+ case MVT::nxv4i1:
+ return 32;
+ case MVT::nxv8i1:
+ return 16;
+ case MVT::nxv16i1:
+ return 8;
+ default:
+ llvm_unreachable("unexpected predicate type");
+ }
+}
+
/// Calculates what the pre-extend type is, based on the extension
/// operation node provided by \p Extend.
///
@@ -31666,7 +31692,7 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
return false;
}
case ISD::INTRINSIC_WO_CHAIN: {
- if (auto ElementSize = IsSVECntIntrinsic(Op)) {
+ if (auto ElementSize = GetSVECntElementSize(Op)) {
unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
if (!MaxSVEVectorSizeInBits)
MaxSVEVectorSizeInBits = AArch64::SVEMaxBitsPerVector;
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
index cc3a3734a9721..f700dee0fb2e4 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -143,20 +143,19 @@ define <vscale x 8 x i32> @test_compress_large(<vscale x 8 x i32> %vec, <vscale
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: punpklo p2.h, p0.b
+; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: cnth x9
-; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: sub x9, x9, #1
; CHECK-NEXT: punpkhi p0.h, p0.b
-; CHECK-NEXT: compact z0.s, p2, z0.s
-; CHECK-NEXT: cntp x8, p1, p2.s
+; CHECK-NEXT: compact z0.s, p1, z0.s
+; CHECK-NEXT: cntp x8, p2, p1.s
; CHECK-NEXT: compact z1.s, p0, z1.s
; CHECK-NEXT: str z0, [sp]
-; CHECK-NEXT: mov w8, w8
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: st1w { z1.s }, p1, [x9, x8, lsl #2]
+; CHECK-NEXT: st1w { z1.s }, p2, [x9, x8, lsl #2]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2
diff --git a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
index 9572778484f8d..568abe718ad9b 100644
--- a/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
+++ b/llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
@@ -80,6 +80,62 @@ define i64 @cntd_and_elimination() {
ret i64 %result
}
+define i64 @cntp_nxv16i1_and_elimination(<vscale x 16 x i1> %p) {
+; CHECK-LABEL: cntp_nxv16i1_and_elimination:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.b
+; CHECK-NEXT: and x9, x8, #0x1fc
+; CHECK-NEXT: add x0, x8, x9
+; CHECK-NEXT: ret
+ %cntp = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %p, <vscale x 16 x i1> %p)
+ %and_redundant = and i64 %cntp, 511
+ %and_required = and i64 %cntp, 17179869180
+ %result = add i64 %and_redundant, %and_required
+ ret i64 %result
+}
+
+define i64 @cntp_nxv8i1_and_elimination(<vscale x 8 x i1> %p) {
+; CHECK-LABEL: cntp_nxv8i1_and_elimination:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.h
+; CHECK-NEXT: and x9, x8, #0xfc
+; CHECK-NEXT: add x0, x8, x9
+; CHECK-NEXT: ret
+ %cntp = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %p, <vscale x 8 x i1> %p)
+ %and_redundant = and i64 %cntp, 1023
+ %and_required = and i64 %cntp, 17179869180
+ %result = add i64 %and_redundant, %and_required
+ ret i64 %result
+}
+
+define i64 @cntp_nxv4i1_and_elimination(<vscale x 4 x i1> %p) {
+; CHECK-LABEL: cntp_nxv4i1_and_elimination:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.s
+; CHECK-NEXT: and x9, x8, #0x7c
+; CHECK-NEXT: add x0, x8, x9
+; CHECK-NEXT: ret
+ %cntp = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %p, <vscale x 4 x i1> %p)
+ %and_redundant = and i64 %cntp, 127
+ %and_required = and i64 %cntp, 17179869180
+ %result = add i64 %and_redundant, %and_required
+ ret i64 %result
+}
+
+define i64 @cntp_nxv2i1_and_elimination(<vscale x 2 x i1> %p) {
+; CHECK-LABEL: cntp_nxv2i1_and_elimination:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.d
+; CHECK-NEXT: and x9, x8, #0x3c
+; CHECK-NEXT: add x0, x8, x9
+; CHECK-NEXT: ret
+ %cntp = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %p, <vscale x 2 x i1> %p)
+ %and_redundant = and i64 %cntp, 63
+ %and_required = and i64 %cntp, 17179869180
+ %result = add i64 %and_redundant, %and_required
+ ret i64 %result
+}
+
define i64 @vscale_trunc_zext() vscale_range(1,16) {
; CHECK-LABEL: vscale_trunc_zext:
; CHECK: // %bb.0:
``````````
</details>
https://github.com/llvm/llvm-project/pull/168714
More information about the llvm-commits
mailing list