[llvm] [AArch64] Fold addv(ctpop) to ctpop if the operand is known to be 8-bit (PR #159086)
Hongyu Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 16 06:28:19 PDT 2025
https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/159086
From 071886a46e96964bf5f25c0b28e9ec1a36d16716 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 19:41:21 +0800
Subject: [PATCH 1/3] Precommit tests
---
llvm/test/CodeGen/AArch64/ctpop.ll | 125 +++++++++++++++++++++++++++++
1 file changed, 125 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll
index c739be95cd243..013bcd4e29d3d 100644
--- a/llvm/test/CodeGen/AArch64/ctpop.ll
+++ b/llvm/test/CodeGen/AArch64/ctpop.ll
@@ -505,3 +505,128 @@ entry:
%s = call <4 x i128> @llvm.ctpop(<4 x i128> %d)
ret <4 x i128> %s
}
+
+define i8 @i8(i8 %x) {
+; CHECK-SD-LABEL: i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: cnt v0.8b, v0.8b
+; CHECK-SD-NEXT: addv b0, v0.8b
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT: and x8, x0, #0xff
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: cnt v0.8b, v0.8b
+; CHECK-GI-NEXT: uaddlv h0, v0.8b
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
+entry:
+ %s = call i8 @llvm.ctpop.i8(i8 %x)
+ ret i8 %s
+}
+
+define i16 @i16_mask(i16 %x) {
+; CHECK-SD-LABEL: i16_mask:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: cnt v0.8b, v0.8b
+; CHECK-SD-NEXT: addv b0, v0.8b
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i16_mask:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: and w8, w0, #0xff
+; CHECK-GI-NEXT: and x8, x8, #0xffff
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: cnt v0.8b, v0.8b
+; CHECK-GI-NEXT: uaddlv h0, v0.8b
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
+entry:
+ %and = and i16 %x, 255
+ %s = call i16 @llvm.ctpop.i16(i16 %and)
+ ret i16 %s
+}
+
+define i32 @i32_mask(i32 %x) {
+; CHECK-SD-LABEL: i32_mask:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: cnt v0.8b, v0.8b
+; CHECK-SD-NEXT: addv b0, v0.8b
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i32_mask:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: and w8, w0, #0xff
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: cnt v0.8b, v0.8b
+; CHECK-GI-NEXT: uaddlv h0, v0.8b
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
+entry:
+ %and = and i32 %x, 255
+ %s = call i32 @llvm.ctpop.i32(i32 %and)
+ ret i32 %s
+}
+
+define i32 @i32_mask_negative(i32 %x) {
+; CHECK-SD-LABEL: i32_mask_negative:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: cnt v0.8b, v0.8b
+; CHECK-SD-NEXT: addv b0, v0.8b
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i32_mask_negative:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: and w8, w0, #0xffff
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: cnt v0.8b, v0.8b
+; CHECK-GI-NEXT: uaddlv h0, v0.8b
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
+entry:
+ %and = and i32 %x, 65535
+ %s = call i32 @llvm.ctpop.i32(i32 %and)
+ ret i32 %s
+}
+
+define i128 @i128_mask(i128 %x) {
+; CHECK-SD-LABEL: i128_mask:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: and x8, x0, #0xff
+; CHECK-SD-NEXT: mov x1, xzr
+; CHECK-SD-NEXT: mov v0.d[0], x8
+; CHECK-SD-NEXT: cnt v0.16b, v0.16b
+; CHECK-SD-NEXT: addv b0, v0.16b
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i128_mask:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: and x8, x0, #0xff
+; CHECK-GI-NEXT: mov x1, xzr
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: mov v0.d[1], xzr
+; CHECK-GI-NEXT: cnt v0.16b, v0.16b
+; CHECK-GI-NEXT: uaddlv h0, v0.16b
+; CHECK-GI-NEXT: mov w0, v0.s[0]
+; CHECK-GI-NEXT: ret
+entry:
+ %and = and i128 %x, 255
+ %s = call i128 @llvm.ctpop.i128(i128 %and)
+ ret i128 %s
+}
From 952945f4bb1792b96a209f9585a1b6d8ab01e9a8 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 20:38:48 +0800
Subject: [PATCH 2/3] [AArch64] Fold addv(ctpop) to ctpop if the operand is
known to be 8-bit
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 9 +++++++++
llvm/test/CodeGen/AArch64/abds.ll | 6 +-----
llvm/test/CodeGen/AArch64/abdu.ll | 6 +-----
llvm/test/CodeGen/AArch64/ctpop.ll | 3 ---
4 files changed, 11 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5ffaf2c49b4c0..b114aae3deb63 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18995,6 +18995,15 @@ static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
else if (SDValue R = performUADDVZextCombine(A, DAG))
return R;
}
+
+ // uaddv(a) --> a if all lanes of a are known to be zero except the 0th lane.
+ MVT VT = N->getSimpleValueType(0);
+ APInt Mask = APInt::getAllOnes(VT.getVectorNumElements());
+ Mask.clearBit(0);
+ KnownBits KnownLeadingLanes = DAG.computeKnownBits(A, Mask);
+ if (KnownLeadingLanes.isZero())
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, A);
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index bf52e71ec21fe..c42ef1a96e5a3 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -497,13 +497,9 @@ define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind {
define i64 @vector_legalized(i16 %a, i16 %b) {
; CHECK-LABEL: vector_legalized:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: subs w8, w8, w1, sxth
-; CHECK-NEXT: addp d0, v0.2d
-; CHECK-NEXT: cneg w8, w8, mi
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: add x0, x9, x8
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%ea = sext i16 %a to i32
%eb = sext i16 %b to i32
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 8d2b0b0742d7d..9fbcc1c82017f 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -362,13 +362,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
define i64 @vector_legalized(i16 %a, i16 %b) {
; CHECK-LABEL: vector_legalized:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w8, w8, mi
-; CHECK-NEXT: addp d0, v0.2d
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: add x0, x9, x8
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%ea = zext i16 %a to i32
%eb = zext i16 %b to i32
diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll
index 013bcd4e29d3d..d547b6bec5b83 100644
--- a/llvm/test/CodeGen/AArch64/ctpop.ll
+++ b/llvm/test/CodeGen/AArch64/ctpop.ll
@@ -512,7 +512,6 @@ define i8 @i8(i8 %x) {
; CHECK-SD-NEXT: and w8, w0, #0xff
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: cnt v0.8b, v0.8b
-; CHECK-SD-NEXT: addv b0, v0.8b
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
@@ -536,7 +535,6 @@ define i16 @i16_mask(i16 %x) {
; CHECK-SD-NEXT: and w8, w0, #0xff
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: cnt v0.8b, v0.8b
-; CHECK-SD-NEXT: addv b0, v0.8b
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
@@ -561,7 +559,6 @@ define i32 @i32_mask(i32 %x) {
; CHECK-SD-NEXT: and w8, w0, #0xff
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: cnt v0.8b, v0.8b
-; CHECK-SD-NEXT: addv b0, v0.8b
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
;
From 4b52fbe4378961a5f8621e4f970ee1d615e96981 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 16 Sep 2025 21:27:53 +0800
Subject: [PATCH 3/3] format
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b114aae3deb63..511cbe5eccd32 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18998,11 +18998,11 @@ static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
// uaddv(a) --> a if all lanes of a are known to be zero except the 0th lane.
MVT VT = N->getSimpleValueType(0);
- APInt Mask = APInt::getAllOnes(VT.getVectorNumElements());
- Mask.clearBit(0);
- KnownBits KnownLeadingLanes = DAG.computeKnownBits(A, Mask);
- if (KnownLeadingLanes.isZero())
- return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, A);
+ APInt Mask = APInt::getAllOnes(VT.getVectorNumElements());
+ Mask.clearBit(0);
+ KnownBits KnownLeadingLanes = DAG.computeKnownBits(A, Mask);
+ if (KnownLeadingLanes.isZero())
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, A);
return SDValue();
}
More information about the llvm-commits mailing list