[llvm] [AArch64] Fix BE popcount casts. (PR #129879)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 04:22:36 PST 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/129879
A bitcast, being defined as a load and a store, can change the lane order. We need to use an NVCAST instead to keep the lanes coming out of the VADDV in the same order in big-endian. Extracting from a v2i64 vector keeps the types of the nvcast legal, and also allows us to replace a lane mov with a mov of 0.
Fixes #129843
>From f26225a9b258162f5964579cd8126b09ee06b0a7 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 5 Mar 2025 11:25:11 +0000
Subject: [PATCH] [AArch64] Fix BE popcount casts.
A bitcast, being defined as a load and a store, can change the lane order. We
need to use an NVCAST instead to keep the lanes coming out of the VADDV in the
same order in big-endian. Extracting from a v2i64 vector keeps the types of the
nvcast legal, and also allows us to replace a lane mov with a mov of 0.
Fixes #129843
---
.../Target/AArch64/AArch64ISelLowering.cpp | 10 ++++++++--
llvm/test/CodeGen/AArch64/arm64-popcnt.ll | 8 +++-----
llvm/test/CodeGen/AArch64/parity.ll | 2 +-
llvm/test/CodeGen/AArch64/popcount.ll | 19 +++++++++----------
4 files changed, 21 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 52ec4753ec4c1..ef5d833c03428 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10788,7 +10788,10 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
if (VT == MVT::i32)
AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, AddV,
DAG.getConstant(0, DL, MVT::i64));
- AddV = DAG.getNode(ISD::BITCAST, DL, VT, AddV);
+ else
+ AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, AddV),
+ DAG.getConstant(0, DL, MVT::i64));
if (IsParity)
AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT));
return AddV;
@@ -10797,7 +10800,10 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
SDValue AddV = DAG.getNode(AArch64ISD::UADDV, DL, MVT::v16i8, CtPop);
- AddV = DAG.getNode(ISD::BITCAST, DL, VT, AddV);
+ AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64,
+ DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v2i64, AddV),
+ DAG.getConstant(0, DL, MVT::i64));
+ AddV = DAG.getZExtOrTrunc(AddV, DL, VT);
if (IsParity)
AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT));
return AddV;
diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index 369667ec33f66..d06e42f5405ef 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -129,7 +129,6 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
; CHECK-BE-NEXT: cnt v0.8b, v0.8b
; CHECK-BE-NEXT: addv b0, v0.8b
-; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
; CHECK-BE-NEXT: fmov x0, d0
; CHECK-BE-NEXT: ret
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
@@ -436,9 +435,9 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: mov.d v0[1], x1
+; CHECK-NEXT: mov x1, xzr
; CHECK-NEXT: cnt.16b v0, v0
; CHECK-NEXT: addv.16b b0, v0
-; CHECK-NEXT: mov.d x1, v0[1]
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
@@ -481,13 +480,12 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; CHECK-BE-LABEL: cnt128:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: fmov d0, x0
+; CHECK-BE-NEXT: mov x0, xzr
; CHECK-BE-NEXT: mov v0.d[1], x1
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
; CHECK-BE-NEXT: cnt v0.16b, v0.16b
; CHECK-BE-NEXT: addv b0, v0.16b
-; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
-; CHECK-BE-NEXT: mov x1, v0.d[1]
-; CHECK-BE-NEXT: fmov x0, d0
+; CHECK-BE-NEXT: fmov x1, d0
; CHECK-BE-NEXT: ret
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
ret i128 %cnt
diff --git a/llvm/test/CodeGen/AArch64/parity.ll b/llvm/test/CodeGen/AArch64/parity.ll
index 1e51793fb5f91..91515277cb3f6 100644
--- a/llvm/test/CodeGen/AArch64/parity.ll
+++ b/llvm/test/CodeGen/AArch64/parity.ll
@@ -159,7 +159,7 @@ define i32 @parity_64_trunc(i64 %x) {
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: cnt v0.8b, v0.8b
; CHECK-NEXT: addv b0, v0.8b
-; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index 6cc925f0ae91f..e664e73594923 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -41,8 +41,8 @@ define i8 @popcount128(ptr nocapture nonnull readonly %0) {
; BE-NEXT: rev64 v0.16b, v0.16b
; BE-NEXT: cnt v0.16b, v0.16b
; BE-NEXT: addv b0, v0.16b
-; BE-NEXT: rev32 v0.16b, v0.16b
-; BE-NEXT: mov w0, v0.s[3]
+; BE-NEXT: rev64 v0.4s, v0.4s
+; BE-NEXT: mov w0, v0.s[1]
; BE-NEXT: ret
;
; GISEL-LABEL: popcount128:
@@ -138,10 +138,10 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
; BE-NEXT: cnt v1.16b, v1.16b
; BE-NEXT: addv b0, v0.16b
; BE-NEXT: addv b1, v1.16b
-; BE-NEXT: rev32 v0.16b, v0.16b
-; BE-NEXT: rev32 v1.16b, v1.16b
-; BE-NEXT: mov w8, v0.s[3]
-; BE-NEXT: mov w9, v1.s[3]
+; BE-NEXT: rev64 v0.4s, v0.4s
+; BE-NEXT: rev64 v1.4s, v1.4s
+; BE-NEXT: mov w8, v0.s[1]
+; BE-NEXT: mov w9, v1.s[1]
; BE-NEXT: add w0, w9, w8
; BE-NEXT: ret
;
@@ -227,22 +227,21 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
; CHECK: // %bb.0: // %Entry
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: mov v0.d[1], x1
+; CHECK-NEXT: mov x1, xzr
; CHECK-NEXT: cnt v0.16b, v0.16b
; CHECK-NEXT: addv b0, v0.16b
-; CHECK-NEXT: mov x1, v0.d[1]
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
; BE-LABEL: popcount1x128:
; BE: // %bb.0: // %Entry
; BE-NEXT: fmov d0, x0
+; BE-NEXT: mov x0, xzr
; BE-NEXT: mov v0.d[1], x1
; BE-NEXT: rev64 v0.16b, v0.16b
; BE-NEXT: cnt v0.16b, v0.16b
; BE-NEXT: addv b0, v0.16b
-; BE-NEXT: rev64 v0.16b, v0.16b
-; BE-NEXT: mov x1, v0.d[1]
-; BE-NEXT: fmov x0, d0
+; BE-NEXT: fmov x1, d0
; BE-NEXT: ret
;
; GISEL-LABEL: popcount1x128:
More information about the llvm-commits
mailing list