[llvm] ab811e7 - [AArch64] Fix BE popcount casts. (#129879)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 5 12:08:37 PST 2025


Author: David Green
Date: 2025-03-05T20:08:34Z
New Revision: ab811e75734a77247dae6df1579fa6f29394f200

URL: https://github.com/llvm/llvm-project/commit/ab811e75734a77247dae6df1579fa6f29394f200
DIFF: https://github.com/llvm/llvm-project/commit/ab811e75734a77247dae6df1579fa6f29394f200.diff

LOG: [AArch64] Fix BE popcount casts. (#129879)

A bitcast, being defined as a store followed by a load, can change the lane
order. We need to use an NVCAST instead, so that the lanes coming out of the
UADDV stay the same in big-endian. Extracting from a v2i64 vector keeps the
types of the NVCAST legal, and also allows us to replace a lane mov with a
mov of zero.

Fixes #129843

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/arm64-popcnt.ll
    llvm/test/CodeGen/AArch64/parity.ll
    llvm/test/CodeGen/AArch64/popcount.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ec8b18fd6d0dd..e7d141d22c7c7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10807,7 +10807,10 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
     if (VT == MVT::i32)
       AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, AddV,
                          DAG.getConstant(0, DL, MVT::i64));
-    AddV = DAG.getNode(ISD::BITCAST, DL, VT, AddV);
+    else
+      AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                         DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, AddV),
+                         DAG.getConstant(0, DL, MVT::i64));
     if (IsParity)
       AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT));
     return AddV;
@@ -10816,7 +10819,10 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
 
     SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
     SDValue AddV = DAG.getNode(AArch64ISD::UADDV, DL, MVT::v16i8, CtPop);
-    AddV = DAG.getNode(ISD::BITCAST, DL, VT, AddV);
+    AddV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64,
+                       DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v2i64, AddV),
+                       DAG.getConstant(0, DL, MVT::i64));
+    AddV = DAG.getZExtOrTrunc(AddV, DL, VT);
     if (IsParity)
       AddV = DAG.getNode(ISD::AND, DL, VT, AddV, DAG.getConstant(1, DL, VT));
     return AddV;

diff  --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index 369667ec33f66..d06e42f5405ef 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -129,7 +129,6 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
 ; CHECK-BE-NEXT:    rev64 v0.8b, v0.8b
 ; CHECK-BE-NEXT:    cnt v0.8b, v0.8b
 ; CHECK-BE-NEXT:    addv b0, v0.8b
-; CHECK-BE-NEXT:    rev64 v0.8b, v0.8b
 ; CHECK-BE-NEXT:    fmov x0, d0
 ; CHECK-BE-NEXT:    ret
   %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
@@ -436,9 +435,9 @@ define i128 @cnt128(i128 %x) nounwind readnone {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    mov.d v0[1], x1
+; CHECK-NEXT:    mov x1, xzr
 ; CHECK-NEXT:    cnt.16b v0, v0
 ; CHECK-NEXT:    addv.16b b0, v0
-; CHECK-NEXT:    mov.d x1, v0[1]
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
 ;
@@ -481,13 +480,12 @@ define i128 @cnt128(i128 %x) nounwind readnone {
 ; CHECK-BE-LABEL: cnt128:
 ; CHECK-BE:       // %bb.0:
 ; CHECK-BE-NEXT:    fmov d0, x0
+; CHECK-BE-NEXT:    mov x0, xzr
 ; CHECK-BE-NEXT:    mov v0.d[1], x1
 ; CHECK-BE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECK-BE-NEXT:    cnt v0.16b, v0.16b
 ; CHECK-BE-NEXT:    addv b0, v0.16b
-; CHECK-BE-NEXT:    rev64 v0.16b, v0.16b
-; CHECK-BE-NEXT:    mov x1, v0.d[1]
-; CHECK-BE-NEXT:    fmov x0, d0
+; CHECK-BE-NEXT:    fmov x1, d0
 ; CHECK-BE-NEXT:    ret
   %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
   ret i128 %cnt

diff  --git a/llvm/test/CodeGen/AArch64/parity.ll b/llvm/test/CodeGen/AArch64/parity.ll
index 1e51793fb5f91..91515277cb3f6 100644
--- a/llvm/test/CodeGen/AArch64/parity.ll
+++ b/llvm/test/CodeGen/AArch64/parity.ll
@@ -159,7 +159,7 @@ define i32 @parity_64_trunc(i64 %x) {
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    cnt v0.8b, v0.8b
 ; CHECK-NEXT:    addv b0, v0.8b
-; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index 6cc925f0ae91f..e664e73594923 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -41,8 +41,8 @@ define i8 @popcount128(ptr nocapture nonnull readonly %0) {
 ; BE-NEXT:    rev64 v0.16b, v0.16b
 ; BE-NEXT:    cnt v0.16b, v0.16b
 ; BE-NEXT:    addv b0, v0.16b
-; BE-NEXT:    rev32 v0.16b, v0.16b
-; BE-NEXT:    mov w0, v0.s[3]
+; BE-NEXT:    rev64 v0.4s, v0.4s
+; BE-NEXT:    mov w0, v0.s[1]
 ; BE-NEXT:    ret
 ;
 ; GISEL-LABEL: popcount128:
@@ -138,10 +138,10 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
 ; BE-NEXT:    cnt v1.16b, v1.16b
 ; BE-NEXT:    addv b0, v0.16b
 ; BE-NEXT:    addv b1, v1.16b
-; BE-NEXT:    rev32 v0.16b, v0.16b
-; BE-NEXT:    rev32 v1.16b, v1.16b
-; BE-NEXT:    mov w8, v0.s[3]
-; BE-NEXT:    mov w9, v1.s[3]
+; BE-NEXT:    rev64 v0.4s, v0.4s
+; BE-NEXT:    rev64 v1.4s, v1.4s
+; BE-NEXT:    mov w8, v0.s[1]
+; BE-NEXT:    mov w9, v1.s[1]
 ; BE-NEXT:    add w0, w9, w8
 ; BE-NEXT:    ret
 ;
@@ -227,22 +227,21 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
 ; CHECK:       // %bb.0: // %Entry
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    mov v0.d[1], x1
+; CHECK-NEXT:    mov x1, xzr
 ; CHECK-NEXT:    cnt v0.16b, v0.16b
 ; CHECK-NEXT:    addv b0, v0.16b
-; CHECK-NEXT:    mov x1, v0.d[1]
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
 ;
 ; BE-LABEL: popcount1x128:
 ; BE:       // %bb.0: // %Entry
 ; BE-NEXT:    fmov d0, x0
+; BE-NEXT:    mov x0, xzr
 ; BE-NEXT:    mov v0.d[1], x1
 ; BE-NEXT:    rev64 v0.16b, v0.16b
 ; BE-NEXT:    cnt v0.16b, v0.16b
 ; BE-NEXT:    addv b0, v0.16b
-; BE-NEXT:    rev64 v0.16b, v0.16b
-; BE-NEXT:    mov x1, v0.d[1]
-; BE-NEXT:    fmov x0, d0
+; BE-NEXT:    fmov x1, d0
 ; BE-NEXT:    ret
 ;
 ; GISEL-LABEL: popcount1x128:


        


More information about the llvm-commits mailing list