[llvm] [AArch64] Improve non-SVE popcount for 32-bit and 64-bit using udot (PR #95881)
Tim Gymnich via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 18 11:07:24 PDT 2024
================
@@ -67,25 +108,161 @@ Entry:
declare i256 @llvm.ctpop.i256(i256)
define <1 x i128> @popcount1x128(<1 x i128> %0) {
-; CHECK-LABEL: popcount1x128:
-; CHECK: // %bb.0: // %Entry
-; CHECK-NEXT: // implicit-def: $q0
-; CHECK-NEXT: mov v0.d[0], x0
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: cnt v0.16b, v0.16b
-; CHECK-NEXT: uaddlv h0, v0.16b
-; CHECK-NEXT: // kill: def $q0 killed $h0
-; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: // kill: def $x0 killed $w0
-; CHECK-NEXT: // kill: def $x8 killed $w8
-; CHECK-NEXT: bfi x0, x8, #32, #32
-; CHECK-NEXT: ret
+; NEON-LABEL: popcount1x128:
+; NEON: // %bb.0: // %Entry
+; NEON-NEXT: fmov d1, x0
+; NEON-NEXT: movi v0.2d, #0000000000000000
+; NEON-NEXT: mov v1.d[1], x1
+; NEON-NEXT: cnt v1.16b, v1.16b
+; NEON-NEXT: uaddlv h1, v1.16b
+; NEON-NEXT: mov v0.s[0], v1.s[0]
+; NEON-NEXT: mov x1, v0.d[1]
+; NEON-NEXT: fmov x0, d0
+; NEON-NEXT: ret
+;
+; DOT-LABEL: popcount1x128:
+; DOT: // %bb.0: // %Entry
+; DOT-NEXT: fmov d1, x0
+; DOT-NEXT: movi v0.2d, #0000000000000000
+; DOT-NEXT: mov v1.d[1], x1
+; DOT-NEXT: cnt v1.16b, v1.16b
+; DOT-NEXT: uaddlv h1, v1.16b
+; DOT-NEXT: mov v0.s[0], v1.s[0]
+; DOT-NEXT: mov x1, v0.d[1]
+; DOT-NEXT: fmov x0, d0
+; DOT-NEXT: ret
+;
+; SVE-LABEL: popcount1x128:
+; SVE: // %bb.0: // %Entry
+; SVE-NEXT: fmov d1, x0
+; SVE-NEXT: movi v0.2d, #0000000000000000
+; SVE-NEXT: mov v1.d[1], x1
+; SVE-NEXT: cnt v1.16b, v1.16b
+; SVE-NEXT: uaddlv h1, v1.16b
+; SVE-NEXT: mov v0.s[0], v1.s[0]
+; SVE-NEXT: mov x1, v0.d[1]
+; SVE-NEXT: fmov x0, d0
+; SVE-NEXT: ret
Entry:
%1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0)
----------------
tgymnich wrote:
done
https://github.com/llvm/llvm-project/pull/95881
More information about the llvm-commits mailing list