[llvm] [AArch64] Improve non-SVE popcount for 32-bit and 64-bit using udot (PR #95881)

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 18 09:57:40 PDT 2024


================
@@ -67,25 +108,161 @@ Entry:
 declare i256 @llvm.ctpop.i256(i256)
 
 define <1 x i128> @popcount1x128(<1 x i128> %0) {
-; CHECK-LABEL: popcount1x128:
-; CHECK:       // %bb.0: // %Entry
-; CHECK-NEXT:    // implicit-def: $q0
-; CHECK-NEXT:    mov v0.d[0], x0
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    cnt v0.16b, v0.16b
-; CHECK-NEXT:    uaddlv h0, v0.16b
-; CHECK-NEXT:    // kill: def $q0 killed $h0
-; CHECK-NEXT:    mov x1, xzr
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    // kill: def $x0 killed $w0
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    bfi x0, x8, #32, #32
-; CHECK-NEXT:    ret
+; NEON-LABEL: popcount1x128:
+; NEON:       // %bb.0: // %Entry
+; NEON-NEXT:    fmov d1, x0
+; NEON-NEXT:    movi v0.2d, #0000000000000000
+; NEON-NEXT:    mov v1.d[1], x1
+; NEON-NEXT:    cnt v1.16b, v1.16b
+; NEON-NEXT:    uaddlv h1, v1.16b
+; NEON-NEXT:    mov v0.s[0], v1.s[0]
+; NEON-NEXT:    mov x1, v0.d[1]
+; NEON-NEXT:    fmov x0, d0
+; NEON-NEXT:    ret
+;
+; DOT-LABEL: popcount1x128:
+; DOT:       // %bb.0: // %Entry
+; DOT-NEXT:    fmov d1, x0
+; DOT-NEXT:    movi v0.2d, #0000000000000000
+; DOT-NEXT:    mov v1.d[1], x1
+; DOT-NEXT:    cnt v1.16b, v1.16b
+; DOT-NEXT:    uaddlv h1, v1.16b
+; DOT-NEXT:    mov v0.s[0], v1.s[0]
+; DOT-NEXT:    mov x1, v0.d[1]
+; DOT-NEXT:    fmov x0, d0
+; DOT-NEXT:    ret
+;
+; SVE-LABEL: popcount1x128:
+; SVE:       // %bb.0: // %Entry
+; SVE-NEXT:    fmov d1, x0
+; SVE-NEXT:    movi v0.2d, #0000000000000000
+; SVE-NEXT:    mov v1.d[1], x1
+; SVE-NEXT:    cnt v1.16b, v1.16b
+; SVE-NEXT:    uaddlv h1, v1.16b
+; SVE-NEXT:    mov v0.s[0], v1.s[0]
+; SVE-NEXT:    mov x1, v0.d[1]
+; SVE-NEXT:    fmov x0, d0
+; SVE-NEXT:    ret
 Entry:
   %1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0)
----------------
davemgreen wrote:

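The intrinsic name should be llvm.ctpop.v1i128(...) — the vector type suffix is written as a single token (`v1i128`), with no dot between `v1` and `i128`.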

https://github.com/llvm/llvm-project/pull/95881


More information about the llvm-commits mailing list