[llvm] [AArch64][GlobalISel] Improve non-SVE popcount for 32-bit and 64-bit using udot (PR #96409)

Tim Gymnich via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 25 05:46:47 PDT 2024


================
@@ -67,25 +140,338 @@ Entry:
 declare i256 @llvm.ctpop.i256(i256)
 
 define <1 x i128> @popcount1x128(<1 x i128> %0) {
+; CHECKO0-LABEL: popcount1x128:
+; CHECKO0:       // %bb.0: // %Entry
+; CHECKO0-NEXT:    // implicit-def: $q0
+; CHECKO0-NEXT:    mov v0.d[0], x0
+; CHECKO0-NEXT:    mov v0.d[1], x1
+; CHECKO0-NEXT:    cnt v0.16b, v0.16b
+; CHECKO0-NEXT:    uaddlv h0, v0.16b
+; CHECKO0-NEXT:    // kill: def $q0 killed $h0
+; CHECKO0-NEXT:    mov x1, xzr
+; CHECKO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECKO0-NEXT:    fmov w0, s0
+; CHECKO0-NEXT:    mov w8, wzr
+; CHECKO0-NEXT:    // kill: def $x0 killed $w0
+; CHECKO0-NEXT:    // kill: def $x8 killed $w8
+; CHECKO0-NEXT:    bfi x0, x8, #32, #32
+; CHECKO0-NEXT:    ret
+;
+; GISEL-LABEL: popcount1x128:
+; GISEL:       // %bb.0: // %Entry
+; GISEL-NEXT:    // implicit-def: $q0
+; GISEL-NEXT:    mov v0.d[0], x0
+; GISEL-NEXT:    mov v0.d[1], x1
+; GISEL-NEXT:    cnt v0.16b, v0.16b
+; GISEL-NEXT:    uaddlv h0, v0.16b
+; GISEL-NEXT:    // kill: def $q0 killed $h0
+; GISEL-NEXT:    mov x1, xzr
+; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; GISEL-NEXT:    fmov w0, s0
+; GISEL-NEXT:    mov w8, wzr
+; GISEL-NEXT:    // kill: def $x0 killed $w0
+; GISEL-NEXT:    // kill: def $x8 killed $w8
+; GISEL-NEXT:    bfi x0, x8, #32, #32
+; GISEL-NEXT:    ret
+;
 ; CHECK-LABEL: popcount1x128:
 ; CHECK:       // %bb.0: // %Entry
-; CHECK-NEXT:    // implicit-def: $q0
 ; CHECK-NEXT:    mov v0.d[0], x0
 ; CHECK-NEXT:    mov v0.d[1], x1
+; CHECK-NEXT:    mov x1, xzr
 ; CHECK-NEXT:    cnt v0.16b, v0.16b
 ; CHECK-NEXT:    uaddlv h0, v0.16b
-; CHECK-NEXT:    // kill: def $q0 killed $h0
-; CHECK-NEXT:    mov x1, xzr
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    // kill: def $x0 killed $w0
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    bfi x0, x8, #32, #32
+; CHECK-NEXT:    mov w0, v0.s[0]
 ; CHECK-NEXT:    ret
 Entry:
-  %1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0)
+  %1 = tail call <1 x i128> @llvm.ctpop.v1i128(<1 x i128> %0)
   ret <1 x i128> %1
 }
 
-declare <1 x i128> @llvm.ctpop.v1.i128(<1 x i128>)
+declare <1 x i128> @llvm.ctpop.v1i128(<1 x i128>)
+
+define <2 x i64> @popcount2x64(<2 x i64> %0) {
+; CHECKO0-LABEL: popcount2x64:
+; CHECKO0:       // %bb.0: // %Entry
+; CHECKO0-NEXT:    cnt v0.16b, v0.16b
+; CHECKO0-NEXT:    uaddlp v0.8h, v0.16b
+; CHECKO0-NEXT:    uaddlp v0.4s, v0.8h
+; CHECKO0-NEXT:    uaddlp v0.2d, v0.4s
+; CHECKO0-NEXT:    ret
+;
+; GISEL-LABEL: popcount2x64:
+; GISEL:       // %bb.0: // %Entry
+; GISEL-NEXT:    cnt v0.16b, v0.16b
+; GISEL-NEXT:    uaddlp v0.8h, v0.16b
+; GISEL-NEXT:    uaddlp v0.4s, v0.8h
+; GISEL-NEXT:    uaddlp v0.2d, v0.4s
+; GISEL-NEXT:    ret
+;
+; NEON-GISEL-LABEL: popcount2x64:
+; NEON-GISEL:       // %bb.0: // %Entry
+; NEON-GISEL-NEXT:    cnt v0.16b, v0.16b
+; NEON-GISEL-NEXT:    uaddlp v0.8h, v0.16b
+; NEON-GISEL-NEXT:    uaddlp v0.4s, v0.8h
+; NEON-GISEL-NEXT:    uaddlp v0.2d, v0.4s
+; NEON-GISEL-NEXT:    ret
+;
+; DOT-GISEL-LABEL: popcount2x64:
+; DOT-GISEL:       // %bb.0: // %Entry
+; DOT-GISEL-NEXT:    movi v1.2d, #0000000000000000
+; DOT-GISEL-NEXT:    cnt v0.16b, v0.16b
+; DOT-GISEL-NEXT:    movi v2.16b, #1
+; DOT-GISEL-NEXT:    udot v1.4s, v2.16b, v0.16b
+; DOT-GISEL-NEXT:    uaddlp v0.2d, v1.4s
+; DOT-GISEL-NEXT:    ret
+;
+; SVE-GISEL-LABEL: popcount2x64:
+; SVE-GISEL:       // %bb.0: // %Entry
+; SVE-GISEL-NEXT:    cnt v0.16b, v0.16b
+; SVE-GISEL-NEXT:    uaddlp v0.8h, v0.16b
+; SVE-GISEL-NEXT:    uaddlp v0.4s, v0.8h
+; SVE-GISEL-NEXT:    uaddlp v0.2d, v0.4s
+; SVE-GISEL-NEXT:    ret
+; NEON-LABEL: popcount2x64:
+; NEON:       // %bb.0: // %Entry
+; NEON-NEXT:    cnt v0.16b, v0.16b
+; NEON-NEXT:    uaddlp v0.8h, v0.16b
+; NEON-NEXT:    uaddlp v0.4s, v0.8h
+; NEON-NEXT:    uaddlp v0.2d, v0.4s
+; NEON-NEXT:    ret
+; DOT-LABEL: popcount2x64:
+; DOT:       // %bb.0: // %Entry
+; DOT-NEXT:    movi v1.16b, #1
+; DOT-NEXT:    cnt v0.16b, v0.16b
+; DOT-NEXT:    movi v2.2d, #0000000000000000
+; DOT-NEXT:    udot v2.4s, v1.16b, v0.16b
+; DOT-NEXT:    uaddlp v0.2d, v2.4s
+; DOT-NEXT:    ret
+; SVE-LABEL: popcount2x64:
+; SVE:       // %bb.0: // %Entry
+; SVE-NEXT:    cnt v0.16b, v0.16b
+; SVE-NEXT:    uaddlp v0.8h, v0.16b
+; SVE-NEXT:    uaddlp v0.4s, v0.8h
+; SVE-NEXT:    uaddlp v0.2d, v0.4s
+; SVE-NEXT:    ret
+Entry:
+  %1 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
+  ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
+
+define <1 x i64> @popcount1x64(<1 x i64> %0) {
+; CHECKO0-LABEL: popcount1x64:
+; CHECKO0:       // %bb.0: // %Entry
+; CHECKO0-NEXT:    fmov x0, d0
+; CHECKO0-NEXT:    fmov d0, x0
+; CHECKO0-NEXT:    cnt v0.8b, v0.8b
+; CHECKO0-NEXT:    uaddlv h0, v0.8b
+; CHECKO0-NEXT:    // kill: def $q0 killed $h0
+; CHECKO0-NEXT:    mov w8, v0.s[0]
+; CHECKO0-NEXT:    // kill: def $x8 killed $w8
+; CHECKO0-NEXT:    fmov d0, x8
+; CHECKO0-NEXT:    ret
+;
+; GISEL-LABEL: popcount1x64:
+; GISEL:       // %bb.0: // %Entry
+; GISEL-NEXT:    fmov x0, d0
+; GISEL-NEXT:    fmov d0, x0
+; GISEL-NEXT:    cnt v0.8b, v0.8b
+; GISEL-NEXT:    uaddlv h0, v0.8b
+; GISEL-NEXT:    // kill: def $q0 killed $h0
+; GISEL-NEXT:    mov w8, v0.s[0]
+; GISEL-NEXT:    // kill: def $x8 killed $w8
+; GISEL-NEXT:    fmov d0, x8
+; GISEL-NEXT:    ret
+;
+; CHECK-LABEL: popcount1x64:
+; CHECK:       // %bb.0: // %Entry
+; CHECK-NEXT:    cnt v0.8b, v0.8b
+; CHECK-NEXT:    uaddlv h0, v0.8b
+; CHECK-NEXT:    mov w8, v0.s[0]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+Entry:
+  %1 = tail call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %0)
+  ret <1 x i64> %1
+}
+
+declare <1 x i64> @llvm.ctpop.v1i64(<1 x i64>)
+
+define <4 x i32> @popcount4x32(<4 x i32> %0) {
+; CHECKO0-LABEL: popcount4x32:
+; CHECKO0:       // %bb.0: // %Entry
+; CHECKO0-NEXT:    cnt v0.16b, v0.16b
+; CHECKO0-NEXT:    uaddlp v0.8h, v0.16b
+; CHECKO0-NEXT:    uaddlp v0.4s, v0.8h
+; CHECKO0-NEXT:    ret
+;
+; GISEL-LABEL: popcount4x32:
+; GISEL:       // %bb.0: // %Entry
+; GISEL-NEXT:    cnt v0.16b, v0.16b
+; GISEL-NEXT:    uaddlp v0.8h, v0.16b
+; GISEL-NEXT:    uaddlp v0.4s, v0.8h
+; GISEL-NEXT:    ret
+;
+; NEON-GISEL-LABEL: popcount4x32:
+; NEON-GISEL:       // %bb.0: // %Entry
+; NEON-GISEL-NEXT:    cnt v0.16b, v0.16b
+; NEON-GISEL-NEXT:    uaddlp v0.8h, v0.16b
+; NEON-GISEL-NEXT:    uaddlp v0.4s, v0.8h
+; NEON-GISEL-NEXT:    ret
+;
+; DOT-GISEL-LABEL: popcount4x32:
+; DOT-GISEL:       // %bb.0: // %Entry
+; DOT-GISEL-NEXT:    movi v1.2d, #0000000000000000
+; DOT-GISEL-NEXT:    cnt v0.16b, v0.16b
+; DOT-GISEL-NEXT:    movi v2.16b, #1
+; DOT-GISEL-NEXT:    udot v1.4s, v2.16b, v0.16b
+; DOT-GISEL-NEXT:    mov v0.16b, v1.16b
+; DOT-GISEL-NEXT:    ret
+;
+; SVE-GISEL-LABEL: popcount4x32:
+; SVE-GISEL:       // %bb.0: // %Entry
+; SVE-GISEL-NEXT:    cnt v0.16b, v0.16b
+; SVE-GISEL-NEXT:    uaddlp v0.8h, v0.16b
+; SVE-GISEL-NEXT:    uaddlp v0.4s, v0.8h
+; SVE-GISEL-NEXT:    ret
+; NEON-LABEL: popcount4x32:
----------------
tgymnich wrote:

done

https://github.com/llvm/llvm-project/pull/96409


More information about the llvm-commits mailing list