[llvm] b673a59 - [AArch64] Add BE test coverage for popcount. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 5 03:23:37 PST 2025


Author: David Green
Date: 2025-03-05T11:23:33Z
New Revision: b673a59c9ae5583aa08a8d34a48f9409b660d826

URL: https://github.com/llvm/llvm-project/commit/b673a59c9ae5583aa08a8d34a48f9409b660d826
DIFF: https://github.com/llvm/llvm-project/commit/b673a59c9ae5583aa08a8d34a48f9409b660d826.diff

LOG: [AArch64] Add BE test coverage for popcount. NFC

For #129843

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/arm64-popcnt.ll
    llvm/test/CodeGen/AArch64/popcount.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index ad0904ff98080..369667ec33f66 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -2,6 +2,7 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 ; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
 ; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s
+; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s --check-prefix=CHECK-BE
 
 define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-LABEL: cnt32_advsimd:
@@ -32,6 +33,14 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-CSSC:       // %bb.0:
 ; CHECK-CSSC-NEXT:    cnt w0, w0
 ; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: cnt32_advsimd:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    fmov s0, w0
+; CHECK-BE-NEXT:    cnt v0.8b, v0.8b
+; CHECK-BE-NEXT:    addv b0, v0.8b
+; CHECK-BE-NEXT:    fmov w0, s0
+; CHECK-BE-NEXT:    ret
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 }
@@ -69,6 +78,16 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ; CHECK-CSSC-NEXT:    fmov w8, s0
 ; CHECK-CSSC-NEXT:    cnt w0, w8
 ; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: cnt32_advsimd_2:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
+; CHECK-BE-NEXT:    fmov w8, s0
+; CHECK-BE-NEXT:    fmov s0, w8
+; CHECK-BE-NEXT:    cnt v0.8b, v0.8b
+; CHECK-BE-NEXT:    addv b0, v0.8b
+; CHECK-BE-NEXT:    fmov w0, s0
+; CHECK-BE-NEXT:    ret
   %1 = extractelement <2 x i32> %x, i64 0
   %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
   ret i32 %2
@@ -103,6 +122,16 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
 ; CHECK-CSSC:       // %bb.0:
 ; CHECK-CSSC-NEXT:    cnt x0, x0
 ; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: cnt64_advsimd:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    fmov d0, x0
+; CHECK-BE-NEXT:    rev64 v0.8b, v0.8b
+; CHECK-BE-NEXT:    cnt v0.8b, v0.8b
+; CHECK-BE-NEXT:    addv b0, v0.8b
+; CHECK-BE-NEXT:    rev64 v0.8b, v0.8b
+; CHECK-BE-NEXT:    fmov x0, d0
+; CHECK-BE-NEXT:    ret
   %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
   ret i64 %cnt
 }
@@ -147,6 +176,22 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
 ; CHECK-CSSC:       // %bb.0:
 ; CHECK-CSSC-NEXT:    cnt w0, w0
 ; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: cnt32:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    lsr w9, w0, #1
+; CHECK-BE-NEXT:    mov w8, #16843009 // =0x1010101
+; CHECK-BE-NEXT:    and w9, w9, #0x55555555
+; CHECK-BE-NEXT:    sub w9, w0, w9
+; CHECK-BE-NEXT:    lsr w10, w9, #2
+; CHECK-BE-NEXT:    and w9, w9, #0x33333333
+; CHECK-BE-NEXT:    and w10, w10, #0x33333333
+; CHECK-BE-NEXT:    add w9, w9, w10
+; CHECK-BE-NEXT:    add w9, w9, w9, lsr #4
+; CHECK-BE-NEXT:    and w9, w9, #0xf0f0f0f
+; CHECK-BE-NEXT:    mul w8, w9, w8
+; CHECK-BE-NEXT:    lsr w0, w8, #24
+; CHECK-BE-NEXT:    ret
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 }
@@ -188,6 +233,22 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
 ; CHECK-CSSC:       // %bb.0:
 ; CHECK-CSSC-NEXT:    cnt x0, x0
 ; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: cnt64:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    lsr x9, x0, #1
+; CHECK-BE-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; CHECK-BE-NEXT:    and x9, x9, #0x5555555555555555
+; CHECK-BE-NEXT:    sub x9, x0, x9
+; CHECK-BE-NEXT:    lsr x10, x9, #2
+; CHECK-BE-NEXT:    and x9, x9, #0x3333333333333333
+; CHECK-BE-NEXT:    and x10, x10, #0x3333333333333333
+; CHECK-BE-NEXT:    add x9, x9, x10
+; CHECK-BE-NEXT:    add x9, x9, x9, lsr #4
+; CHECK-BE-NEXT:    and x9, x9, #0xf0f0f0f0f0f0f0f
+; CHECK-BE-NEXT:    mul x8, x9, x8
+; CHECK-BE-NEXT:    lsr x0, x8, #56
+; CHECK-BE-NEXT:    ret
   %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
   ret i64 %cnt
 }
@@ -215,6 +276,14 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
 ; CHECK-CSSC-NEXT:    cmp x8, #1
 ; CHECK-CSSC-NEXT:    cset w0, eq
 ; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: ctpop_eq_one:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub x8, x0, #1
+; CHECK-BE-NEXT:    eor x9, x0, x8
+; CHECK-BE-NEXT:    cmp x9, x8
+; CHECK-BE-NEXT:    cset w0, hi
+; CHECK-BE-NEXT:    ret
   %count = tail call i64 @llvm.ctpop.i64(i64 %x)
   %cmp = icmp eq i64 %count, 1
   %conv = zext i1 %cmp to i32
@@ -244,6 +313,14 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
 ; CHECK-CSSC-NEXT:    cmp x8, #1
 ; CHECK-CSSC-NEXT:    cset w0, ne
 ; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: ctpop_ne_one:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub x8, x0, #1
+; CHECK-BE-NEXT:    eor x9, x0, x8
+; CHECK-BE-NEXT:    cmp x9, x8
+; CHECK-BE-NEXT:    cset w0, ls
+; CHECK-BE-NEXT:    ret
   %count = tail call i64 @llvm.ctpop.i64(i64 %x)
   %cmp = icmp ne i64 %count, 1
   %conv = zext i1 %cmp to i32
@@ -273,6 +350,14 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
 ; CHECK-CSSC-NEXT:    cmp w8, #1
 ; CHECK-CSSC-NEXT:    cset w0, ne
 ; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: ctpop32_ne_one:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub w8, w0, #1
+; CHECK-BE-NEXT:    eor w9, w0, w8
+; CHECK-BE-NEXT:    cmp w9, w8
+; CHECK-BE-NEXT:    cset w0, ls
+; CHECK-BE-NEXT:    ret
   %count = tail call i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp ne i32 %count, 1
   ret i1 %cmp
@@ -299,6 +384,13 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) {
 ; CHECK-CSSC-NEXT:    tst w0, w8
 ; CHECK-CSSC-NEXT:    cset w0, eq
 ; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    sub w8, w0, #1
+; CHECK-BE-NEXT:    tst w0, w8
+; CHECK-BE-NEXT:    cset w0, eq
+; CHECK-BE-NEXT:    ret
 entry:
   %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp eq i32 %popcnt, 1
@@ -326,11 +418,80 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
 ; CHECK-CSSC-NEXT:    tst w0, w8
 ; CHECK-CSSC-NEXT:    cset w0, ne
 ; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    sub w8, w0, #1
+; CHECK-BE-NEXT:    tst w0, w8
+; CHECK-BE-NEXT:    cset w0, ne
+; CHECK-BE-NEXT:    ret
 entry:
   %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp ne i32 %popcnt, 1
   ret i1 %cmp
 }
 
+define i128 @cnt128(i128 %x) nounwind readnone {
+; CHECK-LABEL: cnt128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    mov.d v0[1], x1
+; CHECK-NEXT:    cnt.16b v0, v0
+; CHECK-NEXT:    addv.16b b0, v0
+; CHECK-NEXT:    mov.d x1, v0[1]
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+;
+; CHECK-NONEON-LABEL: cnt128:
+; CHECK-NONEON:       // %bb.0:
+; CHECK-NONEON-NEXT:    lsr x9, x0, #1
+; CHECK-NONEON-NEXT:    lsr x10, x1, #1
+; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
+; CHECK-NONEON-NEXT:    and x10, x10, #0x5555555555555555
+; CHECK-NONEON-NEXT:    sub x9, x0, x9
+; CHECK-NONEON-NEXT:    sub x10, x1, x10
+; CHECK-NONEON-NEXT:    mov x1, xzr
+; CHECK-NONEON-NEXT:    lsr x11, x9, #2
+; CHECK-NONEON-NEXT:    lsr x12, x10, #2
+; CHECK-NONEON-NEXT:    and x9, x9, #0x3333333333333333
+; CHECK-NONEON-NEXT:    and x10, x10, #0x3333333333333333
+; CHECK-NONEON-NEXT:    and x11, x11, #0x3333333333333333
+; CHECK-NONEON-NEXT:    add x9, x9, x11
+; CHECK-NONEON-NEXT:    and x11, x12, #0x3333333333333333
+; CHECK-NONEON-NEXT:    add x9, x9, x9, lsr #4
+; CHECK-NONEON-NEXT:    add x10, x10, x11
+; CHECK-NONEON-NEXT:    add x10, x10, x10, lsr #4
+; CHECK-NONEON-NEXT:    and x9, x9, #0xf0f0f0f0f0f0f0f
+; CHECK-NONEON-NEXT:    mul x9, x9, x8
+; CHECK-NONEON-NEXT:    and x10, x10, #0xf0f0f0f0f0f0f0f
+; CHECK-NONEON-NEXT:    mul x8, x10, x8
+; CHECK-NONEON-NEXT:    lsr x9, x9, #56
+; CHECK-NONEON-NEXT:    add x0, x9, x8, lsr #56
+; CHECK-NONEON-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cnt128:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    cnt x8, x1
+; CHECK-CSSC-NEXT:    cnt x9, x0
+; CHECK-CSSC-NEXT:    mov x1, xzr
+; CHECK-CSSC-NEXT:    add x0, x9, x8
+; CHECK-CSSC-NEXT:    ret
+;
+; CHECK-BE-LABEL: cnt128:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    fmov d0, x0
+; CHECK-BE-NEXT:    mov v0.d[1], x1
+; CHECK-BE-NEXT:    rev64 v0.16b, v0.16b
+; CHECK-BE-NEXT:    cnt v0.16b, v0.16b
+; CHECK-BE-NEXT:    addv b0, v0.16b
+; CHECK-BE-NEXT:    rev64 v0.16b, v0.16b
+; CHECK-BE-NEXT:    mov x1, v0.d[1]
+; CHECK-BE-NEXT:    fmov x0, d0
+; CHECK-BE-NEXT:    ret
+  %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
+  ret i128 %cnt
+}
+
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone

diff  --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index 89b1ac0a0edf1..6cc925f0ae91f 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -3,6 +3,7 @@
 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON
 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon,+dotprod | FileCheck %s --check-prefixes=CHECK,DOT
 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE
+; RUN: llc < %s -mtriple=aarch64_be-unknown-unknown | FileCheck %s --check-prefixes=BE
 ; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=GISEL
 ; RUN: llc < %s -O0 -global-isel -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=GISELO0
 ; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefixes=GISEL,NEON-GISEL
@@ -32,6 +33,18 @@ define i8 @popcount128(ptr nocapture nonnull readonly %0) {
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 ;
+; BE-LABEL: popcount128:
+; BE:       // %bb.0: // %Entry
+; BE-NEXT:    ldr d0, [x0]
+; BE-NEXT:    add x8, x0, #8
+; BE-NEXT:    ld1 { v0.d }[1], [x8]
+; BE-NEXT:    rev64 v0.16b, v0.16b
+; BE-NEXT:    cnt v0.16b, v0.16b
+; BE-NEXT:    addv b0, v0.16b
+; BE-NEXT:    rev32 v0.16b, v0.16b
+; BE-NEXT:    mov w0, v0.s[3]
+; BE-NEXT:    ret
+;
 ; GISEL-LABEL: popcount128:
 ; GISEL:       // %bb.0: // %Entry
 ; GISEL-NEXT:    ldr q0, [x0]
@@ -111,6 +124,27 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
 ; CHECK-NEXT:    add w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
+; BE-LABEL: popcount256:
+; BE:       // %bb.0: // %Entry
+; BE-NEXT:    ldr d0, [x0]
+; BE-NEXT:    ldr d1, [x0, #16]
+; BE-NEXT:    add x8, x0, #24
+; BE-NEXT:    add x9, x0, #8
+; BE-NEXT:    ld1 { v0.d }[1], [x9]
+; BE-NEXT:    ld1 { v1.d }[1], [x8]
+; BE-NEXT:    rev64 v0.16b, v0.16b
+; BE-NEXT:    rev64 v1.16b, v1.16b
+; BE-NEXT:    cnt v0.16b, v0.16b
+; BE-NEXT:    cnt v1.16b, v1.16b
+; BE-NEXT:    addv b0, v0.16b
+; BE-NEXT:    addv b1, v1.16b
+; BE-NEXT:    rev32 v0.16b, v0.16b
+; BE-NEXT:    rev32 v1.16b, v1.16b
+; BE-NEXT:    mov w8, v0.s[3]
+; BE-NEXT:    mov w9, v1.s[3]
+; BE-NEXT:    add w0, w9, w8
+; BE-NEXT:    ret
+;
 ; GISEL-LABEL: popcount256:
 ; GISEL:       // %bb.0: // %Entry
 ; GISEL-NEXT:    ldp x8, x9, [x0]
@@ -199,6 +233,18 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
 ;
+; BE-LABEL: popcount1x128:
+; BE:       // %bb.0: // %Entry
+; BE-NEXT:    fmov d0, x0
+; BE-NEXT:    mov v0.d[1], x1
+; BE-NEXT:    rev64 v0.16b, v0.16b
+; BE-NEXT:    cnt v0.16b, v0.16b
+; BE-NEXT:    addv b0, v0.16b
+; BE-NEXT:    rev64 v0.16b, v0.16b
+; BE-NEXT:    mov x1, v0.d[1]
+; BE-NEXT:    fmov x0, d0
+; BE-NEXT:    ret
+;
 ; GISEL-LABEL: popcount1x128:
 ; GISEL:       // %bb.0: // %Entry
 ; GISEL-NEXT:    mov v0.d[0], x0
@@ -266,6 +312,17 @@ define <2 x i64> @popcount2x64(<2 x i64> %0) {
 ; SVE-NEXT:    uaddlp v0.2d, v0.4s
 ; SVE-NEXT:    ret
 ;
+; BE-LABEL: popcount2x64:
+; BE:       // %bb.0: // %Entry
+; BE-NEXT:    rev64 v0.16b, v0.16b
+; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; BE-NEXT:    cnt v0.16b, v0.16b
+; BE-NEXT:    uaddlp v0.8h, v0.16b
+; BE-NEXT:    uaddlp v0.4s, v0.8h
+; BE-NEXT:    uaddlp v0.2d, v0.4s
+; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; BE-NEXT:    ret
+;
 ; GISELO0-LABEL: popcount2x64:
 ; GISELO0:       // %bb.0: // %Entry
 ; GISELO0-NEXT:    cnt v0.16b, v0.16b
@@ -326,6 +383,15 @@ define <1 x i64> @popcount1x64(<1 x i64> %0) {
 ; CHECK-NEXT:    uaddlp v0.1d, v0.2s
 ; CHECK-NEXT:    ret
 ;
+; BE-LABEL: popcount1x64:
+; BE:       // %bb.0: // %Entry
+; BE-NEXT:    rev64 v0.8b, v0.8b
+; BE-NEXT:    cnt v0.8b, v0.8b
+; BE-NEXT:    uaddlp v0.4h, v0.8b
+; BE-NEXT:    uaddlp v0.2s, v0.4h
+; BE-NEXT:    uaddlp v0.1d, v0.2s
+; BE-NEXT:    ret
+;
 ; GISEL-LABEL: popcount1x64:
 ; GISEL:       // %bb.0: // %Entry
 ; GISEL-NEXT:    cnt v0.8b, v0.8b
@@ -382,6 +448,17 @@ define <4 x i32> @popcount4x32(<4 x i32> %0) {
 ; SVE-NEXT:    uaddlp v0.4s, v0.8h
 ; SVE-NEXT:    ret
 ;
+; BE-LABEL: popcount4x32:
+; BE:       // %bb.0: // %Entry
+; BE-NEXT:    rev64 v0.16b, v0.16b
+; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; BE-NEXT:    cnt v0.16b, v0.16b
+; BE-NEXT:    uaddlp v0.8h, v0.16b
+; BE-NEXT:    uaddlp v0.4s, v0.8h
+; BE-NEXT:    rev64 v0.4s, v0.4s
+; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; BE-NEXT:    ret
+;
 ; GISELO0-LABEL: popcount4x32:
 ; GISELO0:       // %bb.0: // %Entry
 ; GISELO0-NEXT:    cnt v0.16b, v0.16b
@@ -449,6 +526,15 @@ define <2 x i32> @popcount2x32(<2 x i32> %0) {
 ; SVE-NEXT:    uaddlp v0.2s, v0.4h
 ; SVE-NEXT:    ret
 ;
+; BE-LABEL: popcount2x32:
+; BE:       // %bb.0: // %Entry
+; BE-NEXT:    rev64 v0.8b, v0.8b
+; BE-NEXT:    cnt v0.8b, v0.8b
+; BE-NEXT:    uaddlp v0.4h, v0.8b
+; BE-NEXT:    uaddlp v0.2s, v0.4h
+; BE-NEXT:    rev64 v0.2s, v0.2s
+; BE-NEXT:    ret
+;
 ; GISELO0-LABEL: popcount2x32:
 ; GISELO0:       // %bb.0: // %Entry
 ; GISELO0-NEXT:    cnt v0.8b, v0.8b
@@ -498,6 +584,16 @@ define <8 x i16> @popcount8x16(<8 x i16> %0) {
 ; CHECK-NEXT:    uaddlp v0.8h, v0.16b
 ; CHECK-NEXT:    ret
 ;
+; BE-LABEL: popcount8x16:
+; BE:       // %bb.0: // %Entry
+; BE-NEXT:    rev64 v0.16b, v0.16b
+; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; BE-NEXT:    cnt v0.16b, v0.16b
+; BE-NEXT:    uaddlp v0.8h, v0.16b
+; BE-NEXT:    rev64 v0.8h, v0.8h
+; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; BE-NEXT:    ret
+;
 ; GISEL-LABEL: popcount8x16:
 ; GISEL:       // %bb.0: // %Entry
 ; GISEL-NEXT:    cnt v0.16b, v0.16b
@@ -529,6 +625,14 @@ define <4 x i16> @popcount4x16(<4 x i16> %0) {
 ; CHECK-NEXT:    uaddlp v0.4h, v0.8b
 ; CHECK-NEXT:    ret
 ;
+; BE-LABEL: popcount4x16:
+; BE:       // %bb.0: // %Entry
+; BE-NEXT:    rev64 v0.8b, v0.8b
+; BE-NEXT:    cnt v0.8b, v0.8b
+; BE-NEXT:    uaddlp v0.4h, v0.8b
+; BE-NEXT:    rev64 v0.4h, v0.4h
+; BE-NEXT:    ret
+;
 ; GISEL-LABEL: popcount4x16:
 ; GISEL:       // %bb.0: // %Entry
 ; GISEL-NEXT:    cnt v0.8b, v0.8b


        


More information about the llvm-commits mailing list