[llvm] [AArch64] Avoid NEON ctpop in Streaming-SVE mode (PR #93826)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 30 07:06:40 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
<details>
<summary>Changes</summary>
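The NEON ctpop instruction is also used for scalars, so the custom ctpop lowering must bail out whenever NEON is not available (e.g. in Streaming-SVE mode), not only when the target lacks NEON entirely.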
---
Patch is 106.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93826.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+6-6)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll (+1499-833)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 365ef68dcb19b..ac6f1e07c4184 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9571,13 +9571,17 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
Attribute::NoImplicitFloat))
return SDValue();
- if (!Subtarget->hasNEON())
+ EVT VT = Op.getValueType();
+ if (VT.isScalableVector() ||
+ useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
+
+ if (!Subtarget->isNeonAvailable())
return SDValue();
bool IsParity = Op.getOpcode() == ISD::PARITY;
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
- EVT VT = Op.getValueType();
// for i32, general parity function using EORs is more efficient compared to
// using floating point
@@ -9626,10 +9630,6 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
assert(!IsParity && "ISD::PARITY of vector types not supported");
- if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
-
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
"Unexpected type for custom ctpop lowering");
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index f920efeb4892d..f662140327135 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -741,37 +741,63 @@ define <4 x i8> @ctpop_v4i8(<4 x i8> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v4i8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
-; NONEON-NOSVE-NEXT: str d0, [sp, #64]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #70]
-; NONEON-NOSVE-NEXT: ldrb w9, [sp, #68]
-; NONEON-NOSVE-NEXT: ldrb w10, [sp, #66]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: fmov d1, x9
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #64]
-; NONEON-NOSVE-NEXT: fmov d2, x10
-; NONEON-NOSVE-NEXT: fmov d3, x8
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: cnt v1.8b, v1.8b
-; NONEON-NOSVE-NEXT: cnt v2.8b, v2.8b
-; NONEON-NOSVE-NEXT: cnt v3.8b, v3.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h1, v1.8b
-; NONEON-NOSVE-NEXT: uaddlv h2, v2.8b
-; NONEON-NOSVE-NEXT: uaddlv h3, v3.8b
-; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #48]
-; NONEON-NOSVE-NEXT: stp q3, q2, [sp]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #78]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #32]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #76]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #74]
-; NONEON-NOSVE-NEXT: ldr w8, [sp]
-; NONEON-NOSVE-NEXT: strh w8, [sp, #72]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #72]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
+; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
+; NONEON-NOSVE-NEXT: ldrb w11, [sp, #4]
+; NONEON-NOSVE-NEXT: ldrb w10, [sp, #2]
+; NONEON-NOSVE-NEXT: ldrb w12, [sp]
+; NONEON-NOSVE-NEXT: lsr w13, w9, #1
+; NONEON-NOSVE-NEXT: lsr w14, w11, #1
+; NONEON-NOSVE-NEXT: lsr w15, w10, #1
+; NONEON-NOSVE-NEXT: lsr w16, w12, #1
+; NONEON-NOSVE-NEXT: and w13, w13, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w13
+; NONEON-NOSVE-NEXT: and w13, w14, #0x55555555
+; NONEON-NOSVE-NEXT: and w14, w15, #0x55555555
+; NONEON-NOSVE-NEXT: sub w11, w11, w13
+; NONEON-NOSVE-NEXT: lsr w13, w9, #2
+; NONEON-NOSVE-NEXT: and w15, w16, #0x55555555
+; NONEON-NOSVE-NEXT: sub w10, w10, w14
+; NONEON-NOSVE-NEXT: sub w12, w12, w15
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w13, w13, #0x33333333
+; NONEON-NOSVE-NEXT: lsr w14, w11, #2
+; NONEON-NOSVE-NEXT: lsr w15, w10, #2
+; NONEON-NOSVE-NEXT: add w9, w9, w13
+; NONEON-NOSVE-NEXT: lsr w13, w12, #2
+; NONEON-NOSVE-NEXT: and w11, w11, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: and w14, w14, #0x33333333
+; NONEON-NOSVE-NEXT: and w15, w15, #0x33333333
+; NONEON-NOSVE-NEXT: and w12, w12, #0x33333333
+; NONEON-NOSVE-NEXT: and w13, w13, #0x33333333
+; NONEON-NOSVE-NEXT: add w11, w11, w14
+; NONEON-NOSVE-NEXT: add w10, w10, w15
+; NONEON-NOSVE-NEXT: add w12, w12, w13
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: add w11, w11, w11, lsr #4
+; NONEON-NOSVE-NEXT: add w10, w10, w10, lsr #4
+; NONEON-NOSVE-NEXT: add w12, w12, w12, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: and w11, w11, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: and w10, w10, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: and w12, w12, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w9, w9, w8
+; NONEON-NOSVE-NEXT: mul w11, w11, w8
+; NONEON-NOSVE-NEXT: mul w10, w10, w8
+; NONEON-NOSVE-NEXT: mul w8, w12, w8
+; NONEON-NOSVE-NEXT: lsr w9, w9, #24
+; NONEON-NOSVE-NEXT: lsr w11, w11, #24
+; NONEON-NOSVE-NEXT: lsr w10, w10, #24
+; NONEON-NOSVE-NEXT: strh w9, [sp, #14]
+; NONEON-NOSVE-NEXT: lsr w8, w8, #24
+; NONEON-NOSVE-NEXT: strh w11, [sp, #12]
+; NONEON-NOSVE-NEXT: strh w10, [sp, #10]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %op)
ret <4 x i8> %res
@@ -788,67 +814,115 @@ define <8 x i8> @ctpop_v8i8(<8 x i8> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v8i8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #144
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144
-; NONEON-NOSVE-NEXT: str d0, [sp, #128]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #135]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #134]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #112]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #133]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #96]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #132]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #80]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #131]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #64]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #130]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #48]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #129]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #32]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #128]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #16]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #112]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #143]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #96]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: strb w8, [sp, #142]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #80]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #141]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #64]
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: strb w8, [sp, #140]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #48]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #139]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #32]
-; NONEON-NOSVE-NEXT: str q0, [sp]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #138]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #137]
-; NONEON-NOSVE-NEXT: ldr w8, [sp]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #136]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #136]
-; NONEON-NOSVE-NEXT: add sp, sp, #144
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
+; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w10
+; NONEON-NOSVE-NEXT: lsr w10, w9, #2
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: add w9, w9, w10
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w9, w9, w8
+; NONEON-NOSVE-NEXT: lsr w9, w9, #24
+; NONEON-NOSVE-NEXT: strb w9, [sp, #15]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w10
+; NONEON-NOSVE-NEXT: lsr w10, w9, #2
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: add w9, w9, w10
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w9, w9, w8
+; NONEON-NOSVE-NEXT: lsr w9, w9, #24
+; NONEON-NOSVE-NEXT: strb w9, [sp, #14]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w10
+; NONEON-NOSVE-NEXT: lsr w10, w9, #2
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: add w9, w9, w10
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w9, w9, w8
+; NONEON-NOSVE-NEXT: lsr w9, w9, #24
+; NONEON-NOSVE-NEXT: strb w9, [sp, #13]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w10
+; NONEON-NOSVE-NEXT: lsr w10, w9, #2
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: add w9, w9, w10
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w9, w9, w8
+; NONEON-NOSVE-NEXT: lsr w9, w9, #24
+; NONEON-NOSVE-NEXT: strb w9, [sp, #12]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w10
+; NONEON-NOSVE-NEXT: lsr w10, w9, #2
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: add w9, w9, w10
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w9, w9, w8
+; NONEON-NOSVE-NEXT: lsr w9, w9, #24
+; NONEON-NOSVE-NEXT: strb w9, [sp, #11]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w10
+; NONEON-NOSVE-NEXT: lsr w10, w9, #2
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: add w9, w9, w10
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w9, w9, w8
+; NONEON-NOSVE-NEXT: lsr w9, w9, #24
+; NONEON-NOSVE-NEXT: strb w9, [sp, #10]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w10
+; NONEON-NOSVE-NEXT: lsr w10, w9, #2
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: add w9, w9, w10
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w9, w9, w8
+; NONEON-NOSVE-NEXT: lsr w9, w9, #24
+; NONEON-NOSVE-NEXT: strb w9, [sp, #9]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w10
+; NONEON-NOSVE-NEXT: lsr w10, w9, #2
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: add w9, w9, w10
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w8, w9, w8
+; NONEON-NOSVE-NEXT: lsr w8, w8, #24
+; NONEON-NOSVE-NEXT: strb w8, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %op)
ret <8 x i8> %res
@@ -865,126 +939,219 @@ define <16 x i8> @ctpop_v16i8(<16 x i8> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v16i8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #304
-; NONEON-NOSVE-NEXT: str x29, [sp, #288] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 304
-; NONEON-NOSVE-NEXT: .cfi_offset w29, -16
-; NONEON-NOSVE-NEXT: str q0, [sp, #256]
-; NONEON-NOSVE-NEXT: ldr x29, [sp, #288] // 8-byte Folded Reload
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #271]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #270]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #240]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #269]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #224]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #268]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #208]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #267]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #192]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #266]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #176]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #265]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #160]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #264]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #144]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #263]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #128]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #262]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #112]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #261]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #96]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #260]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #80]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #259]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #64]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #258]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #48]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #257]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #32]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #256]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: str q0, [sp, #16]
-; NONEON-NOSVE-NEXT: fmov d0, x8
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #240]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #287]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #224]
-; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
-; NONEON-NOSVE-NEXT: strb w8, [sp, #286]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #208]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #285]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #192]
-; NONEON-NOSVE-NEXT: uaddlv h0, v0.8b
-; NONEON-NOSVE-NEXT: strb w8, [sp, #284]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #176]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #283]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #160]
-; NONEON-NOSVE-NEXT: str q0, [sp]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #282]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #144]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #281]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #128]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #280]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #112]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #279]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #96]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #278]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #80]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #277]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #64]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #276]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #48]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #275]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #32]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #274]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #273]
-; NONEON-NOSVE-NEXT: ldr w8, [sp]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #272]
-; NONEON-NOSVE-NEXT: ldr q0, [sp, #272]
-; NONEON-NOSVE-NEXT: add sp, sp, #304
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
+; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w10
+; NONEON-NOSVE-NEXT: lsr w10, w9, #2
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: add w9, w9, w10
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w9, w9, w8
+; NONEON-NOSVE-NEXT: lsr w9, w9, #24
+; NONEON-NOSVE-NEXT: strb w9, [sp, #31]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: sub w9, w9, w10
+; NONEON-NOSVE-NEXT: lsr w10, w9, #2
+; NONEON-NOSVE-NEXT: and w9, w9, #0x33333333
+; NONEON-NOSVE-NEXT: and w10, w10, #0x33333333
+; NONEON-NOSVE-NEXT: add w9, w9, w10
+; NONEON-NOSVE-NEXT: add w9, w9, w9, lsr #4
+; NONEON-NOSVE-NEXT: and w9, w9, #0xf0f0f0f
+; NONEON-NOSVE-NEXT: mul w9, w9, w8
+; NONEON-NOSVE-NEXT: lsr w9, w9, #24
+; NONEON-NOSVE-NEXT: strb w9, [sp, #30]
+; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
+; NONEON-NOSVE-NEXT: lsr w10, w9, #1
+; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
+; NONEON-NOSVE-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/93826
More information about the llvm-commits mailing list