[llvm] [AArch64][SVE] Rework VECTOR_COMPRESS lowering (PR #171162)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 9 02:50:51 PST 2025
================
@@ -242,14 +238,10 @@ define <4 x double> @test_compress_v4f64_with_sve(<4 x double> %vec, <4 x i1> %m
; CHECK-NEXT: lsr x9, x8, #32
; CHECK-NEXT: eor w8, w8, w9
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: cmlt v3.2d, v3.2d, #0
-; CHECK-NEXT: cmlt v4.2d, v4.2d, #0
-; CHECK-NEXT: and x8, x8, #0x3
-; CHECK-NEXT: lsl x8, x8, #3
-; CHECK-NEXT: and z3.d, z3.d, #0x1
-; CHECK-NEXT: and z4.d, z4.d, #0x1
; CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0
; CHECK-NEXT: cmpne p0.d, p0/z, z4.d, #0
+; CHECK-NEXT: and x8, x8, #0x3
+; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: compact z0.d, p1, z0.d
; CHECK-NEXT: compact z1.d, p0, z1.d
----------------
MacDue wrote:
Here's my attempt:
```
ushll v2.4s, v2.4h, #0 # Unpack v4i16 predicate to v4i32
ptrue p0.d, vl2
ushll v3.2d, v2.2s, #0 # Unpack low half of v4i32 to v2i64
ushll2 v4.2d, v2.4s, #0 # Unpack high half of v4i32 to v2i64
fmov x8, d2 # Move d2 (the low half of the v4i32 predicate) into x8
shl v3.2d, v3.2d, #63 # Shift up predicate (i.e. ignore everything but bit 0)
shl v4.2d, v4.2d, #63 # Same as above, for the high half of the predicate
lsr x9, x8, #32 # Move high half of v2i32 mask to x9
eor w8, w8, w9 # BUG: caused by incorrect usage of VECREDUCE_ADD in SplitVecRes_VECTOR_COMPRESS.
# This should be an ADD. The bug is not related to this PR.
# This step should reduce the mask to the number of active lanes.
mov x9, sp
cmpne p1.d, p0/z, z3.d, #0 # Convert lower Neon predicate to SVE
cmpne p0.d, p0/z, z4.d, #0 # Convert upper Neon predicate to SVE
and x8, x8, #0x3
lsl x8, x8, #3 # Compute upper offset
compact z0.d, p1, z0.d # Compact lower half
compact z1.d, p0, z1.d # Compact upper half
str q0, [sp] # Store lower half
str q1, [x9, x8] # Store upper half
```
Note: While doing this, I spotted a bug that makes this lowering incorrect. It is unrelated to this PR (the issue is in `SplitVecRes_VECTOR_COMPRESS`), so I'll address it in a separate PR.
https://github.com/llvm/llvm-project/pull/171162
More information about the llvm-commits mailing list