[llvm] [AArch64][SVE] Add basic support for `@llvm.masked.compressstore` (PR #168350)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 26 03:51:15 PST 2025
================
@@ -0,0 +1,265 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BASE
+; RUN: llc -mtriple=aarch64 -aarch64-sve-vector-bits-min=256 -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VL256
+
+;; Full SVE vectors (supported with +sve)
+
+define void @test_compressstore_nxv4i32(ptr %p, <vscale x 4 x i32> %vec, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: test_compressstore_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.s
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+ tail call void @llvm.masked.compressstore.nxv4i32(<vscale x 4 x i32> %vec, ptr align 4 %p, <vscale x 4 x i1> %mask)
+ ret void
+}
+
+define void @test_compressstore_nxv2i64(ptr %p, <vscale x 2 x i64> %vec, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: test_compressstore_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.d
+; CHECK-NEXT: compact z0.d, p0, z0.d
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ tail call void @llvm.masked.compressstore.nxv2i64(<vscale x 2 x i64> %vec, ptr align 8 %p, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @test_compressstore_nxv4f32(ptr %p, <vscale x 4 x float> %vec, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: test_compressstore_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.s
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+ tail call void @llvm.masked.compressstore.nxv4f32(<vscale x 4 x float> %vec, ptr align 4 %p, <vscale x 4 x i1> %mask)
+ ret void
+}
+
+define void @test_compressstore_nxv2f64(ptr %p, <vscale x 2 x double> %vec, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: test_compressstore_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.d
+; CHECK-NEXT: compact z0.d, p0, z0.d
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ tail call void @llvm.masked.compressstore.nxv2f64(<vscale x 2 x double> %vec, ptr align 8 %p, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+;; SVE vectors that will be split
+
+define void @test_compressstore_nxv8i32(ptr %p, <vscale x 8 x i32> %vec, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: test_compressstore_nxv8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: punpkhi p1.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: cntp x8, p1, p1.s
+; CHECK-NEXT: compact z1.s, p1, z1.s
+; CHECK-NEXT: cntp x9, p0, p0.s
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: whilelo p1.s, xzr, x9
+; CHECK-NEXT: st1w { z1.s }, p0, [x0, x9, lsl #2]
+; CHECK-NEXT: st1w { z0.s }, p1, [x0]
+; CHECK-NEXT: ret
+ tail call void @llvm.masked.compressstore.nxv8i32(<vscale x 8 x i32> %vec, ptr align 4 %p, <vscale x 8 x i1> %mask)
+ ret void
+}
+
+;; Unpacked SVE vector types
+
+define void @test_compressstore_nxv2f32(ptr %p, <vscale x 2 x float> %vec, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: test_compressstore_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.d
+; CHECK-NEXT: compact z0.d, p0, z0.d
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: st1w { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ tail call void @llvm.masked.compressstore.nxv2f32(<vscale x 2 x float> %vec, ptr align 4 %p, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+;; SVE vector types promoted to 32/64-bit (non-exhaustive)
+
+define void @test_compressstore_nxv2i8(ptr %p, <vscale x 2 x i8> %vec, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: test_compressstore_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.d
+; CHECK-NEXT: compact z0.d, p0, z0.d
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: st1b { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ tail call void @llvm.masked.compressstore.nxv2i8(<vscale x 2 x i8> %vec, ptr align 1 %p, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @test_compressstore_nxv4i16(ptr %p, <vscale x 4 x i16> %vec, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: test_compressstore_nxv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x8, p0, p0.s
+; CHECK-NEXT: compact z0.s, p0, z0.s
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: st1h { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+ tail call void @llvm.masked.compressstore.nxv4i16(<vscale x 4 x i16> %vec, ptr align 2 %p, <vscale x 4 x i1> %mask)
+ ret void
+}
+
----------------
sdesmalen-arm wrote:
Adding a new file with XFAIL makes sense, thanks.
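(For context, a minimal sketch of what such an XFAIL'd companion file could look like; everything below is illustrative and not taken from this PR. Base SVE's COMPACT only handles 32- and 64-bit elements, so a byte-element store is one plausible candidate for the not-yet-supported cases:)

; Hypothetical test file; the RUN line and chosen element type are
; assumptions for illustration, not part of PR #168350.
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s
; XFAIL: *

define void @test_compressstore_nxv16i8(ptr %p, <vscale x 16 x i8> %vec, <vscale x 16 x i1> %mask) {
  tail call void @llvm.masked.compressstore.nxv16i8(<vscale x 16 x i8> %vec, ptr align 1 %p, <vscale x 16 x i1> %mask)
  ret void
}

With "XFAIL: *", lit reports the run as an expected failure until the lowering lands, at which point the directive can be dropped and CHECK lines autogenerated with update_llc_test_checks.py.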
https://github.com/llvm/llvm-project/pull/168350