[llvm] [AArch64][ISel] Subvector extracts can use undef for second EXT input (PR #151729)

Gaëtan Bossu via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 1 09:54:43 PDT 2025


================
@@ -0,0 +1,341 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=+sve2,+bf16 -verify-machineinstrs %s -o - | FileCheck %s
+
+; This is a similar test to sve-fixed-length-extract-subvector.ll, but this one
+; uses SVE2 and extracts multiple subvectors at once to ensure that the ext
+; instruction is used (instead of just using smaller ld/st instructions with an
+; offset).
+
+; Test the patterns selecting EXT_ZZI and EXT_ZZI_B for fixed-length vectors
+; when SVE2 is available.
+
+;
+; Use NEON for 128-bit vectors
+;
+
+define void @extract_v4i32_halves(ptr %in, ptr %out, ptr %out2) {
+; CHECK-LABEL: extract_v4i32_halves:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    str d1, [x1]
+; CHECK-NEXT:    str d0, [x2]
+; CHECK-NEXT:    ret
+entry:
+  %b = load <4 x i32>, ptr %in
+  %hi = shufflevector <4 x i32> %b, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+  store <2 x i32> %hi, ptr %out
+  %lo = shufflevector <4 x i32> %b, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+  store <2 x i32> %lo, ptr %out2
+  ret void
+}
+
+define void @extract_v4i32_half_unaligned(ptr %in, ptr %out) {
+; CHECK-LABEL: extract_v4i32_half_unaligned:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT:    str d0, [x1]
+; CHECK-NEXT:    ret
+entry:
+  %b = load <4 x i32>, ptr %in
+  %d = shufflevector <4 x i32> %b, <4 x i32> poison, <2 x i32> <i32 1, i32 2>
+  store <2 x i32> %d, ptr %out
+  ret void
+}
+
+;
+; Use SVE for 256-bit vectors
+;
+
+define void @extract_v4i64_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2,2) {
+; CHECK-LABEL: extract_v4i64_halves:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr z0, [x0]
+; CHECK-NEXT:    mov z1.d, z0.d
----------------
gbossu wrote:

This is the MOV that can be eliminated by using the constructive `EXT` variant, along with the change in this PR marking the second `vector_splice` input as `undef`.

https://github.com/llvm/llvm-project/pull/151729


More information about the llvm-commits mailing list