[llvm] [AArch64][SVE] Allow factor 3 in addition to 2/4 for load+deinterleave patterns in codegen (PR #162475)
Rajveer Singh Bharadwaj via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 8 05:35:48 PDT 2025
https://github.com/Rajveer100 created https://github.com/llvm/llvm-project/pull/162475
Resolves #159801 and #162068
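This lifts the factor-2/4 restriction in both the load/deinterleave and store/interleave lowering paths, so llvm.vector.deinterleave3 and llvm.vector.interleave3 on scalable vectors can now select the SVE ld3/st3 family instead of hitting the early bail-out. The attached test covers the load side; for the store side enabled by the second hunk, a minimal sketch of the pattern (not part of the patch; the function name and values are illustrative, and st3w is the expected selection under +sve, not a verified CHECK line) would be:

define void @store_factor3(ptr %ptr, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
  ; Interleave three registers' worth of elements and store them
  ; contiguously; with this patch the sequence should select
  ; st3w { z0.s - z2.s }, p0, [x0].
  %interleaved = tail call <vscale x 12 x i32> @llvm.vector.interleave3.nxv12i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  store <vscale x 12 x i32> %interleaved, ptr %ptr
  ret void
}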
From 7f56e0b4ff283abedccc4257d66694432a51d49a Mon Sep 17 00:00:00 2001
From: Rajveer <rajveer.developer at icloud.com>
Date: Wed, 8 Oct 2025 18:00:53 +0530
Subject: [PATCH] [AArch64][SVE] Allow factor 3 in addition to 2/4 for
load+deinterleave patterns in codegen
Resolves #159801 and #162068
---
.../Target/AArch64/AArch64ISelLowering.cpp | 8 ++++----
.../AArch64/sve-vector-load+deinterleave.ll | 74 +++++++++++++++++++
2 files changed, 78 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 29d65d5d1db64..a41e3f73fd5b4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17973,7 +17973,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
- if (Factor != 2 && Factor != 4) {
- LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
+ if (Factor != 2 && Factor != 3 && Factor != 4) {
+ LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
return false;
}
@@ -18052,7 +18052,7 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
Instruction *Store, Value *Mask,
ArrayRef<Value *> InterleavedValues) const {
unsigned Factor = InterleavedValues.size();
- if (Factor != 2 && Factor != 4) {
- LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
+ if (Factor != 2 && Factor != 3 && Factor != 4) {
+ LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll
new file mode 100644
index 0000000000000..0d41dc9113978
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-elf -mattr=+sve | FileCheck %s -check-prefixes=SVE
+
+define void @load_factor2(ptr %ptr, ptr %s1, ptr %s2) {
+; SVE-LABEL: load_factor2:
+; SVE: // %bb.0:
+; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0]
+; SVE-NEXT: str z0, [x1]
+; SVE-NEXT: str z1, [x2]
+; SVE-NEXT: ret
+ %wide.vec = load <vscale x 8 x i32>, ptr %ptr, align 8
+ %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.vec)
+
+ %1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
+ %2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
+
+ store <vscale x 4 x i32> %1, ptr %s1
+ store <vscale x 4 x i32> %2, ptr %s2
+ ret void
+}
+
+define void @load_factor3(ptr %ptr, ptr %s1, ptr %s2, ptr %s3) {
+; SVE-LABEL: load_factor3:
+; SVE: // %bb.0:
+; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: ld3w { z0.s - z2.s }, p0/z, [x0]
+; SVE-NEXT: str z0, [x1]
+; SVE-NEXT: str z1, [x2]
+; SVE-NEXT: str z2, [x3]
+; SVE-NEXT: ret
+ %wide.vec = load <vscale x 12 x i32>, ptr %ptr, align 8
+ %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32> %wide.vec)
+
+ %1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
+ %2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
+ %3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 2
+
+ store <vscale x 4 x i32> %1, ptr %s1
+ store <vscale x 4 x i32> %2, ptr %s2
+ store <vscale x 4 x i32> %3, ptr %s3
+ ret void
+}
+
+define void @load_factor4(ptr %ptr, ptr %s1, ptr %s2, ptr %s3, ptr %s4) {
+; SVE-LABEL: load_factor4:
+; SVE: // %bb.0:
+; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: ld4w { z0.s - z3.s }, p0/z, [x0]
+; SVE-NEXT: str z0, [x1]
+; SVE-NEXT: str z1, [x2]
+; SVE-NEXT: str z2, [x3]
+; SVE-NEXT: str z3, [x4]
+; SVE-NEXT: ret
+ %wide.vec = load <vscale x 16 x i32>, ptr %ptr, align 8
+ %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %wide.vec)
+
+ %1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
+ %2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
+ %3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 2
+ %4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 3
+
+ store <vscale x 4 x i32> %1, ptr %s1
+ store <vscale x 4 x i32> %2, ptr %s2
+ store <vscale x 4 x i32> %3, ptr %s3
+ store <vscale x 4 x i32> %4, ptr %s4
+ ret void
+}
+
+
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32>)
+
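
The functions above all use 32-bit elements; the lowering itself is element-size agnostic, so analogous coverage for other granules could be added under the same RUN line. A sketch for 64-bit elements (not included in the patch; ld3d is the expected selection under +sve, not a verified CHECK line):

define void @load_factor3_i64(ptr %ptr, ptr %s1, ptr %s2, ptr %s3) {
  ; A contiguous load of three registers' worth of i64 elements,
  ; deinterleaved into three <vscale x 2 x i64> values; expected to
  ; select ld3d { z0.d - z2.d }, p0/z, [x0].
  %wide.vec = load <vscale x 6 x i64>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave3.nxv6i64(<vscale x 6 x i64> %wide.vec)
  %1 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %ldN, 0
  %2 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %ldN, 1
  %3 = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %ldN, 2
  store <vscale x 2 x i64> %1, ptr %s1
  store <vscale x 2 x i64> %2, ptr %s2
  store <vscale x 2 x i64> %3, ptr %s3
  ret void
}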