[llvm] [IA] Relax the requirement of having ExtractValue users on deinterleave intrinsic (PR #148716)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 11:16:41 PDT 2025
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/148716
>From 40ed9ae5d8c8c32fc7896bcc76d11d10f99d8f49 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 11 Jul 2025 16:16:16 -0700
Subject: [PATCH 1/5] [IA] Do not require ExtractValueInst as users for
deinterleave intrinsics
---
llvm/include/llvm/Analysis/VectorUtils.h | 20 ++++
llvm/include/llvm/CodeGen/TargetLowering.h | 8 +-
llvm/lib/Analysis/VectorUtils.cpp | 19 +++
llvm/lib/CodeGen/InterleavedAccessPass.cpp | 32 ++---
.../Target/AArch64/AArch64ISelLowering.cpp | 30 ++---
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 6 +-
.../Target/RISCV/RISCVInterleavedAccess.cpp | 52 ++++-----
.../rvv/fixed-vectors-deinterleave-load.ll | 53 +++++++++
.../RISCV/rvv/vp-vector-interleaved-access.ll | 109 ++++++++++++++++++
.../AArch64/fixed-deinterleave-intrinsics.ll | 32 ++---
.../scalable-deinterleave-intrinsics.ll | 36 +++---
.../AArch64/sve-deinterleave4.ll | 27 +++--
.../AArch64/sve-interleaved-accesses.ll | 4 +
14 files changed, 314 insertions(+), 118 deletions(-)
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index af1e0d7251a4f..8d7407949b3f0 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -23,6 +23,7 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
+class IntrinsicInst;
class TargetLibraryInfo;
/// The Vector Function Database.
@@ -188,6 +189,25 @@ LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);
/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID);
+/// A vector can either be deinterleaved through an intrinsic or a combination
+/// of shufflevector instructions. This is a thin abstraction layer to provide
+/// some common information like the deinterleaving factor.
+struct VectorDeinterleaving {
+ IntrinsicInst *DI = nullptr;
+ ArrayRef<Value *> Values;
+
+ unsigned getFactor() const;
+
+ Type *getDeinterleavedType() const;
+
+ explicit VectorDeinterleaving(IntrinsicInst *DI) : DI(DI) {}
+ explicit VectorDeinterleaving(ArrayRef<Value *> Values) : Values(Values) {}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ bool isValid() const { return (DI != nullptr) ^ !Values.empty(); }
+#endif
+};
+
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index a248eb7444b20..f3cf784afb540 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -96,6 +96,7 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class TargetTransformInfo;
class Value;
+struct VectorDeinterleaving;
class VPIntrinsic;
namespace Sched {
@@ -3230,7 +3231,7 @@ class LLVM_ABI TargetLoweringBase {
/// \p Mask is a mask value
-  /// \p DeinterleaveRes is a list of deinterleaved results.
+  /// \p VD describes the deinterleaving (intrinsic or shuffle-based) to lower.
virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
- ArrayRef<Value *> DeinterleaveRes) const {
+ const VectorDeinterleaving &VD) const {
return false;
}
@@ -3251,9 +3252,8 @@
///
/// \p LI is the accompanying load instruction.
-  /// \p DeinterleaveValues contains the deinterleaved values.
+  /// \p DI is the deinterleave intrinsic.
- virtual bool
- lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
- ArrayRef<Value *> DeinterleaveValues) const {
+ virtual bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
+ IntrinsicInst *DI) const {
return false;
}
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 7f0ed0b60a785..cd2ab0edf6fd3 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -306,6 +306,25 @@ unsigned llvm::getDeinterleaveIntrinsicFactor(Intrinsic::ID ID) {
}
}
+unsigned VectorDeinterleaving::getFactor() const {
+ assert(isValid());
+ if (DI)
+ return getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
+ else
+ return Values.size();
+}
+
+Type *VectorDeinterleaving::getDeinterleavedType() const {
+ assert(getFactor() > 0);
+ if (DI) {
+ return *DI->getType()->subtype_begin();
+ } else {
+ Value *FirstActive =
+ *llvm::find_if(Values, [](Value *V) { return V != nullptr; });
+ return FirstActive->getType();
+ }
+}
+
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 7259834975cf4..1363e8b6b28b6 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -381,7 +381,8 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
SmallVector<Value *, 4> ShuffleValues(Factor, nullptr);
for (auto [Idx, ShuffleMaskIdx] : enumerate(Indices))
ShuffleValues[ShuffleMaskIdx] = Shuffles[Idx];
- if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, ShuffleValues))
+ VectorDeinterleaving VD(ShuffleValues);
+ if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, VD))
// If Extracts is not empty, tryReplaceExtracts made changes earlier.
return !Extracts.empty() || BinOpShuffleChanged;
} else {
@@ -615,32 +616,17 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
return false;
- const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
+ VectorDeinterleaving VD(DI);
+ const unsigned Factor = VD.getFactor();
assert(Factor && "unexpected deinterleave intrinsic");
- SmallVector<Value *, 8> DeinterleaveValues(Factor, nullptr);
- Value *LastFactor = nullptr;
- for (auto *User : DI->users()) {
- auto *Extract = dyn_cast<ExtractValueInst>(User);
- if (!Extract || Extract->getNumIndices() != 1)
- return false;
- unsigned Idx = Extract->getIndices()[0];
- if (DeinterleaveValues[Idx])
- return false;
- DeinterleaveValues[Idx] = Extract;
- LastFactor = Extract;
- }
-
- if (!LastFactor)
- return false;
-
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
return false;
// Check mask operand. Handle both all-true/false and interleaved mask.
Value *WideMask = VPLoad->getOperand(1);
Value *Mask =
- getMask(WideMask, Factor, cast<VectorType>(LastFactor->getType()));
+ getMask(WideMask, Factor, cast<VectorType>(VD.getDeinterleavedType()));
if (!Mask)
return false;
@@ -649,7 +635,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
// Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
// TLI function to emit target-specific interleaved instruction.
- if (!TLI->lowerInterleavedVPLoad(VPLoad, Mask, DeinterleaveValues))
+ if (!TLI->lowerInterleavedVPLoad(VPLoad, Mask, VD))
return false;
} else {
@@ -661,13 +648,10 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
<< " and factor = " << Factor << "\n");
// Try and match this with target specific intrinsics.
- if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
+ if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DI))
return false;
}
- for (Value *V : DeinterleaveValues)
- if (V)
- DeadInsts.insert(cast<Instruction>(V));
DeadInsts.insert(DI);
// We now have a target-specific load, so delete the old one.
DeadInsts.insert(cast<Instruction>(LoadedVal));
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index bde4ba993f69e..ff156f4f04c5d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17476,16 +17476,15 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
}
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
- LoadInst *LI, ArrayRef<Value *> DeinterleavedValues) const {
- unsigned Factor = DeinterleavedValues.size();
+ LoadInst *LI, IntrinsicInst *DI) const {
+ ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
+ const unsigned Factor = DISubtypes.size();
if (Factor != 2 && Factor != 4) {
LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
return false;
}
- Value *FirstActive = *llvm::find_if(DeinterleavedValues,
- [](Value *V) { return V != nullptr; });
- VectorType *VTy = cast<VectorType>(FirstActive->getType());
+ VectorType *VTy = cast<VectorType>(DISubtypes[0]);
const DataLayout &DL = LI->getModule()->getDataLayout();
bool UseScalable;
@@ -17513,7 +17512,10 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
Value *BaseAddr = LI->getPointerOperand();
+ Value *Result = nullptr;
if (NumLoads > 1) {
+ Result = PoisonValue::get(DI->getType());
+
// Create multiple legal small ldN.
SmallVector<Value *, 4> ExtractedLdValues(Factor, PoisonValue::get(VTy));
for (unsigned I = 0; I < NumLoads; ++I) {
@@ -17533,25 +17535,17 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
}
LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump());
}
- // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
- for (unsigned J = 0; J < Factor; ++J) {
- if (DeinterleavedValues[J])
- DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]);
- }
+
+ for (unsigned J = 0; J < Factor; ++J)
+ Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
} else {
- Value *Result;
if (UseScalable)
Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
else
Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
- // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
- for (unsigned I = 0; I < Factor; I++) {
- if (DeinterleavedValues[I]) {
- Value *NewExtract = Builder.CreateExtractValue(Result, I);
- DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
- }
- }
}
+
+ DI->replaceAllUsesWith(Result);
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 65fe08e92c235..ca40884e3a5c0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -218,8 +218,8 @@ class AArch64TargetLowering : public TargetLowering {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- bool lowerDeinterleaveIntrinsicToLoad(
- LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;
+ bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
+ IntrinsicInst *DI) const override;
bool lowerInterleaveIntrinsicToStore(
StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 00e969056df7d..743745ba5d65f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -437,14 +437,14 @@ class RISCVTargetLowering : public TargetLowering {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- bool lowerDeinterleaveIntrinsicToLoad(
- LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;
+ bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
+ IntrinsicInst *DI) const override;
bool lowerInterleaveIntrinsicToStore(
StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
- ArrayRef<Value *> DeinterleaveRes) const override;
+ const VectorDeinterleaving &VD) const override;
bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
ArrayRef<Value *> InterleaveOps) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index a6ff22c4b391f..044b1eae84f26 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -14,6 +14,7 @@
#include "RISCVISelLowering.h"
#include "RISCVSubtarget.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -233,17 +234,17 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
}
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
- LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const {
- const unsigned Factor = DeinterleaveValues.size();
+ LoadInst *LI, IntrinsicInst *DI) const {
+ VectorDeinterleaving VD(DI);
+ const unsigned Factor = VD.getFactor();
+ assert(Factor && "unexpected deinterleaving factor");
if (Factor > 8)
return false;
assert(LI->isSimple());
IRBuilder<> Builder(LI);
- Value *FirstActive =
- *llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
- VectorType *ResVTy = cast<VectorType>(FirstActive->getType());
+ VectorType *ResVTy = cast<VectorType>(VD.getDeinterleavedType());
const DataLayout &DL = LI->getDataLayout();
@@ -293,16 +294,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
}
}
- for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) {
- if (!DIV)
- continue;
- // We have to create a brand new ExtractValue to replace each
- // of these old ExtractValue instructions.
- Value *NewEV =
- Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
- DIV->replaceAllUsesWith(NewEV);
- }
-
+ DI->replaceAllUsesWith(Return);
return true;
}
@@ -419,16 +411,14 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
/// dealing with factor of 2 (extractvalue is still required for most of other
/// factors though).
bool RISCVTargetLowering::lowerInterleavedVPLoad(
- VPIntrinsic *Load, Value *Mask,
- ArrayRef<Value *> DeinterleaveResults) const {
- const unsigned Factor = DeinterleaveResults.size();
+ VPIntrinsic *Load, Value *Mask, const VectorDeinterleaving &VD) const {
assert(Mask && "Expect a valid mask");
assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
"Unexpected intrinsic");
- Value *FirstActive = *llvm::find_if(DeinterleaveResults,
- [](Value *V) { return V != nullptr; });
- VectorType *VTy = cast<VectorType>(FirstActive->getType());
+ const unsigned Factor = VD.getFactor();
+ assert(Factor && "unexpected vector deinterleaving");
+ VectorType *VTy = cast<VectorType>(VD.getDeinterleavedType());
auto &DL = Load->getModule()->getDataLayout();
Align Alignment = Load->getParamAlign(0).value_or(
@@ -494,14 +484,18 @@ bool RISCVTargetLowering::lowerInterleavedVPLoad(
}
}
- for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) {
- if (!DIO)
- continue;
- // We have to create a brand new ExtractValue to replace each
- // of these old ExtractValue instructions.
- Value *NewEV =
- Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
- DIO->replaceAllUsesWith(NewEV);
+ if (VD.DI) {
+ VD.DI->replaceAllUsesWith(Return);
+ } else {
+ for (auto [Idx, DIO] : enumerate(VD.Values)) {
+ if (!DIO)
+ continue;
+ // We have to create a brand new ExtractValue to replace each
+ // of these old ExtractValue instructions.
+ Value *NewEV =
+ Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
+ DIO->replaceAllUsesWith(NewEV);
+ }
}
return true;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index 3e822d357b667..861423026b2eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -274,6 +274,59 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p
ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2
}
+define { <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3_partial(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_factor3_partial:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vmv1r.v v8, v7
+; CHECK-NEXT: ret
+ %vec = load <24 x i8>, ptr %p
+ %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec)
+ %t0 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 0
+ %t2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 2
+ %res0 = insertvalue { <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
+ %res1 = insertvalue { <8 x i8>, <8 x i8> } %res0, <8 x i8> %t2, 1
+ ret { <8 x i8>, <8 x i8> } %res1
+}
+
+; InterleavedAccess should kick in even if the users of the deinterleave intrinsic are not extractvalues.
+define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3_no_extract(ptr %p, ptr %p1, i1 %c) {
+; CHECK-LABEL: vector_deinterleave_load_factor3_no_extract:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a2, a2, 1
+; CHECK-NEXT: beqz a2, .LBB17_2
+; CHECK-NEXT: # %bb.1: # %bb0
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB17_2: # %bb1
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlseg3e8.v v6, (a1)
+; CHECK-NEXT: ret
+ br i1 %c, label %bb0, label %bb1
+
+bb0:
+ %vec0 = load <24 x i8>, ptr %p
+ %d0.0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec0)
+ br label %merge
+
+bb1:
+ %vec1 = load <24 x i8>, ptr %p1
+ %d0.1 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec1)
+ br label %merge
+
+merge:
+ %d0 = phi {<8 x i8>, <8 x i8>, <8 x i8>} [%d0.0, %bb0], [%d0.1, %bb1]
+ %t0 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 0
+ %t1 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 1
+ %t2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 2
+ %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
+ %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
+ %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
+ ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2
+}
+
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 7fb822d20f892..0fa8e05acc8ff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -66,6 +66,115 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor
ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
}
+define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_partial(ptr %ptr, i32 %evl) {
+; RV32-LABEL: load_factor3_partial:
+; RV32: # %bb.0:
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: lui a2, 699051
+; RV32-NEXT: addi a2, a2, -1365
+; RV32-NEXT: mulhu a1, a1, a2
+; RV32-NEXT: srli a1, a1, 1
+; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT: vlseg3e32.v v7, (a0)
+; RV32-NEXT: vmv1r.v v8, v7
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_factor3_partial:
+; RV64: # %bb.0:
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: lui a2, 699051
+; RV64-NEXT: addi a2, a2, -1365
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: slli a2, a2, 32
+; RV64-NEXT: mulhu a1, a1, a2
+; RV64-NEXT: srli a1, a1, 33
+; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT: vlseg3e32.v v7, (a0)
+; RV64-NEXT: vmv1r.v v8, v7
+; RV64-NEXT: ret
+ %rvl = mul i32 %evl, 3
+ %wide.masked.load = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 %rvl)
+ %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.masked.load)
+ %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+ %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 2
+ %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+ %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t2, 1
+ ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
+}
+
+; InterleavedAccess should kick in even if the users of the deinterleave intrinsic are not extractvalues.
+define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_no_extract(ptr %ptr, i32 %evl) {
+; RV32-LABEL: load_factor3_no_extract:
+; RV32: # %bb.0:
+; RV32-NEXT: li a2, 12
+; RV32-NEXT: beq a1, a2, .LBB3_2
+; RV32-NEXT: # %bb.1: # %bb0
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: lui a2, 699051
+; RV32-NEXT: addi a2, a2, -1365
+; RV32-NEXT: mulhu a1, a1, a2
+; RV32-NEXT: srli a1, a1, 1
+; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT: vlseg3e32.v v7, (a0)
+; RV32-NEXT: j .LBB3_3
+; RV32-NEXT: .LBB3_2: # %bb1
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vlseg3e32.v v7, (a0)
+; RV32-NEXT: .LBB3_3: # %merge
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vmv1r.v v8, v7
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_factor3_no_extract:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a2, a1
+; RV64-NEXT: li a3, 12
+; RV64-NEXT: beq a2, a3, .LBB3_2
+; RV64-NEXT: # %bb.1: # %bb0
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: lui a2, 699051
+; RV64-NEXT: addi a2, a2, -1365
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: slli a2, a2, 32
+; RV64-NEXT: mulhu a1, a1, a2
+; RV64-NEXT: srli a1, a1, 33
+; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT: vlseg3e32.v v7, (a0)
+; RV64-NEXT: j .LBB3_3
+; RV64-NEXT: .LBB3_2: # %bb1
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vlseg3e32.v v7, (a0)
+; RV64-NEXT: .LBB3_3: # %merge
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vmv1r.v v8, v7
+; RV64-NEXT: ret
+ %p = icmp ne i32 %evl, 12
+ br i1 %p, label %bb0, label %bb1
+
+bb0:
+ %rvl.0 = mul i32 %evl, 3
+ %wide.load.0 = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 %rvl.0)
+ %deinterleaved.results.0 = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.load.0)
+ br label %merge
+
+bb1:
+ %wide.load.1 = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 12)
+ %deinterleaved.results.1 = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.load.1)
+ br label %merge
+
+merge:
+ %deinterleaved.results = phi { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [%deinterleaved.results.0, %bb0], [%deinterleaved.results.1, %bb1]
+ %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+ %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 2
+ %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+ %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t2, 1
+ ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
+}
+
define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor4_v2(ptr %ptr, i32 %evl) {
; RV32-LABEL: load_factor4_v2:
; RV32: # %bb.0:
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/fixed-deinterleave-intrinsics.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/fixed-deinterleave-intrinsics.ll
index 09e2c53465cd7..6c81d9a4d2ed6 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/fixed-deinterleave-intrinsics.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/fixed-deinterleave-intrinsics.ll
@@ -10,8 +10,8 @@ define void @deinterleave_i8_factor2(ptr %ptr) {
; NEON-LABEL: define void @deinterleave_i8_factor2
; NEON-SAME: (ptr [[PTR:%.*]]) {
; NEON-NEXT: [[LDN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[PTR]])
-; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 0
-; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 1
+; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 0
+; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 1
; NEON-NEXT: ret void
;
; SVE-FIXED-LABEL: define void @deinterleave_i8_factor2
@@ -33,8 +33,8 @@ define void @deinterleave_i16_factor2(ptr %ptr) {
; NEON-LABEL: define void @deinterleave_i16_factor2
; NEON-SAME: (ptr [[PTR:%.*]]) {
; NEON-NEXT: [[LDN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[PTR]])
-; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN]], 0
-; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN]], 1
+; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN]], 0
+; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN]], 1
; NEON-NEXT: ret void
;
; SVE-FIXED-LABEL: define void @deinterleave_i16_factor2
@@ -56,8 +56,8 @@ define void @deinterleave_8xi32_factor2(ptr %ptr) {
; NEON-LABEL: define void @deinterleave_8xi32_factor2
; NEON-SAME: (ptr [[PTR:%.*]]) {
; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[PTR]])
-; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
-; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1
+; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
+; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1
; NEON-NEXT: ret void
;
; SVE-FIXED-LABEL: define void @deinterleave_8xi32_factor2
@@ -79,8 +79,8 @@ define void @deinterleave_i64_factor2(ptr %ptr) {
; NEON-LABEL: define void @deinterleave_i64_factor2
; NEON-SAME: (ptr [[PTR:%.*]]) {
; NEON-NEXT: [[LDN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[PTR]])
-; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 0
-; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 1
+; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 0
+; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 1
; NEON-NEXT: ret void
;
; SVE-FIXED-LABEL: define void @deinterleave_i64_factor2
@@ -102,8 +102,8 @@ define void @deinterleave_float_factor2(ptr %ptr) {
; NEON-LABEL: define void @deinterleave_float_factor2
; NEON-SAME: (ptr [[PTR:%.*]]) {
; NEON-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr [[PTR]])
-; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
-; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
+; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
+; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
; NEON-NEXT: ret void
;
; SVE-FIXED-LABEL: define void @deinterleave_float_factor2
@@ -125,8 +125,8 @@ define void @deinterleave_double_factor2(ptr %ptr) {
; NEON-LABEL: define void @deinterleave_double_factor2
; NEON-SAME: (ptr [[PTR:%.*]]) {
; NEON-NEXT: [[LDN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr [[PTR]])
-; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <2 x double>, <2 x double> } [[LDN]], 0
-; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x double>, <2 x double> } [[LDN]], 1
+; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <2 x double>, <2 x double> } [[LDN]], 0
+; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <2 x double>, <2 x double> } [[LDN]], 1
; NEON-NEXT: ret void
;
; SVE-FIXED-LABEL: define void @deinterleave_double_factor2
@@ -148,8 +148,8 @@ define void @deinterleave_ptr_factor2(ptr %ptr) {
; NEON-LABEL: define void @deinterleave_ptr_factor2
; NEON-SAME: (ptr [[PTR:%.*]]) {
; NEON-NEXT: [[LDN:%.*]] = call { <2 x ptr>, <2 x ptr> } @llvm.aarch64.neon.ld2.v2p0.p0(ptr [[PTR]])
-; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[LDN]], 0
-; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[LDN]], 1
+; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[LDN]], 0
+; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[LDN]], 1
; NEON-NEXT: ret void
;
; SVE-FIXED-LABEL: define void @deinterleave_ptr_factor2
@@ -301,6 +301,10 @@ define void @deinterleave_wide_i16_factor2(ptr %ptr) #0 {
; NEON-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP3]], <8 x i16> [[TMP7]], i64 8)
; NEON-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN1]], 1
; NEON-NEXT: [[TMP10:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP5]], <8 x i16> [[TMP9]], i64 8)
+; NEON-NEXT: [[TMP11:%.*]] = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> [[TMP8]], 0
+; NEON-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i16>, <16 x i16> } [[TMP11]], <16 x i16> [[TMP10]], 1
+; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <16 x i16>, <16 x i16> } [[TMP12]], 0
+; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <16 x i16>, <16 x i16> } [[TMP12]], 1
; NEON-NEXT: ret void
;
; SVE-FIXED-LABEL: define void @deinterleave_wide_i16_factor2
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
index 436389ba5b991..d7649801ea2fc 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
@@ -8,8 +8,8 @@ define void @deinterleave_nxi8_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @deinterleave_nxi8_factor2
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> splat (i1 true), ptr [[PTR]])
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[LDN]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[LDN]], 1
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[LDN]], 1
+; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[LDN]], 0
; CHECK-NEXT: ret void
;
%load = load <vscale x 32 x i8>, ptr %ptr, align 1
@@ -23,8 +23,8 @@ define void @deinterleave_nxi16_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @deinterleave_nxi16_factor2
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]])
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 1
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 0
+; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 1
; CHECK-NEXT: ret void
;
%load = load <vscale x 16 x i16>, ptr %ptr, align 2
@@ -38,8 +38,8 @@ define void @deinterleave_nx8xi32_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @deinterleave_nx8xi32_factor2
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[PTR]])
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0
+; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1
; CHECK-NEXT: ret void
;
%load = load <vscale x 8 x i32>, ptr %ptr, align 4
@@ -53,8 +53,8 @@ define void @deinterleave_nxi64_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @deinterleave_nxi64_factor2
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0
+; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1
; CHECK-NEXT: ret void
;
%load = load <vscale x 4 x i64>, ptr %ptr, align 8
@@ -68,8 +68,8 @@ define void @deinterleave_nxfloat_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @deinterleave_nxfloat_factor2
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> splat (i1 true), ptr [[PTR]])
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 1
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 0
+; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 1
; CHECK-NEXT: ret void
;
%load = load <vscale x 8 x float>, ptr %ptr, align 4
@@ -83,8 +83,8 @@ define void @deinterleave_nxdouble_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @deinterleave_nxdouble_factor2
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 1
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 0
+; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 1
; CHECK-NEXT: ret void
;
%load = load <vscale x 4 x double>, ptr %ptr, align 8
@@ -98,8 +98,8 @@ define void @deinterleave_nxptr_factor2(ptr %ptr) #0 {
; CHECK-LABEL: define void @deinterleave_nxptr_factor2
; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x ptr>, <vscale x 2 x ptr> } @llvm.aarch64.sve.ld2.sret.nxv2p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]])
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x ptr>, <vscale x 2 x ptr> } [[LDN]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x ptr>, <vscale x 2 x ptr> } [[LDN]], 1
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { <vscale x 2 x ptr>, <vscale x 2 x ptr> } [[LDN]], 0
+; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { <vscale x 2 x ptr>, <vscale x 2 x ptr> } [[LDN]], 1
; CHECK-NEXT: ret void
;
%load = load <vscale x 4 x ptr>, ptr %ptr, align 8
@@ -215,6 +215,10 @@ define void @deinterleave_wide_nxi32_factor2(ptr %ptr) #0 {
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP13]], <vscale x 4 x i32> [[TMP17]], i64 12)
; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN3]], 1
; CHECK-NEXT: [[TMP20:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP15]], <vscale x 4 x i32> [[TMP19]], i64 12)
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <vscale x 16 x i32>, <vscale x 16 x i32> } poison, <vscale x 16 x i32> [[TMP18]], 0
+; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <vscale x 16 x i32>, <vscale x 16 x i32> } [[TMP21]], <vscale x 16 x i32> [[TMP20]], 1
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i32> } [[TMP22]], 0
+; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i32> } [[TMP22]], 1
; CHECK-NEXT: ret void
;
%load = load <vscale x 32 x i32>, ptr %ptr, align 4
@@ -239,6 +243,10 @@ define void @deinterleave_wide_nxdouble_factor2(ptr %ptr) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP7]], i64 2)
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 1
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP5]], <vscale x 2 x double> [[TMP9]], i64 2)
+; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 4 x double>, <vscale x 4 x double> } poison, <vscale x 4 x double> [[TMP8]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 4 x double>, <vscale x 4 x double> } [[TMP11]], <vscale x 4 x double> [[TMP10]], 1
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { <vscale x 4 x double>, <vscale x 4 x double> } [[TMP12]], 0
+; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { <vscale x 4 x double>, <vscale x 4 x double> } [[TMP12]], 1
; CHECK-NEXT: ret void
;
%load = load <vscale x 8 x double>, ptr %ptr, align 8
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll
index c565066541d1d..58c0bccc3be38 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll
@@ -49,8 +49,16 @@ define void @wide_deinterleave4(ptr %src) {
; CHECK-NEXT: [[TMP16:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP15]], i64 4)
; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 3
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP9]], <vscale x 4 x i32> [[TMP17]], i64 4)
-; CHECK-NEXT: [[SUM:%.*]] = add <vscale x 8 x i32> [[TMP12]], [[TMP14]]
-; CHECK-NEXT: [[SUB:%.*]] = sub <vscale x 8 x i32> [[TMP16]], [[TMP18]]
+; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32> } poison, <vscale x 8 x i32> [[TMP12]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32> } [[TMP19]], <vscale x 8 x i32> [[TMP14]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32> } [[TMP20]], <vscale x 8 x i32> [[TMP16]], 2
+; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32> } [[TMP21]], <vscale x 8 x i32> [[TMP18]], 3
+; CHECK-NEXT: [[TMP23:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32> } [[TMP22]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32> } [[TMP22]], 1
+; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32> } [[TMP22]], 2
+; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32> } [[TMP22]], 3
+; CHECK-NEXT: [[SUM:%.*]] = add <vscale x 8 x i32> [[TMP23]], [[TMP24]]
+; CHECK-NEXT: [[SUB:%.*]] = sub <vscale x 8 x i32> [[TMP25]], [[TMP26]]
; CHECK-NEXT: ret void
;
%load = load <vscale x 32 x i32>, ptr %src, align 4
@@ -73,8 +81,8 @@ define void @mix_deinterleave4_deinterleave2(ptr %src) {
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 3
; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[SRC]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 1
+; CHECK-NEXT: [[LD2_1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 0
+; CHECK-NEXT: [[LD2_2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 1
; CHECK-NEXT: ret void
;
@@ -95,12 +103,11 @@ define void @mix_deinterleave4_deinterleave2(ptr %src) {
define void @negative_deinterleave4_test(ptr %src) {
; CHECK-LABEL: define void @negative_deinterleave4_test
; CHECK-SAME: (ptr [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i32>, ptr [[SRC]], align 4
-; CHECK-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> [[LOAD]])
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[DEINTERLEAVE]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[DEINTERLEAVE]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[DEINTERLEAVE]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[DEINTERLEAVE]], 2
+; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[SRC]])
+; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2
; CHECK-NEXT: ret void
;
%load = load <vscale x 16 x i32>, ptr %src, align 4
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
index b109448bd5d7c..1418ca09c0d61 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
@@ -606,6 +606,10 @@ define void @deinterleave_nxptr_factor2(ptr %ptr) #2 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP7]], i64 2)
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN2]], 1
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP5]], <vscale x 2 x double> [[TMP9]], i64 2)
+; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 4 x double>, <vscale x 4 x double> } poison, <vscale x 4 x double> [[TMP8]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 4 x double>, <vscale x 4 x double> } [[TMP11]], <vscale x 4 x double> [[TMP10]], 1
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { <vscale x 4 x double>, <vscale x 4 x double> } [[TMP12]], 0
+; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { <vscale x 4 x double>, <vscale x 4 x double> } [[TMP12]], 1
; CHECK-NEXT: ret void
;
%wide.vec = load <vscale x 8 x double>, ptr %ptr, align 8
>From 5106d8b9552e22e8af4415d98f2b3dadc914cfb5 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Mon, 14 Jul 2025 13:28:41 -0700
Subject: [PATCH 2/5] fixup! [IA] Do not require ExtractValueInst as users for
deinterleave intrinsics
---
llvm/include/llvm/CodeGen/TargetLowering.h | 5 +++--
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 +++++---
2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index f3cf784afb540..b6e0fa13a99f6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3229,7 +3229,8 @@ class LLVM_ABI TargetLoweringBase {
///
/// \p Load is a vp.load instruction.
/// \p Mask is a mask value
- /// \p DeinterleaveRes is a list of deinterleaved results.
+ /// \p VD represents either a deinterleave intrinsic or a list of
+ /// deinterleaved values.
virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
const VectorDeinterleaving &VD) const {
return false;
@@ -3251,7 +3252,7 @@ class LLVM_ABI TargetLoweringBase {
/// llvm.vector.deinterleave{2,3,5,7}
///
/// \p LI is the accompanying load instruction.
- /// \p DeinterleaveValues contains the deinterleaved values.
+ /// \p DI represents the deinterleave intrinsic.
virtual bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
IntrinsicInst *DI) const {
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ff156f4f04c5d..11ae8605c5a71 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17477,14 +17477,14 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
LoadInst *LI, IntrinsicInst *DI) const {
- ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
- const unsigned Factor = DISubtypes.size();
+ VectorDeinterleaving VD(DI);
+ const unsigned Factor = VD.getFactor();
if (Factor != 2 && Factor != 4) {
LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
return false;
}
- VectorType *VTy = cast<VectorType>(DISubtypes[0]);
+ VectorType *VTy = cast<VectorType>(VD.getDeinterleavedType());
const DataLayout &DL = LI->getModule()->getDataLayout();
bool UseScalable;
@@ -17536,6 +17536,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump());
}
+ // Merge the values from different factors.
for (unsigned J = 0; J < Factor; ++J)
Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
} else {
@@ -17545,6 +17546,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
}
+ // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
DI->replaceAllUsesWith(Result);
return true;
}
>From fd0de02dae233ac3e68c099e5748124d6272116c Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Mon, 14 Jul 2025 14:40:13 -0700
Subject: [PATCH 3/5] fixup! Address review comments
---
llvm/lib/Analysis/VectorUtils.cpp | 15 +++++++--------
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3 +--
.../RISCV/rvv/fixed-vectors-deinterleave-load.ll | 2 +-
.../RISCV/rvv/vp-vector-interleaved-access.ll | 2 +-
4 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index cd2ab0edf6fd3..01b3aecf57ac3 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -310,19 +310,18 @@ unsigned VectorDeinterleaving::getFactor() const {
assert(isValid());
if (DI)
return getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
- else
- return Values.size();
+
+ return Values.size();
}
Type *VectorDeinterleaving::getDeinterleavedType() const {
assert(getFactor() > 0);
- if (DI) {
+ if (DI)
return *DI->getType()->subtype_begin();
- } else {
- Value *FirstActive =
- *llvm::find_if(Values, [](Value *V) { return V != nullptr; });
- return FirstActive->getType();
- }
+
+ Value *FirstActive =
+ *llvm::find_if(Values, [](Value *V) { return V != nullptr; });
+ return FirstActive->getType();
}
/// Given a vector and an element number, see if the scalar value is
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 11ae8605c5a71..6278a73e7ddfb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17514,8 +17514,6 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
Value *BaseAddr = LI->getPointerOperand();
Value *Result = nullptr;
if (NumLoads > 1) {
- Result = PoisonValue::get(DI->getType());
-
// Create multiple legal small ldN.
SmallVector<Value *, 4> ExtractedLdValues(Factor, PoisonValue::get(VTy));
for (unsigned I = 0; I < NumLoads; ++I) {
@@ -17537,6 +17535,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
}
// Merge the values from different factors.
+ Result = PoisonValue::get(DI->getType());
for (unsigned J = 0; J < Factor; ++J)
Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
} else {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index 861423026b2eb..807651c9b40c6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -290,7 +290,7 @@ define { <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3_partial(ptr %p)
ret { <8 x i8>, <8 x i8> } %res1
}
-; InterleavedAccess should kick in even if the users of deinterleave intrinsic is not extractvalue.
+; InterleavedAccess should kick in even if the users of deinterleave intrinsic are not extractvalue.
define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3_no_extract(ptr %p, ptr %p1, i1 %c) {
; CHECK-LABEL: vector_deinterleave_load_factor3_no_extract:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 0fa8e05acc8ff..27ecbe56bda42 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -104,7 +104,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_partial(ptr %ptr,
ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
}
-; InterleavedAccess should kick in even if the users of deinterleave intrinsic is not extractvalue.
+; InterleavedAccess should kick in even if the users of deinterleave intrinsic are not extractvalue.
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_no_extract(ptr %ptr, i32 %evl) {
; RV32-LABEL: load_factor3_no_extract:
; RV32: # %bb.0:
>From 445a187684db80d7cfe7a19cb1ca0f2d4c0c5cad Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Wed, 16 Jul 2025 11:06:52 -0700
Subject: [PATCH 4/5] fixup! Rebase to latest TLI hooks
---
llvm/include/llvm/Analysis/VectorUtils.h | 22 ++----------
llvm/include/llvm/CodeGen/TargetLowering.h | 6 ++--
llvm/lib/Analysis/VectorUtils.cpp | 23 ++++--------
llvm/lib/CodeGen/InterleavedAccessPass.cpp | 11 +++---
.../Target/AArch64/AArch64ISelLowering.cpp | 5 ++-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 +-
.../Target/RISCV/RISCVInterleavedAccess.cpp | 35 +++++++++----------
7 files changed, 36 insertions(+), 68 deletions(-)
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 8d7407949b3f0..00cbfde107db7 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -23,8 +23,8 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
-class IntrinsicInst;
class TargetLibraryInfo;
+class IntrinsicInst;
/// The Vector Function Database.
///
@@ -189,24 +189,8 @@ LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);
/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID);
-/// A vector can either be deinterleaved through an intrinsic or a combination
-/// of shufflevector instructions. This is a thin abstraction layer to provide
-/// some common information like the deinterleaving factor.
-struct VectorDeinterleaving {
- IntrinsicInst *DI = nullptr;
- ArrayRef<Value *> Values;
-
- unsigned getFactor() const;
-
- Type *getDeinterleavedType() const;
-
- explicit VectorDeinterleaving(IntrinsicInst *DI) : DI(DI) {}
- explicit VectorDeinterleaving(ArrayRef<Value *> Values) : Values(Values) {}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- bool isValid() const { return (DI != nullptr) ^ !Values.empty(); }
-#endif
-};
+/// Given a deinterleaveN intrinsic, return the (narrow) type of each factor.
+LLVM_ABI Type *getDeinterleavedVectorType(IntrinsicInst *DI);
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 25106a7fb1f25..06aa66670c7e7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -96,7 +96,6 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class TargetTransformInfo;
class Value;
-struct VectorDeinterleaving;
class VPIntrinsic;
namespace Sched {
@@ -3229,10 +3228,9 @@ class LLVM_ABI TargetLoweringBase {
///
/// \p Load is a vp.load instruction.
/// \p Mask is a mask value
- /// \p VD represents either a deinterleave intrinsic or a list of
- /// deinterleaved values.
+ /// \p DeinterleaveRes is a list of deinterleaved results.
virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
- const VectorDeinterleaving &VD) const {
+ ArrayRef<Value *> DeinterleaveRes) const {
return false;
}
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 01b3aecf57ac3..a505743caf655 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -306,22 +306,13 @@ unsigned llvm::getDeinterleaveIntrinsicFactor(Intrinsic::ID ID) {
}
}
-unsigned VectorDeinterleaving::getFactor() const {
- assert(isValid());
- if (DI)
- return getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
-
- return Values.size();
-}
-
-Type *VectorDeinterleaving::getDeinterleavedType() const {
- assert(getFactor() > 0);
- if (DI)
- return *DI->getType()->subtype_begin();
-
- Value *FirstActive =
- *llvm::find_if(Values, [](Value *V) { return V != nullptr; });
- return FirstActive->getType();
+Type *llvm::getDeinterleavedVectorType(IntrinsicInst *DI) {
+ [[maybe_unused]] unsigned Factor =
+ getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
+ ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
+ assert(Factor && Factor == DISubtypes.size() &&
+ "unexpected deinterleave factor or result type");
+ return DISubtypes[0];
}
/// Given a vector and an element number, see if the scalar value is
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index b77874b9a1fbb..4db2f87b4777d 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -381,8 +381,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
SmallVector<Value *, 4> ShuffleValues(Factor, nullptr);
for (auto [Idx, ShuffleMaskIdx] : enumerate(Indices))
ShuffleValues[ShuffleMaskIdx] = Shuffles[Idx];
- VectorDeinterleaving VD(ShuffleValues);
- if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, VD))
+ if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, ShuffleValues))
// If Extracts is not empty, tryReplaceExtracts made changes earlier.
return !Extracts.empty() || BinOpShuffleChanged;
} else {
@@ -616,17 +615,17 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
return false;
- VectorDeinterleaving VD(DI);
- const unsigned Factor = VD.getFactor();
+ const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
assert(Factor && "unexpected deinterleave intrinsic");
+ Value *Mask = nullptr;
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
return false;
// Check mask operand. Handle both all-true/false and interleaved mask.
Value *WideMask = VPLoad->getOperand(1);
- Value *Mask =
- getMask(WideMask, Factor, cast<VectorType>(VD.getDeinterleavedType()));
+ Mask = getMask(WideMask, Factor,
+ cast<VectorType>(getDeinterleavedVectorType(DI)));
if (!Mask)
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5a6d45c5900d8..fa5d0a509d66a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17487,8 +17487,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
- VectorDeinterleaving VD(DI);
- const unsigned Factor = VD.getFactor();
+ const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
if (Factor != 2 && Factor != 4) {
LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
return false;
@@ -17498,7 +17497,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
return false;
assert(!Mask && "Unexpected mask on a load\n");
- VectorType *VTy = cast<VectorType>(VD.getDeinterleavedType());
+ VectorType *VTy = cast<VectorType>(getDeinterleavedVectorType(DI));
const DataLayout &DL = LI->getModule()->getDataLayout();
bool UseScalable;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5fc7b57a7ea3c..e8adf561c9c35 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -444,7 +444,7 @@ class RISCVTargetLowering : public TargetLowering {
StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
- const VectorDeinterleaving &VD) const override;
+ ArrayRef<Value *> DeinterleaveRes) const override;
bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
ArrayRef<Value *> InterleaveOps) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index a4b68bf201bf3..7b79d203e962e 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -258,15 +258,14 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
- VectorDeinterleaving VD(DI);
- const unsigned Factor = VD.getFactor();
+ const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
assert(Factor && "unexpected deinterleaving factor");
if (Factor > 8)
return false;
IRBuilder<> Builder(Load);
- VectorType *ResVTy = cast<VectorType>(VD.getDeinterleavedType());
+ VectorType *ResVTy = cast<VectorType>(getDeinterleavedVectorType(DI));
const DataLayout &DL = Load->getDataLayout();
auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
@@ -438,14 +437,16 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
/// dealing with factor of 2 (extractvalue is still required for most of other
/// factors though).
bool RISCVTargetLowering::lowerInterleavedVPLoad(
- VPIntrinsic *Load, Value *Mask, const VectorDeinterleaving &VD) const {
+ VPIntrinsic *Load, Value *Mask,
+ ArrayRef<Value *> DeinterleaveResults) const {
+ const unsigned Factor = DeinterleaveResults.size();
assert(Mask && "Expect a valid mask");
assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
"Unexpected intrinsic");
- const unsigned Factor = VD.getFactor();
- assert(Factor && "unexpected vector deinterleaving");
- VectorType *VTy = cast<VectorType>(VD.getDeinterleavedType());
+ Value *FirstActive = *llvm::find_if(DeinterleaveResults,
+ [](Value *V) { return V != nullptr; });
+ VectorType *VTy = cast<VectorType>(FirstActive->getType());
auto &DL = Load->getModule()->getDataLayout();
Align Alignment = Load->getParamAlign(0).value_or(
@@ -509,18 +510,14 @@ bool RISCVTargetLowering::lowerInterleavedVPLoad(
}
}
- if (VD.DI) {
- VD.DI->replaceAllUsesWith(Return);
- } else {
- for (auto [Idx, DIO] : enumerate(VD.Values)) {
- if (!DIO)
- continue;
- // We have to create a brand new ExtractValue to replace each
- // of these old ExtractValue instructions.
- Value *NewEV =
- Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
- DIO->replaceAllUsesWith(NewEV);
- }
+ for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) {
+ if (!DIO)
+ continue;
+ // We have to create a brand new ExtractValue to replace each
+ // of these old ExtractValue instructions.
+ Value *NewEV =
+ Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
+ DIO->replaceAllUsesWith(NewEV);
}
return true;
>From aec05c4084e8a25a2d9077784cc181637af1cff1 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Wed, 16 Jul 2025 11:16:19 -0700
Subject: [PATCH 5/5] fixup! fixup! Rebase to latest TLI hooks
---
llvm/include/llvm/CodeGen/TargetLowering.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 06aa66670c7e7..238d07a20eec8 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3251,7 +3251,7 @@ class LLVM_ABI TargetLoweringBase {
///
/// \p Load is the accompanying load instruction. Can be either a plain load
/// instruction or a vp.load intrinsic.
- /// \p DeinterleaveValues contains the deinterleaved values.
+ /// \p DI represents the deinterleaveN intrinsic.
virtual bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
IntrinsicInst *DI) const {
return false;
More information about the llvm-commits
mailing list