[llvm] [IA] Generalize the support for power-of-two (de)interleave intrinsics (PR #123863)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 22 11:31:50 PST 2025
================
@@ -17464,142 +17464,9 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
return true;
}
-bool getDeinterleave2Values(
- Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
- SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) {
- if (!DI->hasNUses(2))
- return false;
- auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
- auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
- if (!Extr1 || !Extr2)
- return false;
-
- DeinterleavedValues.resize(2);
- // Place the values into the vector in the order of extraction:
- DeinterleavedValues[0x1 & (Extr1->getIndices()[0])] = Extr1;
- DeinterleavedValues[0x1 & (Extr2->getIndices()[0])] = Extr2;
- if (!DeinterleavedValues[0] || !DeinterleavedValues[1])
- return false;
-
- // Make sure that the extracted values match the deinterleave tree pattern
- if (!match(DeinterleavedValues[0], m_ExtractValue<0>((m_Specific(DI)))) ||
- !match(DeinterleavedValues[1], m_ExtractValue<1>((m_Specific(DI))))) {
- LLVM_DEBUG(dbgs() << "matching deinterleave2 failed\n");
- return false;
- }
- // DeinterleavedValues will be replace by output of ld2
- DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
- DeinterleavedValues.begin(),
- DeinterleavedValues.end());
- return true;
-}
-
-/*
-DeinterleaveIntrinsic tree:
- [DI]
- / \
- [Extr<0>] [Extr<1>]
- | |
- [DI] [DI]
- / \ / \
- [Extr<0>][Extr<1>] [Extr<0>][Extr<1>]
- | | | |
-roots: A C B D
-roots in correct order of DI4 will be: A B C D.
-Returns true if `DI` is the top of an IR tree that represents a theoretical
-vector.deinterleave4 intrinsic. When true is returned, \p `DeinterleavedValues`
-vector is populated with the results such an intrinsic would return: (i.e. {A,
-B, C, D } = vector.deinterleave4(...))
-*/
-bool getDeinterleave4Values(
- Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
- SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) {
- if (!DI->hasNUses(2))
- return false;
- auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
- auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
- if (!Extr1 || !Extr2)
- return false;
-
- if (!Extr1->hasOneUse() || !Extr2->hasOneUse())
- return false;
- auto *DI1 = *(Extr1->user_begin());
- auto *DI2 = *(Extr2->user_begin());
-
- if (!DI1->hasNUses(2) || !DI2->hasNUses(2))
- return false;
- // Leaf nodes of the deinterleave tree:
- auto *A = dyn_cast<ExtractValueInst>(*(DI1->user_begin()));
- auto *C = dyn_cast<ExtractValueInst>(*(++DI1->user_begin()));
- auto *B = dyn_cast<ExtractValueInst>(*(DI2->user_begin()));
- auto *D = dyn_cast<ExtractValueInst>(*(++DI2->user_begin()));
- // Make sure that the A,B,C and D are ExtractValue instructions before getting
- // the extract index
- if (!A || !B || !C || !D)
- return false;
-
- DeinterleavedValues.resize(4);
- // Place the values into the vector in the order of deinterleave4:
- DeinterleavedValues[0x3 &
- ((A->getIndices()[0] * 2) + Extr1->getIndices()[0])] = A;
- DeinterleavedValues[0x3 &
- ((B->getIndices()[0] * 2) + Extr2->getIndices()[0])] = B;
- DeinterleavedValues[0x3 &
- ((C->getIndices()[0] * 2) + Extr1->getIndices()[0])] = C;
- DeinterleavedValues[0x3 &
- ((D->getIndices()[0] * 2) + Extr2->getIndices()[0])] = D;
- if (!DeinterleavedValues[0] || !DeinterleavedValues[1] ||
- !DeinterleavedValues[2] || !DeinterleavedValues[3])
- return false;
-
- // Make sure that A,B,C,D match the deinterleave tree pattern
- if (!match(DeinterleavedValues[0], m_ExtractValue<0>(m_Deinterleave2(
- m_ExtractValue<0>(m_Specific(DI))))) ||
- !match(DeinterleavedValues[1], m_ExtractValue<0>(m_Deinterleave2(
- m_ExtractValue<1>(m_Specific(DI))))) ||
- !match(DeinterleavedValues[2], m_ExtractValue<1>(m_Deinterleave2(
- m_ExtractValue<0>(m_Specific(DI))))) ||
- !match(DeinterleavedValues[3], m_ExtractValue<1>(m_Deinterleave2(
- m_ExtractValue<1>(m_Specific(DI)))))) {
- LLVM_DEBUG(dbgs() << "matching deinterleave4 failed\n");
- return false;
- }
-
- // These Values will not be used anymore,
- // DI4 will be created instead of nested DI1 and DI2
- DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
- DeinterleavedValues.begin(),
- DeinterleavedValues.end());
- DeInterleaveDeadInsts.push_back(cast<Instruction>(DI1));
- DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr1));
- DeInterleaveDeadInsts.push_back(cast<Instruction>(DI2));
- DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr2));
-
- return true;
-}
-
-bool getDeinterleavedValues(
- Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
- SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) {
- if (getDeinterleave4Values(DI, DeinterleavedValues, DeInterleaveDeadInsts))
- return true;
- return getDeinterleave2Values(DI, DeinterleavedValues, DeInterleaveDeadInsts);
-}
-
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
IntrinsicInst *DI, LoadInst *LI,
- SmallVectorImpl<Instruction *> &DeadInsts) const {
- // Only deinterleave2 supported at present.
- if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
- return false;
-
- SmallVector<Instruction *, 4> DeinterleavedValues;
- SmallVector<Instruction *, 8> DeInterleaveDeadInsts;
-
- if (!getDeinterleavedValues(DI, DeinterleavedValues, DeInterleaveDeadInsts)) {
- LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
- return false;
- }
+ ArrayRef<Value *> DeinterleavedValues) const {
unsigned Factor = DeinterleavedValues.size();
assert((Factor == 2 || Factor == 4) &&
"Currently supported Factor is 2 or 4 only");
----------------
mshockwave wrote:
You're right, I've turned this into a check and brought back the debug print from the original code.
https://github.com/llvm/llvm-project/pull/123863
More information about the llvm-commits mailing list