[llvm] [IA]: Construct (de)interleave4 out of (de)interleave2 (PR #89276)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 8 05:44:07 PDT 2024
================
@@ -16906,17 +16906,148 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
return true;
}
+bool getDeinterleave2Values( // Succeeds when DI is used by exactly two extractvalues, one for index 0 and one for index 1.
+ Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues, // out: {extract<0>, extract<1>} of DI
+ SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) { // out: both extracts are appended on success only
+ if (!DI->hasNUses(2))
+ return false;
+ auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
+ auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
+ if (!Extr1 || !Extr2)
+ return false;
+
+ DeinterleavedValues.resize(2); // NOTE(review): presumably keeps entries already in the caller's vector (confirm); the null check below assumes fresh slots are null
+ // Place the values into the vector in the order of extraction (slot = extract index & 1, independent of user-list order):
+ DeinterleavedValues[0x1 & (Extr1->getIndices()[0])] = Extr1;
+ DeinterleavedValues[0x1 & (Extr2->getIndices()[0])] = Extr2;
+ if (!DeinterleavedValues[0] || !DeinterleavedValues[1]) // a slot is left unwritten when both extracts map to the same index
+ return false;
+
+ // Make sure that the extracted values match the deinterleave tree pattern
+ if (!match(DeinterleavedValues[0], m_ExtractValue<0>((m_Specific(DI)))) ||
+ !match(DeinterleavedValues[1], m_ExtractValue<1>((m_Specific(DI))))) {
+ LLVM_DEBUG(dbgs() << "matching deinterleave2 failed\n");
+ return false;
+ }
+ // DeinterleavedValues will be replaced by the output of ld2, so record them as dead
+ DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
+ DeinterleavedValues.begin(),
+ DeinterleavedValues.end());
+ return true;
+}
+
+/*
+DeinterleaveIntrinsic tree:
+                        [DI]
+                      /      \
+            [Extr<0>]          [Extr<1>]
+                |                  |
+               [DI]               [DI]
+              /    \             /    \
+        [Extr<0>][Extr<1>] [Extr<0>][Extr<1>]
+            |        |         |        |
+roots:      A        C         B        D
+In deinterleave4 order the roots are A, B, C, D. Returns true if `DI` is the
+top of an IR tree that represents a theoretical vector.deinterleave4 intrinsic.
+When true is returned, `DeinterleavedValues` holds the results such an intrinsic
+would return (i.e. {A, B, C, D} = vector.deinterleave4(...)), and the 4 leaves,
+both inner nodes and both outer extracts are appended to `DeInterleaveDeadInsts`.
+*/
+bool getDeinterleave4Values(
+ Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues,
+ SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) {
+ if (!DI->hasNUses(2))
+ return false;
+ auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
+ auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
+ if (!Extr1 || !Extr2)
+ return false;
+
+ if (!Extr1->hasOneUse() || !Extr2->hasOneUse()) // each outer extract must feed exactly one inner node
+ return false;
+ auto *DI1 = *(Extr1->user_begin()); // sole user of Extr1; verified to be a deinterleave2 by the match below
+ auto *DI2 = *(Extr2->user_begin()); // sole user of Extr2; verified to be a deinterleave2 by the match below
+
+ if (!DI1->hasNUses(2) || !DI2->hasNUses(2))
+ return false;
+ // Leaf nodes of the deinterleave tree (names are provisional; final slots come from the extract indices below):
+ auto *A = dyn_cast<ExtractValueInst>(*(DI1->user_begin()));
+ auto *C = dyn_cast<ExtractValueInst>(*(++DI1->user_begin()));
+ auto *B = dyn_cast<ExtractValueInst>(*(DI2->user_begin()));
+ auto *D = dyn_cast<ExtractValueInst>(*(++DI2->user_begin()));
+ // Make sure that the A,B,C and D are ExtractValue instructions before getting
+ // the extract index
+ if (!A || !B || !C || !D)
+ return false;
+
+ DeinterleavedValues.resize(4); // NOTE(review): presumably keeps entries already in the caller's vector (confirm); the null check below assumes fresh slots are null
+ // Place the values into the vector in the order of deinterleave4: slot = 2 * leaf index + outer extract index, masked to 0..3
+ DeinterleavedValues[0x3 &
+ ((A->getIndices()[0] * 2) + Extr1->getIndices()[0])] = A;
+ DeinterleavedValues[0x3 &
+ ((B->getIndices()[0] * 2) + Extr2->getIndices()[0])] = B;
+ DeinterleavedValues[0x3 &
+ ((C->getIndices()[0] * 2) + Extr1->getIndices()[0])] = C;
+ DeinterleavedValues[0x3 &
+ ((D->getIndices()[0] * 2) + Extr2->getIndices()[0])] = D;
+ if (!DeinterleavedValues[0] || !DeinterleavedValues[1] || // a slot is left unwritten when two leaves map to the same slot
+ !DeinterleavedValues[2] || !DeinterleavedValues[3])
+ return false;
+
+ // Make sure that A,B,C,D match the deinterleave tree pattern (slot i must be extract<i / 2> of deinterleave2(extract<i % 2> of DI))
+ if (!match(DeinterleavedValues[0], m_ExtractValue<0>(m_Deinterleave2(
+ m_ExtractValue<0>(m_Specific(DI))))) ||
+ !match(DeinterleavedValues[1], m_ExtractValue<0>(m_Deinterleave2(
+ m_ExtractValue<1>(m_Specific(DI))))) ||
+ !match(DeinterleavedValues[2], m_ExtractValue<1>(m_Deinterleave2(
+ m_ExtractValue<0>(m_Specific(DI))))) ||
+ !match(DeinterleavedValues[3], m_ExtractValue<1>(m_Deinterleave2(
+ m_ExtractValue<1>(m_Specific(DI)))))) {
+ LLVM_DEBUG(dbgs() << "matching deinterleave4 failed\n");
+ return false;
+ }
+
+ // These values will not be used anymore;
+ // a DI4 will be created instead of the nested DI1 and DI2
+ DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
+ DeinterleavedValues.begin(),
+ DeinterleavedValues.end());
+ DeInterleaveDeadInsts.push_back(cast<Instruction>(DI1)); // inner nodes are listed before the outer extracts they use
+ DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr1));
+ DeInterleaveDeadInsts.push_back(cast<Instruction>(DI2));
+ DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr2));
+
+ return true;
+}
+
+bool getDeinterleavedValues( // Tries the nested deinterleave4 tree first, then falls back to a single deinterleave2.
+ Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues, // out: 4 entries (tree match) or 2 entries (plain match)
+ SmallVectorImpl<Instruction *> &DeInterleaveDeadInsts) { // out: appended to only by the helper that succeeds
+ if (getDeinterleave4Values(DI, DeinterleavedValues, DeInterleaveDeadInsts))
+ return true;
+ return getDeinterleave2Values(DI, DeinterleavedValues, DeInterleaveDeadInsts); // the failed 4-way attempt may have left DeinterleavedValues resized; this call resizes it to 2
+}
+
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
- IntrinsicInst *DI, LoadInst *LI) const {
+ IntrinsicInst *DI, LoadInst *LI,
+ SmallVectorImpl<Instruction *> &DeadInsts) const {
// Only deinterleave2 supported at present.
if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
return false;
- // Only a factor of 2 supported at present.
- const unsigned Factor = 2;
+ SmallVector<Instruction *, 4> DeinterleavedValues;
+ SmallVector<Instruction *, 4> DeInterleaveDeadInsts;
----------------
paulwalker-arm wrote:
Is 8 a better size for `DeInterleaveDeadInsts` since that's the number of dead instructions expected for deinterleave4?
https://github.com/llvm/llvm-project/pull/89276
More information about the llvm-commits
mailing list