[llvm] [IA] Generalize the support for power-of-two (de)interleave intrinsics (PR #123863)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 22 11:30:52 PST 2025
================
@@ -478,23 +479,184 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
return true;
}
+// For an (de)interleave tree like this:
+//
+//   A   C B   D
+//   |___| |___|
+//     |_____|
+//        |
+//     A B C D
+//
+// We will get ABCD at the end while the leaf operands/results
+// are ACBD, which are also what we initially collected in
+// getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
+// hooks (e.g. lowerDeinterleaveIntrinsicToLoad) expect ABCD, so we need
+// to reorder them by interleaving these values.
+//
+// SubLeaves is reordered in place. Its size must be a power of two that
+// is greater than one.
+static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
+  const unsigned NumLeaves = SubLeaves.size();
+  assert(NumLeaves > 1 && isPowerOf2_32(NumLeaves));
+
+  // A pair of leaves is already in its final order.
+  if (NumLeaves == 2)
+    return;
+
+  const unsigned HalfLeaves = NumLeaves / 2;
+  // Put each sub-tree into its final order first.
+  interleaveLeafValues(SubLeaves.take_front(HalfLeaves));
+  interleaveLeafValues(SubLeaves.drop_front(HalfLeaves));
+
+  // Then interleave the two halves element by element:
+  //   a0 a1 a2 a3 b0 b1 b2 b3  ->  a0 b0 a1 b1 a2 b2 a3 b3
+  // The result is staged in a temporary buffer because the source and
+  // destination positions overlap.
+  SmallVector<Value *, 8> Buffer;
+  Buffer.reserve(NumLeaves);
+  for (unsigned I = 0; I < HalfLeaves; ++I) {
+    Buffer.push_back(SubLeaves[I]);
+    Buffer.push_back(SubLeaves[I + HalfLeaves]);
+  }
+
+  llvm::copy(Buffer, SubLeaves.begin());
+}
+
+// Collects the leaf operands of a tree of vector_interleave2 intrinsics
+// rooted at II.
+//
+// On success, returns true with Operands holding the leaves reordered by
+// interleaveLeafValues. Returns false if II is not a vector_interleave2,
+// if two consecutively collected leaves differ in type, or if the number
+// of leaves is not a power of two greater than one. Every visited
+// vector_interleave2 is appended to DeadInsts, including on the failure
+// paths taken after the traversal has started.
+static bool
+getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
+                          SmallVectorImpl<Instruction *> &DeadInsts) {
+  auto IsInterleave2 = [](IntrinsicInst *Inst) {
+    return Inst->getIntrinsicID() == Intrinsic::vector_interleave2;
+  };
+
+  if (!IsInterleave2(II))
+    return false;
+
+  // Breadth-first traversal: Worklist only grows, and Head marks the
+  // next node to visit.
+  SmallVector<IntrinsicInst *, 8> Worklist;
+  Worklist.push_back(II);
+  for (unsigned Head = 0; Head < Worklist.size(); ++Head) {
+    IntrinsicInst *Node = Worklist[Head];
+
+    // All the intermediate intrinsics will be deleted.
+    DeadInsts.push_back(Node);
+
+    for (unsigned OpIdx = 0; OpIdx != 2; ++OpIdx) {
+      Value *Child = Node->getOperand(OpIdx);
+
+      // A nested interleave is an inner node; visit it later.
+      auto *ChildII = dyn_cast<IntrinsicInst>(Child);
+      if (ChildII && IsInterleave2(ChildII)) {
+        Worklist.push_back(ChildII);
+        continue;
+      }
+
+      // Anything else is a leaf. If this is not a perfectly balanced
+      // tree, the leaf types would be different.
+      const bool TypeMismatch =
+          !Operands.empty() && Child->getType() != Operands.back()->getType();
+      if (TypeMismatch)
+        return false;
+
+      Operands.push_back(Child);
+    }
+  }
+
+  // Currently we only recognize power-of-two factors.
+  // FIXME: should we assert here instead?
+  const unsigned Factor = Operands.size();
+  if (!(Factor > 1 && isPowerOf2_32(Factor)))
+    return false;
+
+  interleaveLeafValues(Operands);
+  return true;
+}
+
+static bool
+getVectorDeinterleaveFactor(IntrinsicInst *II,
+ SmallVectorImpl<Value *> &Results,
+ SmallVectorImpl<Instruction *> &DeadInsts) {
+ using namespace PatternMatch;
+ if (II->getIntrinsicID() != Intrinsic::vector_deinterleave2 ||
----------------
mshockwave wrote:
Yes, since we've already checked the intrinsic ID before calling this function. It's fixed now.
https://github.com/llvm/llvm-project/pull/123863
More information about the llvm-commits
mailing list