[llvm] [IA][RISCV] Support VP intrinsics in InterleavedAccessPass (PR #120490)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 23 15:54:42 PST 2024
================
@@ -248,6 +249,186 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
return false;
}
+// Reorder the leaves of a (de)interleave tree into the order in which the
+// values appear in the final (de)interleaved vector.
+//
+// For an (de)interleave tree like this:
+//
+//   A   C B   D
+//   |___| |___|
+//     |_____|
+//        |
+//     A B C D
+//
+// We will get ABCD at the end while the leaf operands/results
+// are ACBD, which is also the order in which they were initially collected
+// in getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
+// hooks (e.g. lowerInterleavedScalableLoad) expect ABCD, so we need
+// to reorder them by interleaving these values.
+//
+// Leaves is rewritten in place; its size must be 2, 4 or 8.
+static void interleaveLeafValues(SmallVectorImpl<Value *> &Leaves) {
+  unsigned Factor = Leaves.size();
+  assert(isPowerOf2_32(Factor) && Factor <= 8 && Factor > 1);
+
+  // With only two leaves the collected order is already the final order.
+  if (Factor == 2)
+    return;
+
+  // Source index for every destination slot.
+  //
+  // Factor of 4: A C B D -> A B C D (see the diagram above).
+  static const unsigned OrderFor4[] = {0, 2, 1, 3};
+  // Factor of 8.
+  //
+  //   A E C G B F D H
+  //   |_| |_| |_| |_|
+  //    |___|   |___|
+  //      |_______|
+  //          |
+  //   A B C D E F G H
+  static const unsigned OrderFor8[] = {0, 4, 2, 6, 1, 5, 3, 7};
+
+  const bool IsFactor4 = Factor == 4;
+  const unsigned *Order = IsFactor4 ? OrderFor4 : OrderFor8;
+  const unsigned NumSlots = IsFactor4 ? 4 : 8;
+
+  // Gather into a temporary first so that no leaf is overwritten before it
+  // has been read.
+  SmallVector<Value *, 8> Reordered;
+  for (unsigned Slot = 0; Slot != NumSlots; ++Slot)
+    Reordered.push_back(Leaves[Order[Slot]]);
+
+  llvm::copy(Reordered, Leaves.begin());
+}
+
+/// Match a tree of vector.interleave2 intrinsics rooted at \p II.
+///
+/// The tree is walked breadth-first; every operand that is not itself a
+/// vector.interleave2 is treated as a leaf and appended to \p Operands.
+/// \p Operands is assumed to be empty on entry, since the reordering done by
+/// interleaveLeafValues works on the whole vector.
+///
+/// \returns the number of leaves (2, 4 or 8) on success, with \p Operands
+/// reordered by interleaveLeafValues. Returns 0 if \p II is not a
+/// vector.interleave2, if the leaf count is not a power of two in [2, 8], or
+/// if the tree is not perfectly balanced. \p Operands may hold partially
+/// collected leaves when 0 is returned.
+static unsigned getVectorInterleaveFactor(IntrinsicInst *II,
+                                          SmallVectorImpl<Value *> &Operands) {
+  if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
+    return 0;
+
+  unsigned Factor = 0;
+
+  // Visit with BFS
+  SmallVector<IntrinsicInst *, 8> Queue;
+  Queue.push_back(II);
+  while (!Queue.empty()) {
+    IntrinsicInst *Current = Queue.front();
+    Queue.erase(Queue.begin());
+
+    for (unsigned I = 0; I < 2; ++I) {
+      Value *Op = Current->getOperand(I);
+      if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
+        if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) {
+          Queue.push_back(OpII);
+          continue;
+        }
+
+      // Each interleave2 doubles the element count of its operands, so
+      // leaves at different depths have different types. Requiring a single
+      // leaf type therefore rejects trees that are not perfectly balanced,
+      // which would otherwise be reordered incorrectly below.
+      if (Factor != 0 && Op->getType() != Operands.back()->getType())
+        return 0;
+
+      ++Factor;
+      Operands.push_back(Op);
+    }
+  }
+
+  // Currently we only recognize power-of-two factors, and
+  // interleaveLeafValues only knows how to reorder up to 8 leaves.
+  // FIXME: should we assert here instead?
+  if (Factor > 1 && Factor <= 8 && isPowerOf2_32(Factor)) {
+    interleaveLeafValues(Operands);
+    return Factor;
+  }
+  return 0;
+}
+
+/// Check the interleaved mask
+///
+/// - if a value within the optional is non-nullptr, the value corresponds to
+///   the deinterleaved mask
+/// - if a value within the optional is nullptr, the value corresponds to an
+///   all-true mask
+/// - return nullopt if the mask cannot be deinterleaved
+///
+/// \p WideMask is deinterleavable when it is an interleave tree with exactly
+/// \p Factor leaves that are all the same value; that common leaf is then
+/// returned.
+static std::optional<Value *> getMask(Value *WideMask, unsigned Factor) {
+  using namespace llvm::PatternMatch;
+  if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
+    SmallVector<Value *, 8> Operands;
+    if (unsigned MaskFactor = getVectorInterleaveFactor(IMI, Operands)) {
+      assert(!Operands.empty());
+      // Every leaf must be the very same mask value; otherwise the individual
+      // fields are masked differently and a single narrow mask cannot
+      // represent the wide one.
+      if (MaskFactor == Factor && llvm::all_equal(Operands))
+        return Operands.front();
+    }
+  }
+  if (match(WideMask, m_AllOnes()))
+    return nullptr;
+  return std::nullopt;
+}
+
+static unsigned getVectorDeInterleaveFactor(IntrinsicInst *II,
+ SmallVectorImpl<Value *> &Results) {
+ using namespace PatternMatch;
+ if (II->getIntrinsicID() != Intrinsic::vector_deinterleave2 ||
+ !II->hasNUses(2))
+ return 0;
+
+ unsigned Factor = 0;
+
+ // Visit with BFS
+ SmallVector<IntrinsicInst *, 8> Queue;
+ Queue.push_back(II);
+ while (!Queue.empty()) {
+ IntrinsicInst *Current = Queue.front();
+ Queue.erase(Queue.begin());
+ assert(Current->hasNUses(2));
+
+ unsigned VisitedIdx = 0;
+ for (User *Usr : Current->users()) {
+ // We're playing safe here and matches only the expression
+ // consisting of a perfectly balanced binary tree in which all
+ // intermediate values are only used once.
+ if (!Usr->hasOneUse() || !isa<ExtractValueInst>(Usr))
+ return 0;
+
+ auto *EV = cast<ExtractValueInst>(Usr);
+ ArrayRef<unsigned> Indices = EV->getIndices();
+ if (Indices.size() != 1 || Indices[0] >= 2)
+ return 0;
+
+ // The idea is that we don't want to have two extractvalue
+ // on the same index. So we XOR (index + 1) onto VisitedIdx
+ // such that if there is any duplication, VisitedIdx will be
+ // zero.
+ VisitedIdx ^= Indices[0] + 1;
----------------
topperc wrote:
Should `Indices[0] + 1` be `1 << Indices[0]`? Seems like you're trying to treat VisitedIdx as a bit vector but then taking shortcuts because there are only 2 bits?
https://github.com/llvm/llvm-project/pull/120490
More information about the llvm-commits
mailing list