[llvm] [IA][RISCV] Support VP intrinsics in InterleavedAccessPass (PR #120490)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 8 21:26:24 PST 2025


================
@@ -248,6 +249,196 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
   return false;
 }
 
+// For an (de)interleave tree like this:
+//
+//   A   C B   D
+//   |___| |___|
+//     |_____|
+//        |
+//     A B C D
+//
+//  We will get ABCD at the end while the leaf operands/results
+//  are ACBD, which are also what we initially collected in
+//  getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
+//  hooks (e.g. lowerInterleavedScalableLoad) expect ABCD, so we need
+//  to reorder them by interleaving these values.
+static void interleaveLeafValues(SmallVectorImpl<Value *> &Leaves) {
+  unsigned Factor = Leaves.size();
+  assert(isPowerOf2_32(Factor) && Factor <= 8 && Factor > 1);
+
+  if (Factor == 2)
+    return;
+
+  SmallVector<Value *, 8> Buffer;
+  if (Factor == 4) {
+    for (unsigned SrcIdx : {0, 2, 1, 3})
+      Buffer.push_back(Leaves[SrcIdx]);
+  } else {
+    // Factor of 8.
+    //
+    //  A E C G B F D H
+    //  |_| |_| |_| |_|
+    //   |___|   |___|
+    //     |_______|
+    //         |
+    //  A B C D E F G H
+    for (unsigned SrcIdx : {0, 4, 2, 6, 1, 5, 3, 7})
+      Buffer.push_back(Leaves[SrcIdx]);
+  }
+
+  llvm::copy(Buffer, Leaves.begin());
+}
+
+static unsigned getVectorInterleaveFactor(IntrinsicInst *II,
+                                          SmallVectorImpl<Value *> &Operands) {
+  if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
+    return 0;
+
+  unsigned Factor = 0;
+
+  // Visit with BFS
+  SmallVector<IntrinsicInst *, 8> Queue;
+  Queue.push_back(II);
+  while (!Queue.empty()) {
+    IntrinsicInst *Current = Queue.front();
+    Queue.erase(Queue.begin());
+
+    for (unsigned I = 0; I < 2; ++I) {
+      Value *Op = Current->getOperand(I);
+      if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
+        if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) {
+          Queue.push_back(OpII);
+          continue;
+        }
+
+      // If this is not a perfectly balanced tree, the leaf
+      // result types would be different.
+      if (!Operands.empty() && Op->getType() != Operands.back()->getType())
+        return 0;
+
+      ++Factor;
+      Operands.push_back(Op);
+    }
+  }
+
+  // Currently we only recognize power-of-two factors.
+  // FIXME: should we assert here instead?
+  if (Factor <= 1 || !isPowerOf2_32(Factor))
+    return 0;
+
+  interleaveLeafValues(Operands);
+  return Factor;
+}
+
+/// Check the interleaved mask
+///
+/// - if a value within the optional is non-nullptr, the value corresponds to
+///   deinterleaved mask
+/// - if a value within the option is nullptr, the value corresponds to all-true
+///   mask
+/// - return nullopt if mask cannot be deinterleaved
+static std::optional<Value *> getMask(Value *WideMask, unsigned Factor) {
+  using namespace llvm::PatternMatch;
+  if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
+    SmallVector<Value *, 8> Operands;
+    if (unsigned MaskFactor = getVectorInterleaveFactor(IMI, Operands)) {
+      assert(!Operands.empty());
+      if (MaskFactor == Factor &&
+          std::equal(Operands.begin(), Operands.end(), Operands.begin()))
+        return Operands.front();
+    }
+  }
+  if (match(WideMask, m_AllOnes()))
+    return nullptr;
+  return std::nullopt;
+}
+
+static unsigned getVectorDeInterleaveFactor(IntrinsicInst *II,
+                                            SmallVectorImpl<Value *> &Results) {
+  using namespace PatternMatch;
+  if (II->getIntrinsicID() != Intrinsic::vector_deinterleave2 ||
+      !II->hasNUses(2))
+    return 0;
+
+  unsigned Factor = 0;
+
+  // Visit with BFS
+  SmallVector<IntrinsicInst *, 8> Queue;
+  Queue.push_back(II);
+  while (!Queue.empty()) {
+    IntrinsicInst *Current = Queue.front();
+    Queue.erase(Queue.begin());
+    assert(Current->hasNUses(2));
+
+    unsigned VisitedIdx = 0;
+    for (User *Usr : Current->users()) {
+      // We're playing safe here and matching only the expression
+      // consisting of a perfectly balanced binary tree in which all
+      // intermediate values are only used once.
+      if (!Usr->hasOneUse() || !isa<ExtractValueInst>(Usr))
+        return 0;
+
+      auto *EV = cast<ExtractValueInst>(Usr);
+      ArrayRef<unsigned> Indices = EV->getIndices();
+      if (Indices.size() != 1 || Indices[0] >= 2)
+        return 0;
+
+      // The idea is that we don't want to have two extractvalue
+      // on the same index. So we XOR (1 << index) onto VisitedIdx
+      // such that if there is any duplication, VisitedIdx will be
+      // zero.
+      VisitedIdx ^= (1 << Indices[0]);
+      if (!VisitedIdx)
+        return 0;
----------------
lukel97 wrote:

Given that there's only two users, would it be easier to identify the LHS + RHS first? Then we can be explicitly do the BFS from left to right and not have to swap

```c++
ExtractValueInst *LHS, RHS;
for (User *Usr : Current->users()) {
  if (Indices[0] == 0 && !LHS)
    LHS = Usr;
  else if (Indices[0] == 1 && !RHS)
    RHS = Usr;
  else
    return 0;
}

for (ExtractValueInst EV : {LHS, RHS}) {
  // continue the traversal
  if (match(...)) {
    Queue.push_back(EVUsr);
    continue;
  }
  // check if balanced...
  Results.push_back(EV);
}
```

https://github.com/llvm/llvm-project/pull/120490


More information about the llvm-commits mailing list