[llvm] [SLP][AMDGPU] Vectorize operands of non-trivially-vectorizable intrinsic calls (PR #189784)

Fri Apr 3 13:39:42 PDT 2026

================
@@ -29477,6 +29506,31 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       PostProcessCmps.insert(cast<CmpInst>(&*It));
   }
 
+  SmallMapVector<Intrinsic::ID, SmallSetVector<Value *, 4>, 4> IntrinsicSeedOps;
+  for (Instruction &I : *BB) {
+    if (R.isDeleted(&I))
+      continue;
+    // Collect operands of non-trivially vectorizable intrinsic calls (e.g.,
+    // llvm.amdgcn.exp2) and group by intrinsic ID, so their operands can be
+    // vectorized independently.
+    // FIXME: Extend for all non-vectorized functions.
+    SmallVector<Value *, 4> Ops =
+        getNonTriviallyVectorizableIntrinsicCallOperand(&I);
+    if (!Ops.empty())
+      IntrinsicSeedOps[cast<CallInst>(&I)->getIntrinsicID()].insert_range(Ops);
+  }
+  // Try to vectorize per intrinsic call ID.
+  for (auto &[ID, Ops] : IntrinsicSeedOps) {
+    // Sub-group by opcode so that we do not bail out early.
+    SmallMapVector<unsigned, SmallVector<Value *, 4>, 4> OpcodeGroups;
+    for (Value *Op : Ops) {
+      if (auto *I = dyn_cast<Instruction>(Op))
+        OpcodeGroups[I->getOpcode()].push_back(Op);
+    }
+    for (auto &[Opc, Group] : OpcodeGroups)
+      Changed |= tryToVectorizeList(Group, R);
+  }
+
----------------
mssefat wrote:

I have merged the loops for grouping. 

https://github.com/llvm/llvm-project/pull/189784