[llvm] [SLP][AMDGPU] Vectorize operands of non-trivially-vectorizable intrinsic calls (PR #189784)

Sun Apr 5 19:01:50 PDT 2026

================
@@ -243,6 +243,36 @@ static const int MinScheduleRegionSize = 16;
 /// Maximum allowed number of operands in the PHI nodes.
 static const unsigned MaxPHINumOperands = 128;
 
+/// For an intrinsic call that is not trivially vectorizable, collect the
+/// operands that are instructions so they can be tried for vectorization.
+/// FIXME: Extend for all non-vectorized functions.
+SmallVector<Value *, 4>
+getNonTriviallyVectorizableIntrinsicCallOperand(Value *V) {
+
+  SmallVector<Value *, 4> Operands;
+  auto *CI = dyn_cast<CallInst>(V);
+
+  if (!CI || isAssumeLikeIntrinsic(CI))
+    return {};
+  Intrinsic::ID ID = CI->getIntrinsicID();
+  // Only consider intrinsic calls.
+  // FIXME: We may want to relax this condition in future.
+  if (ID == Intrinsic::not_intrinsic || isTriviallyVectorizable(ID))
+    return {};
+
+  // Skip memory intrinsics (e.g., masked.load, masked.gather etc.)
+  if (CI->mayReadOrWriteMemory())
+    return {};
+
+  for (Value *ArgOp : CI->args()) {
+    if (auto *I = dyn_cast<Instruction>(ArgOp)) {
+      Operands.emplace_back(I);
+    }
----------------
mssefat wrote:

Added a FIXME. 

https://github.com/llvm/llvm-project/pull/189784