[llvm] [SLP]Initial compatibility support for shl v, 1 and add v, v (PR #181168)

Tue Mar 3 14:04:24 PST 2026

================
@@ -23159,9 +23289,17 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
         if (areAllOperandsReplacedByCopyableData(
                 cast<Instruction>(U), BundleMember->getInst(), *SLP, NumOps))
           continue;
-        BundleMember->incDependencies();
+        unsigned Inc = 1;
+        // Increment twice, since the operand was expanded in binop.
+        for (const TreeEntry *UserTE : SLP->getTreeEntries(U)) {
+          if (UserTE->isExpandedBinOp(U))
+            ++Inc;
+        }
+        if (ExpandedOps.contains(U))
+          ++Inc;
----------------
bababuck wrote:

I'm not sure whether this is a bug, but consider the following case. I think it's possibly related to the fact that we don't track how many times an operation is expanded.
`opt -passes=slp-vectorizer -mtriple=riscv64 -mattr=+m,+v -S -slp-threshold=-5000`
```
define void @vec_add(ptr %dest, ptr %p) {
entry:
  %inc0 = getelementptr inbounds i16, ptr %p, i64 1
  %inc1 = getelementptr inbounds i16, ptr %p, i64 2
  %inc2 = getelementptr inbounds i16, ptr %p, i64 3
  %e0 = load i16, ptr %p, align 4
  %e1 = load i16, ptr %inc0, align 2
  %e2 = load i16, ptr %inc1, align 2
  %e3 = load i16, ptr %inc2, align 2

  %a0 = add i16 %e0, %e0
  %a1 = shl i16 %e2, 1
  %a2 = shl i16 %e2, 1
  %a3 = add i16 %e3, %e3

  %inc4 = getelementptr inbounds i16, ptr %dest, i64 1
  %inc5 = getelementptr inbounds i16, ptr %dest, i64 2
  %inc6 = getelementptr inbounds i16, ptr %dest, i64 3

  store i16 %a0, ptr %dest, align 4
  store i16 %a1, ptr %inc4, align 2
  store i16 %a2, ptr %inc5, align 2
  store i16 %a3, ptr %inc6, align 2
  ret void
}
```
lowers to
```
define void @vec_add(ptr %dest, ptr %p) #0 {
entry:
  %inc0 = getelementptr inbounds i16, ptr %p, i64 1
  %inc2 = getelementptr inbounds i16, ptr %p, i64 3
  %e1 = load i16, ptr %inc0, align 2
  %e3 = load i16, ptr %inc2, align 2
  %0 = call <3 x i16> @llvm.masked.load.v3i16.p0(ptr align 4 %p, <3 x i1> <i1 true, i1 false, i1 true>, <3 x i16> poison)
  %1 = shufflevector <3 x i16> %0, <3 x i16> poison, <2 x i32> <i32 0, i32 2>
  %2 = shufflevector <3 x i16> %0, <3 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 poison>
  %3 = insertelement <4 x i16> %2, i16 %e3, i32 3
  %4 = add <4 x i16> %3, %3
  store <4 x i16> %4, ptr %dest, align 4
  ret void
}
```
Note the extra shufflevector `%1` and the load `%e1` that was not deleted; both are unused.

https://github.com/llvm/llvm-project/pull/181168