[llvm] [SLP]Initial compatibility support for shl v, 1 and add v, v (PR #181168)

Tue Mar 3 12:51:44 PST 2026

================
@@ -1004,8 +1004,12 @@ class BinOpSameOpcodeHelper {
     /// preventing us from determining which instruction it should convert to.
     bool trySet(MaskType OpcodeInMaskForm, MaskType InterchangeableMask) {
       if (Mask & InterchangeableMask) {
-        SeenBefore |= OpcodeInMaskForm;
-        Mask &= InterchangeableMask;
+        MaskType TempSeenBefore = SeenBefore | OpcodeInMaskForm;
+        MaskType TempMask = Mask & InterchangeableMask;
+        if (!(TempMask & TempSeenBefore))
+          return false;
+        Mask = TempMask;
+        SeenBefore = TempSeenBefore;
----------------
bababuck wrote:

For a case such as the following, we fail to vectorize optimally because `sub` and `shl` aren't considered compatible by this algorithm: at the time we process the `shl`, we haven't yet seen an `add` instruction, and `add` is the only opcode that both `sub` and `shl` can be converted to. I don't think this needs to be addressed in this MR. On a related note, why do we require the following?
```
We cannot create an interchangeable instruction that does not exist in VL
```
```
; Reproducer: loads four consecutive i16 values starting at %p, applies a
; different binary op with the constant 1 to each lane (sub, shl, add, add),
; and stores the four results to consecutive i16 slots starting at %dest.
define void @vec_add(ptr %dest, ptr %p) {
entry:
  ; Addresses of source elements 1..3 (element 0 is %p itself).
  %inc0 = getelementptr inbounds i16, ptr %p, i64 1
  %inc1 = getelementptr inbounds i16, ptr %p, i64 2
  %inc2 = getelementptr inbounds i16, ptr %p, i64 3
  ; Scalar loads of the four source lanes.
  %e0 = load i16, ptr %p, align 4
  %e1 = load i16, ptr %inc0, align 2
  %e2 = load i16, ptr %inc1, align 2
  %e3 = load i16, ptr %inc2, align 2

  ; Per-lane operations, in lane order: sub, shl, add, add.
  ; Note that the shl (lane 1) appears before any add (lanes 2 and 3).
  %a0 = sub i16 %e0, 1
  %a1 = shl i16 %e1, 1
  %a2 = add i16 %e2, 1
  %a3 = add i16 %e3, 1

  ; Addresses of destination elements 1..3 (element 0 is %dest itself).
  %inc4 = getelementptr inbounds i16, ptr %dest, i64 1
  %inc5 = getelementptr inbounds i16, ptr %dest, i64 2
  %inc6 = getelementptr inbounds i16, ptr %dest, i64 3

  ; Scalar stores of the four result lanes, in the same lane order.
  store i16 %a0, ptr %dest, align 4
  store i16 %a1, ptr %inc4, align 2
  store i16 %a2, ptr %inc5, align 2
  store i16 %a3, ptr %inc6, align 2
  ret void
}
```
lowers to
```
%tmp0 = load <4 x i16>, ptr %p, align 4
%tmp3 = add <4 x i16> %tmp0, <i16 -1, i16 1, i16 1, i16 1>
%tmp2 = shl <4 x i16> %tmp0, <i16 -1, i16 1, i16 1, i16 1>
%tmp1 = shufflevector <4 x i16> %tmp3, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
store <4 x i16> %tmp1, ptr %dest, align 4
ret void
```

https://github.com/llvm/llvm-project/pull/181168