[llvm] [SLP]Initial compatibility support for shl v, 1 and add v, v (PR #181168)
Ryan Buchner via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 14:04:24 PST 2026
================
@@ -23159,9 +23289,17 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
if (areAllOperandsReplacedByCopyableData(
cast<Instruction>(U), BundleMember->getInst(), *SLP, NumOps))
continue;
- BundleMember->incDependencies();
+ unsigned Inc = 1;
+ // Increment twice, since the operand was expanded in binop.
+ for (const TreeEntry *UserTE : SLP->getTreeEntries(U)) {
+ if (UserTE->isExpandedBinOp(U))
+ ++Inc;
+ }
+ if (ExpandedOps.contains(U))
+ ++Inc;
----------------
bababuck wrote:
I'm not sure whether this is a bug, but consider the following case. I think it's possibly related to the fact that we don't track how many times an operation is expanded.
`opt -passes=slp-vectorizer -mtriple=riscv64 -mattr=+m,+v -S -slp-threshold=-5000`
```
define void @vec_add(ptr %dest, ptr %p) {
entry:
%inc0 = getelementptr inbounds i16, ptr %p, i64 1
%inc1 = getelementptr inbounds i16, ptr %p, i64 2
%inc2 = getelementptr inbounds i16, ptr %p, i64 3
%e0 = load i16, ptr %p, align 4
%e1 = load i16, ptr %inc0, align 2
%e2 = load i16, ptr %inc1, align 2
%e3 = load i16, ptr %inc2, align 2
%a0 = add i16 %e0, %e0
%a1 = shl i16 %e2, 1
%a2 = shl i16 %e2, 1
%a3 = add i16 %e3, %e3
%inc4 = getelementptr inbounds i16, ptr %dest, i64 1
%inc5 = getelementptr inbounds i16, ptr %dest, i64 2
%inc6 = getelementptr inbounds i16, ptr %dest, i64 3
store i16 %a0, ptr %dest, align 4
store i16 %a1, ptr %inc4, align 2
store i16 %a2, ptr %inc5, align 2
store i16 %a3, ptr %inc6, align 2
ret void
}
```
This lowers to:
```
define void @vec_add(ptr %dest, ptr %p) #0 {
entry:
%inc0 = getelementptr inbounds i16, ptr %p, i64 1
%inc2 = getelementptr inbounds i16, ptr %p, i64 3
%e1 = load i16, ptr %inc0, align 2
%e3 = load i16, ptr %inc2, align 2
%0 = call <3 x i16> @llvm.masked.load.v3i16.p0(ptr align 4 %p, <3 x i1> <i1 true, i1 false, i1 true>, <3 x i16> poison)
%1 = shufflevector <3 x i16> %0, <3 x i16> poison, <2 x i32> <i32 0, i32 2>
%2 = shufflevector <3 x i16> %0, <3 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 poison>
%3 = insertelement <4 x i16> %2, i16 %e3, i32 3
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, ptr %dest, align 4
ret void
}
```
Note the extra shufflevector `%1` and the load `%e1` that was not deleted; both are unused.
https://github.com/llvm/llvm-project/pull/181168
More information about the llvm-commits
mailing list