[llvm] [SLP] Do not skip tiny trees with gathered loads to vectorize (PR #190040)
Alex Bradbury via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 07:12:21 PDT 2026
asb wrote:
@alexey-bataev here's a reproducer:
```llvm
; ModuleID = '/tmp/harris-reduced-snapshot.bc'
; Target settings are part of the reproduction conditions; keep them unchanged.
; Data layout string: little-endian (e), ELF name mangling (m:e), 64-bit
; pointers with 64-bit alignment (p:64:64), i64 aligned to 64 bits, i128
; aligned to 128 bits, native integer widths of 32 and 64 bits (n32:64),
; and 128-bit natural stack alignment (S128).
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
; 64-bit RISC-V Linux target.
target triple = "riscv64-unknown-linux-gnu"
; Reduced reproducer function for the compile-time hang reported in this thread.
; The body is a single self-looping block (%for.cond215) with no exit edge.
; Of the parameters, only %Iyy, %add226, %_i1214.0, %arrayidx221, %idxprom222,
; %0, %idxprom233, %arrayidx264 and %idxprom227 are referenced in the body;
; the others (%Syy, %arrayidx223, %arrayidx254, %1, %2, %arrayidx267, %3,
; %arrayidx274, %4) are unused.
; The only value that reaches memory is %add248: a chain of four
; `fadd fast` operations that starts from %0 and adds four floats loaded from
; addresses computed off %Iyy. Every other load/fadd/getelementptr result
; below has no user inside this function.
; NOTE(review): presumably this fadd chain over the four loads is what the
; SLP vectorizer treats as a tiny tree with gathered loads (per the PR title);
; this cannot be confirmed from the IR alone.
; Do not clean up the dead instructions: the exact shape of this reduced IR
; may be required to trigger the hang.
define void @_Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_(ptr %Iyy, ptr %Syy, i32 %add226, i32 %_i1214.0, ptr %arrayidx221, i64 %idxprom222, ptr %arrayidx223, float %0, i64 %idxprom233, ptr %arrayidx254, float %1, float %2, ptr %arrayidx264, i64 %idxprom227, ptr %arrayidx267, float %3, ptr %arrayidx274, float %4) {
entry:
br label %for.cond215
for.cond215: ; preds = %for.cond215, %entry
; Induction-style phi; its result has no user in this function.
%_i1214.01 = phi i32 [ 0, %entry ], [ %add226, %for.cond215 ]
; Address of the [2050 x float] row immediately after the one at %Iyy.
%arrayidx2212 = getelementptr inbounds [2050 x float], ptr %Iyy, i64 1
; Sign-extended column index taken from the %_i1214.0 parameter.
%idxprom2223 = sext i32 %_i1214.0 to i64
; Unused address computation and unused load of %Iyy.
%arrayidx2234 = getelementptr inbounds [2050 x float], ptr %arrayidx221, i64 0, i64 %idxprom222
%5 = load float, ptr %Iyy, align 4
; Chain load 1: row 0 of %Iyy, column %add226 + 1.
%add2265 = add nsw i32 %add226, 1
%idxprom2276 = sext i32 %add2265 to i64
%arrayidx228 = getelementptr inbounds [2050 x float], ptr %Iyy, i64 0, i64 %idxprom2276
%6 = load float, ptr %arrayidx228, align 4
%add229 = fadd fast float %0, %6
; Chain load 2: row 0 of %Iyy, column %add226 + 2.
%add232 = add nsw i32 %add226, 2
%idxprom2337 = sext i32 %add232 to i64
%arrayidx234 = getelementptr inbounds [2050 x float], ptr %Iyy, i64 0, i64 %idxprom2337
%7 = load float, ptr %arrayidx234, align 4
%add235 = fadd fast float %add229, %7
; Chain load 3: row 0 of %Iyy, column sext(%_i1214.0).
%arrayidx240 = getelementptr inbounds [2050 x float], ptr %Iyy, i64 0, i64 %idxprom2223
%8 = load float, ptr %arrayidx240, align 4
%add241 = fadd fast float %add235, %8
; Chain load 4: the row after %Iyy (%arrayidx2212), column %add226 + 1.
%arrayidx247 = getelementptr inbounds [2050 x float], ptr %arrayidx2212, i64 0, i64 %idxprom2276
%9 = load float, ptr %arrayidx247, align 4
%add248 = fadd fast float %add241, %9
; Everything from here up to the store is unused: address computations,
; repeated loads of %Iyy, and `%0 + %0` additions with no users.
%arrayidx2548 = getelementptr inbounds [2050 x float], ptr %arrayidx221, i64 0, i64 %idxprom233
%10 = load float, ptr %Iyy, align 4
%add255 = fadd fast float %0, %0
%add261 = fadd fast float %0, %0
%arrayidx2649 = getelementptr inbounds [2050 x float], ptr %Iyy, i64 2
%arrayidx26710 = getelementptr inbounds [2050 x float], ptr %arrayidx264, i64 0, i64 %idxprom227
%11 = load float, ptr %Iyy, align 4
%add268 = fadd fast float %0, %0
%arrayidx27411 = getelementptr inbounds [2050 x float], ptr %arrayidx264, i64 0, i64 %idxprom233
%12 = load float, ptr %Iyy, align 4
%add275 = fadd fast float %0, %0
; The only store: writes the accumulated sum to the address %Iyy.
store float %add248, ptr %Iyy, align 4
; Unconditional back-edge; the block never exits.
br label %for.cond215
}
; NOTE(review): neither lifetime intrinsic declared below is called by the
; function above; the declarations and attribute group #0 are presumably
; leftovers of the test-case reduction.
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(ptr captures(none)) #0
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(ptr captures(none)) #0
; Attribute group shared by the two declarations above.
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
```
With the IR above saved as `repro.ll`, running `clang --target=riscv64-linux-gnu -march=rva23u64 -O3 -c repro.ll` never terminates (the compiler hangs).
https://github.com/llvm/llvm-project/pull/190040
More information about the llvm-commits
mailing list