[llvm] [SLP] Do not skip tiny trees with gathered loads to vectorize (PR #190040)

Thu Apr 2 07:12:21 PDT 2026

asb wrote:

@alexey-bataev here's a reproducer:

```llvm
; ModuleID = '/tmp/harris-reduced-snapshot.bc'
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

; Reduced reproducer (Itanium-mangled name of a C++ function `harrisKernel`).
; The body is a single basic block that branches back to itself, so the
; function has no exit at the IR level. Many arguments (%Syy, %arrayidx223,
; %arrayidx254, %1, %2, %arrayidx267, %3, %arrayidx274, %4) are never
; referenced in the body.
; NOTE(review): the unused arguments and dead instructions look like reducer
; leftovers; presumably they should be kept as-is, since removing them may
; change what the SLP vectorizer sees -- confirm before cleaning up.
define void @_Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_(ptr %Iyy, ptr %Syy, i32 %add226, i32 %_i1214.0, ptr %arrayidx221, i64 %idxprom222, ptr %arrayidx223, float %0, i64 %idxprom233, ptr %arrayidx254, float %1, float %2, ptr %arrayidx264, i64 %idxprom227, ptr %arrayidx267, float %3, ptr %arrayidx274, float %4) {
entry:
  br label %for.cond215

for.cond215:                                      ; preds = %for.cond215, %entry
  ; The phi result is not used anywhere in the block.
  %_i1214.01 = phi i32 [ 0, %entry ], [ %add226, %for.cond215 ]
  ; Address of the second [2050 x float] row starting at %Iyy; used for %9 below.
  %arrayidx2212 = getelementptr inbounds [2050 x float], ptr %Iyy, i64 1
  %idxprom2223 = sext i32 %_i1214.0 to i64
  ; Unused GEP and unused load (%5).
  %arrayidx2234 = getelementptr inbounds [2050 x float], ptr %arrayidx221, i64 0, i64 %idxprom222
  %5 = load float, ptr %Iyy, align 4
  ; Live computation: a chain of four `fadd fast` operations summing %0 with
  ; four loaded floats (%6, %7, %8, %9); the result is stored to %Iyy at the
  ; end of the block.
  ; %6 = row 0 of %Iyy at column (%add226 + 1).
  %add2265 = add nsw i32 %add226, 1
  %idxprom2276 = sext i32 %add2265 to i64
  %arrayidx228 = getelementptr inbounds [2050 x float], ptr %Iyy, i64 0, i64 %idxprom2276
  %6 = load float, ptr %arrayidx228, align 4
  %add229 = fadd fast float %0, %6
  ; %7 = row 0 of %Iyy at column (%add226 + 2), i.e. adjacent to %6.
  %add232 = add nsw i32 %add226, 2
  %idxprom2337 = sext i32 %add232 to i64
  %arrayidx234 = getelementptr inbounds [2050 x float], ptr %Iyy, i64 0, i64 %idxprom2337
  %7 = load float, ptr %arrayidx234, align 4
  %add235 = fadd fast float %add229, %7
  ; %8 = row 0 of %Iyy at column sext(%_i1214.0); this index is an independent
  ; argument, so its distance from %6/%7 is not known from the IR.
  %arrayidx240 = getelementptr inbounds [2050 x float], ptr %Iyy, i64 0, i64 %idxprom2223
  %8 = load float, ptr %arrayidx240, align 4
  %add241 = fadd fast float %add235, %8
  ; %9 = row 1 of %Iyy (via %arrayidx2212) at column (%add226 + 1).
  ; NOTE(review): %6..%9 are presumably the "gathered loads" the PR title
  ; refers to -- confirm against the SLP debug output.
  %arrayidx247 = getelementptr inbounds [2050 x float], ptr %arrayidx2212, i64 0, i64 %idxprom2276
  %9 = load float, ptr %arrayidx247, align 4
  %add248 = fadd fast float %add241, %9
  ; Everything from here to the store is unused: the GEP results, the loads
  ; %10/%11/%12 from %Iyy, and the four `fadd fast %0, %0` values are never
  ; referenced again.
  %arrayidx2548 = getelementptr inbounds [2050 x float], ptr %arrayidx221, i64 0, i64 %idxprom233
  %10 = load float, ptr %Iyy, align 4
  %add255 = fadd fast float %0, %0
  %add261 = fadd fast float %0, %0
  %arrayidx2649 = getelementptr inbounds [2050 x float], ptr %Iyy, i64 2
  %arrayidx26710 = getelementptr inbounds [2050 x float], ptr %arrayidx264, i64 0, i64 %idxprom227
  %11 = load float, ptr %Iyy, align 4
  %add268 = fadd fast float %0, %0
  %arrayidx27411 = getelementptr inbounds [2050 x float], ptr %arrayidx264, i64 0, i64 %idxprom233
  %12 = load float, ptr %Iyy, align 4
  %add275 = fadd fast float %0, %0
  ; Only store in the function: writes the fadd chain result to %Iyy.
  store float %add248, ptr %Iyy, align 4
  ; Unconditional back-edge to the same block (no loop exit).
  br label %for.cond215
}

; The two lifetime intrinsics below are only declared; the function above
; contains no calls to them.
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(ptr captures(none)) #0

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(ptr captures(none)) #0

; Attribute group #0 is referenced only by the two declarations above.
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
```

Save this as `repro.ll`; then `clang --target=riscv64-linux-gnu -march=rva23u64 -O3 -c repro.ll` will run forever (the compilation never terminates).

https://github.com/llvm/llvm-project/pull/190040