[llvm] [LAA] Move scalable vector check into `getStrideFromAddRec()` (PR #154013)

Mon Aug 18 06:20:25 PDT 2025

https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/154013

>From 7126bc1f3ab73e74d38da7b020866136d9461a17 Mon Sep 17 00:00:00 2001
From: MacDue <macdue at dueutil.tech>
Date: Sun, 17 Aug 2025 12:41:22 +0100
Subject: [PATCH 1/2] [LAA] Move scalable vector check into
 `getStrideFromAddRec()`

---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 11 ++++---
 .../Analysis/LoopAccessAnalysis/pr153797.ll   | 32 +++++++++++++++++++
 2 files changed, 38 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/pr153797.ll

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 62baf9b632bc7..bceddd0325276 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -936,6 +936,12 @@ class AccessAnalysis {
 static std::optional<int64_t>
 getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
                     Value *Ptr, PredicatedScalarEvolution &PSE) {
+  if (isa<ScalableVectorType>(AccessTy)) {
+    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
+                      << "\n");
+    return std::nullopt;
+  }
+
   // The access function must stride over the innermost loop.
   if (Lp != AR->getLoop()) {
     LLVM_DEBUG({
@@ -1590,11 +1596,6 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
     return 0;
 
   assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
-  if (isa<ScalableVectorType>(AccessTy)) {
-    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
-                      << "\n");
-    return std::nullopt;
-  }
 
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
   if (Assume && !AR)
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pr153797.ll b/llvm/test/Analysis/LoopAccessAnalysis/pr153797.ll
new file mode 100644
index 0000000000000..4f22837ba3f52
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/pr153797.ll
@@ -0,0 +1,32 @@
+; RUN: opt -mtriple=aarch64-none-elf -mattr=+sve2 -O2 -disable-output
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64--linux-gnueabihf"
+
+; This verifies LAA does not attempt to get a fixed element count on a scalable vector.
+; From issue: https://github.com/llvm/llvm-project/issues/153797
+
+define i32 @gradient_fast_par_for_gradient_fast_s0_x_v18_v22(ptr %gradient_fast, i64 %0, ptr %1) {
+entry:
+  br label %"2_for_gradient_fast.s0.x.v20.v23"
+
+"2_for_gradient_fast.s0.x.v20.v23":               ; preds = %"2_for_gradient_fast.s0.x.v20.v23", %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %"2_for_gradient_fast.s0.x.v20.v23" ]
+  %2 = shl i64 %indvars.iv, 1
+  %3 = add i64 %2, %0
+  %4 = trunc i64 %indvars.iv to i32
+  %5 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %4, i64 0
+  %6 = getelementptr i32, ptr %gradient_fast, i64 %3
+  store <vscale x 4 x i32> %5, ptr %6, align 4
+  %.reass3 = or i32 %4, 1
+  %7 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %.reass3, i64 0
+  %8 = shufflevector <vscale x 4 x i32> %7, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %9 = getelementptr i32, ptr %1, i64 %3
+  store <vscale x 4 x i32> %8, ptr %9, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %.not = icmp eq i64 %indvars.iv, 16
+  br i1 %.not, label %"2_end_for_gradient_fast.s0.x.v20.v23", label %"2_for_gradient_fast.s0.x.v20.v23"
+
+"2_end_for_gradient_fast.s0.x.v20.v23":           ; preds = %"2_for_gradient_fast.s0.x.v20.v23"
+  ret i32 0
+}

>From 749ca48882472f8b497ba58ab045faac80a6ff11 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 18 Aug 2025 13:18:34 +0000
Subject: [PATCH 2/2] Fixups

---
 .../Analysis/LoopAccessAnalysis/pr153797.ll   | 32 -------------------
 .../scalable-vector-regression-tests.ll       | 26 +++++++++++++++
 2 files changed, 26 insertions(+), 32 deletions(-)
 delete mode 100644 llvm/test/Analysis/LoopAccessAnalysis/pr153797.ll

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pr153797.ll b/llvm/test/Analysis/LoopAccessAnalysis/pr153797.ll
deleted file mode 100644
index 4f22837ba3f52..0000000000000
--- a/llvm/test/Analysis/LoopAccessAnalysis/pr153797.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: opt -mtriple=aarch64-none-elf -mattr=+sve2 -O2 -disable-output
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
-target triple = "aarch64--linux-gnueabihf"
-
-; This verifies LAA does not attempt to get a fixed element count on a scalable vector.
-; From issue: https://github.com/llvm/llvm-project/issues/153797
-
-define i32 @gradient_fast_par_for_gradient_fast_s0_x_v18_v22(ptr %gradient_fast, i64 %0, ptr %1) {
-entry:
-  br label %"2_for_gradient_fast.s0.x.v20.v23"
-
-"2_for_gradient_fast.s0.x.v20.v23":               ; preds = %"2_for_gradient_fast.s0.x.v20.v23", %entry
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %"2_for_gradient_fast.s0.x.v20.v23" ]
-  %2 = shl i64 %indvars.iv, 1
-  %3 = add i64 %2, %0
-  %4 = trunc i64 %indvars.iv to i32
-  %5 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %4, i64 0
-  %6 = getelementptr i32, ptr %gradient_fast, i64 %3
-  store <vscale x 4 x i32> %5, ptr %6, align 4
-  %.reass3 = or i32 %4, 1
-  %7 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %.reass3, i64 0
-  %8 = shufflevector <vscale x 4 x i32> %7, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
-  %9 = getelementptr i32, ptr %1, i64 %3
-  store <vscale x 4 x i32> %8, ptr %9, align 4
-  %indvars.iv.next = add i64 %indvars.iv, 1
-  %.not = icmp eq i64 %indvars.iv, 16
-  br i1 %.not, label %"2_end_for_gradient_fast.s0.x.v20.v23", label %"2_for_gradient_fast.s0.x.v20.v23"
-
-"2_end_for_gradient_fast.s0.x.v20.v23":           ; preds = %"2_for_gradient_fast.s0.x.v20.v23"
-  ret i32 0
-}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
index ffa5b3c868ab0..1d2131b3b5441 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
@@ -61,3 +61,29 @@ vector.body:
 end:
   ret void
 }
+
+; CHECK-LABEL: 'regression_test_is_no_wrap_access_scalable_typesize'
+; CHECK: LAA: Found an analyzable loop: loop
+; CHECK: LAA: Bad stride - Scalable object: <vscale x 4 x i32>
+define void @regression_test_is_no_wrap_access_scalable_typesize(ptr %ptr_a, i64 %n, ptr %ptr_b) {
+entry:
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+  %2 = shl i64 %indvars.iv, 1
+  %3 = add i64 %2, %n
+  %4 = trunc i64 %indvars.iv to i32
+  %5 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %4, i64 0
+  %6 = getelementptr i32, ptr %ptr_a, i64 %3
+  store <vscale x 4 x i32> %5, ptr %6, align 4
+  %.reass3 = or i32 %4, 1
+  %7 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %.reass3, i64 0
+  %8 = shufflevector <vscale x 4 x i32> %7, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %9 = getelementptr i32, ptr %ptr_b, i64 %3
+  store <vscale x 4 x i32> %8, ptr %9, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %.not = icmp eq i64 %indvars.iv, 16
+  br i1 %.not, label %end, label %loop
+end:
+  ret void
+}