[llvm] LoopLoadElim: don't version single-iteration loops (PR #97599)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 3 09:11:48 PDT 2024


https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/97599

It is unnecessary for LoopLoadElim to version single-iteration loops. Don't call LoopVersioning when the backedge-taken count (BTC) is known to be 1.

Fixes #96656.

-- 8< --
Based on #97598.

>From 93c9e035c7b45d253448a6b9301c58f5cba7a57b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 3 Jul 2024 14:57:10 +0100
Subject: [PATCH 1/2] LoopLoadElim: add pre-commit test for #96656

After the pr96656.ll tests were added to LAA and LoopVersioning, it was
decided that the bug is in a caller of LoopVersioning, not in LAA or
LoopVersioning itself. The caller has now been found to be
LoopLoadElim. Hence, reorganize the added tests to avoid confusion, and
add a new reduced test for #96656 to LoopLoadElim, in preparation for
fixing the bug.
---
 .../Analysis/LoopAccessAnalysis/pr96656.ll    | 49 -----------
 .../LoopAccessAnalysis/symbolic-stride.ll     | 48 ++++++++++
 llvm/test/Transforms/LoopLoadElim/pr96656.ll  | 87 +++++++++++++++++++
 .../{pr96656.ll => single-iteration.ll}       | 36 ++++----
 4 files changed, 155 insertions(+), 65 deletions(-)
 delete mode 100644 llvm/test/Analysis/LoopAccessAnalysis/pr96656.ll
 create mode 100644 llvm/test/Transforms/LoopLoadElim/pr96656.ll
 rename llvm/test/Transforms/LoopVersioning/{pr96656.ll => single-iteration.ll} (75%)

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pr96656.ll b/llvm/test/Analysis/LoopAccessAnalysis/pr96656.ll
deleted file mode 100644
index 5b9833553fa02..0000000000000
--- a/llvm/test/Analysis/LoopAccessAnalysis/pr96656.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
-
-define void @false.equal.predicate(ptr %arg, ptr %arg1, i1 %arg2) {
-; CHECK-LABEL: 'false.equal.predicate'
-; CHECK-NEXT:    loop.body:
-; CHECK-NEXT:      Memory dependences are safe
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-NEXT:      Equal predicate: %load == 1
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
-; CHECK-NEXT:      [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul:
-; CHECK-NEXT:        {(8 + %arg1),+,(8 * (sext i32 %load to i64))<nsw>}<%loop.body>
-; CHECK-NEXT:        --> {(8 + %arg1),+,8}<%loop.body>
-;
-entry:
-  %load = load i32, ptr %arg, align 4
-  br i1 %arg2, label %noloop.exit, label %loop.ph
-
-loop.ph:                                          ; preds = %entry
-  %sext7 = sext i32 %load to i64
-  %gep8 = getelementptr i8, ptr %arg1, i64 8
-  br label %loop.body
-
-loop.body:                                        ; preds = %loop.body, %loop.ph
-  %phi = phi i64 [ 0, %loop.ph ], [ %add, %loop.body ]
-  %mul = mul i64 %phi, %sext7
-  %gep10 = getelementptr double, ptr %gep8, i64 %mul
-  %load11 = load double, ptr %gep10, align 8
-  store double %load11, ptr %arg1, align 8
-  %add = add i64 %phi, 1
-  %icmp = icmp eq i64 %phi, 0
-  br i1 %icmp, label %loop.exit, label %loop.body
-
-noloop.exit:                                      ; preds = %entry
-  %sext = sext i32 %load to i64
-  %gep = getelementptr double, ptr %arg1, i64 %sext
-  %load5 = load double, ptr %gep, align 8
-  store double %load5, ptr %arg, align 8
-  ret void
-
-loop.exit:                                        ; preds = %loop.body
-  ret void
-}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 7c1b11e22aef2..0871ae84c3711 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -223,6 +223,54 @@ exit:
   ret void
 }
 
+define double @single_iteration_unknown_stride(i32 %arg, ptr %arg1, i1 %arg2) {
+; CHECK-LABEL: 'single_iteration_unknown_stride'
+; CHECK-NEXT:    loop.body:
+; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-NEXT:      Equal predicate: %arg == 1
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+; CHECK-NEXT:      [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul:
+; CHECK-NEXT:        {(8 + %arg1),+,(8 * (sext i32 %arg to i64))<nsw>}<%loop.body>
+; CHECK-NEXT:        --> {(8 + %arg1),+,8}<%loop.body>
+;
+entry:
+  br i1 %arg2, label %noloop.exit, label %loop.ph
+
+loop.ph:                                          ; preds = %entry
+  %sext7 = sext i32 %arg to i64
+  %gep8 = getelementptr i8, ptr %arg1, i64 8
+  br label %loop.body
+
+loop.body:                                        ; preds = %loop.body, %loop.ph
+  %phi = phi i64 [ 0, %loop.ph ], [ %add, %loop.body ]
+  %mul = mul i64 %phi, %sext7
+  %gep10 = getelementptr double, ptr %gep8, i64 %mul
+  %load11 = load double, ptr %gep10, align 8
+  store double %load11, ptr %arg1, align 8
+  %add = add i64 %phi, 1
+  %icmp = icmp eq i64 %phi, 0
+  br i1 %icmp, label %loop.exit, label %loop.body
+
+noloop.exit:                                      ; preds = %entry
+  %sext = sext i32 %arg to i64
+  %gep = getelementptr double, ptr %arg1, i64 %sext
+  %load5 = load double, ptr %gep, align 8
+  ret double %load5
+
+loop.exit:                                        ; preds = %loop.body
+  %sext2 = sext i32 %arg to i64
+  %gep2 = getelementptr double, ptr %arg1, i64 %sext2
+  %load6 = load double, ptr %gep2, align 8
+  ret double %load6
+}
+
 ; A loop with two symbolic strides.
 define void @two_strides(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride.1, i64 %stride.2) {
 ; CHECK-LABEL: 'two_strides'
diff --git a/llvm/test/Transforms/LoopLoadElim/pr96656.ll b/llvm/test/Transforms/LoopLoadElim/pr96656.ll
new file mode 100644
index 0000000000000..c6b336c0db8d3
--- /dev/null
+++ b/llvm/test/Transforms/LoopLoadElim/pr96656.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-load-elim -S %s | FileCheck %s
+
+define void @single_iteration_versioning(ptr %arg, ptr %arg1, i1 %arg2) {
+; CHECK-LABEL: define void @single_iteration_versioning(
+; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]], i1 [[ARG2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[ARG]], align 4
+; CHECK-NEXT:    br i1 [[ARG2]], label %[[NOLOOP_EXIT:.*]], label %[[LOOP_LVER_CHECK:.*]]
+; CHECK:       [[LOOP_LVER_CHECK]]:
+; CHECK-NEXT:    [[SEXT7:%.*]] = sext i32 [[LOAD]] to i64
+; CHECK-NEXT:    [[GEP8:%.*]] = getelementptr i8, ptr [[ARG1]], i64 8
+; CHECK-NEXT:    [[GEP9:%.*]] = getelementptr i8, ptr [[ARG1]], i64 16
+; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[LOAD]], 1
+; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label %[[LOOP_PH_LVER_ORIG:.*]], label %[[LOOP_PH:.*]]
+; CHECK:       [[LOOP_PH_LVER_ORIG]]:
+; CHECK-NEXT:    br label %[[LOOP_LVER_ORIG:.*]]
+; CHECK:       [[LOOP_LVER_ORIG]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ 0, %[[LOOP_PH_LVER_ORIG]] ], [ [[ADD:%.*]], %[[LOOP_LVER_ORIG]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[PHI]], [[SEXT7]]
+; CHECK-NEXT:    [[GEP10:%.*]] = getelementptr double, ptr [[GEP8]], i64 [[MUL]]
+; CHECK-NEXT:    [[LOAD11:%.*]] = load double, ptr [[GEP10]], align 8
+; CHECK-NEXT:    [[GEP13_LVER_ORIG:%.*]] = getelementptr double, ptr [[GEP9]], i64 [[MUL]]
+; CHECK-NEXT:    store double [[LOAD11]], ptr [[GEP13_LVER_ORIG]], align 8
+; CHECK-NEXT:    [[ADD]] = add i64 [[PHI]], 1
+; CHECK-NEXT:    [[ICMP_LVER_ORIG:%.*]] = icmp eq i64 [[PHI]], 1
+; CHECK-NEXT:    br i1 [[ICMP_LVER_ORIG]], label %[[EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[LOOP_LVER_ORIG]]
+; CHECK:       [[LOOP_PH]]:
+; CHECK-NEXT:    [[LOAD_INITIAL:%.*]] = load double, ptr [[GEP8]], align 8
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[STORE_FORWARDED:%.*]] = phi double [ [[LOAD_INITIAL]], %[[LOOP_PH]] ], [ [[STORE_FORWARDED]], %[[LOOP]] ]
+; CHECK-NEXT:    [[PHI1:%.*]] = phi i64 [ 0, %[[LOOP_PH]] ], [ [[ADD1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MUL1:%.*]] = mul i64 [[PHI1]], [[SEXT7]]
+; CHECK-NEXT:    [[GEP11:%.*]] = getelementptr double, ptr [[GEP8]], i64 [[MUL1]]
+; CHECK-NEXT:    [[LOAD12:%.*]] = load double, ptr [[GEP11]], align 8
+; CHECK-NEXT:    [[GEP13:%.*]] = getelementptr double, ptr [[GEP9]], i64 [[MUL1]]
+; CHECK-NEXT:    store double [[STORE_FORWARDED]], ptr [[GEP13]], align 8
+; CHECK-NEXT:    [[ADD1]] = add i64 [[PHI1]], 1
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i64 [[PHI1]], 1
+; CHECK-NEXT:    br i1 [[ICMP]], label %[[EXIT_LOOPEXIT_LOOPEXIT1:.*]], label %[[LOOP]]
+; CHECK:       [[NOLOOP_EXIT]]:
+; CHECK-NEXT:    [[SEXT2:%.*]] = sext i32 [[LOAD]] to i64
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr double, ptr [[ARG1]], i64 [[SEXT2]]
+; CHECK-NEXT:    [[LOAD6:%.*]] = load double, ptr [[GEP2]], align 8
+; CHECK-NEXT:    store double [[LOAD6]], ptr [[ARG]], align 8
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT_LOOPEXIT_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[EXIT_LOOPEXIT:.*]]
+; CHECK:       [[EXIT_LOOPEXIT_LOOPEXIT1]]:
+; CHECK-NEXT:    br label %[[EXIT_LOOPEXIT]]
+; CHECK:       [[EXIT_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %load = load i32, ptr %arg, align 4
+  br i1 %arg2, label %noloop.exit, label %loop.ph
+
+loop.ph:                                              ; preds = %entry
+  %sext7 = sext i32 %load to i64
+  %gep8 = getelementptr i8, ptr %arg1, i64 8
+  %gep9 = getelementptr i8, ptr %arg1, i64 16
+  br label %loop
+
+loop:                                                 ; preds = %loop, %loop.ph
+  %phi = phi i64 [ 0, %loop.ph ], [ %add, %loop ]
+  %mul = mul i64 %phi, %sext7
+  %gep11 = getelementptr double, ptr %gep8, i64 %mul
+  %load12 = load double, ptr %gep11, align 8
+  %gep13 = getelementptr double, ptr %gep9, i64 %mul
+  store double %load12, ptr %gep13, align 8
+  %add = add i64 %phi, 1
+  %icmp = icmp eq i64 %phi, 1
+  br i1 %icmp, label %exit, label %loop
+
+noloop.exit:                                          ; preds = %loop.ph
+  %sext = sext i32 %load to i64
+  %gep = getelementptr double, ptr %arg1, i64 %sext
+  %load5 = load double, ptr %gep, align 8
+  store double %load5, ptr %arg, align 8
+  br label %exit
+
+exit:                                                 ; preds = %loop.body
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVersioning/pr96656.ll b/llvm/test/Transforms/LoopVersioning/single-iteration.ll
similarity index 75%
rename from llvm/test/Transforms/LoopVersioning/pr96656.ll
rename to llvm/test/Transforms/LoopVersioning/single-iteration.ll
index 0264fe40a9430..7be34f0678585 100644
--- a/llvm/test/Transforms/LoopVersioning/pr96656.ll
+++ b/llvm/test/Transforms/LoopVersioning/single-iteration.ll
@@ -1,16 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -passes=loop-versioning -S %s | FileCheck %s
 
-define void @lver.check.unnecessary(ptr %arg, ptr %arg1, i1 %arg2) {
-; CHECK-LABEL: define void @lver.check.unnecessary(
-; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]], i1 [[ARG2:%.*]]) {
+; Callers should not call LoopVersioning on single-iteration loops, but LoopVersioning faithfully versions the loop when the stride is unknown and there is just a single iteration.
+
+define double @single_iteration_unknown_stride(i32 %arg, ptr %arg1, i1 %arg2) {
+; CHECK-LABEL: define double @single_iteration_unknown_stride(
+; CHECK-SAME: i32 [[ARG:%.*]], ptr [[ARG1:%.*]], i1 [[ARG2:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[ARG]], align 4
 ; CHECK-NEXT:    br i1 [[ARG2]], label %[[NOLOOP_EXIT:.*]], label %[[LOOP_BODY_LVER_CHECK:.*]]
 ; CHECK:       [[LOOP_BODY_LVER_CHECK]]:
-; CHECK-NEXT:    [[SEXT7:%.*]] = sext i32 [[LOAD]] to i64
+; CHECK-NEXT:    [[SEXT7:%.*]] = sext i32 [[ARG]] to i64
 ; CHECK-NEXT:    [[GEP8:%.*]] = getelementptr i8, ptr [[ARG1]], i64 8
-; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[LOAD]], 1
+; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[ARG]], 1
 ; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label %[[LOOP_BODY_PH_LVER_ORIG:.*]], label %[[LOOP_BODY_PH:.*]]
 ; CHECK:       [[LOOP_BODY_PH_LVER_ORIG]]:
 ; CHECK-NEXT:    br label %[[LOOP_BODY_LVER_ORIG:.*]]
@@ -35,24 +36,25 @@ define void @lver.check.unnecessary(ptr %arg, ptr %arg1, i1 %arg2) {
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i64 [[PHI]], 0
 ; CHECK-NEXT:    br i1 [[ICMP]], label %[[LOOP_EXIT_LOOPEXIT1:.*]], label %[[LOOP_BODY]]
 ; CHECK:       [[NOLOOP_EXIT]]:
-; CHECK-NEXT:    [[SEXT:%.*]] = sext i32 [[LOAD]] to i64
+; CHECK-NEXT:    [[SEXT:%.*]] = sext i32 [[ARG]] to i64
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr double, ptr [[ARG1]], i64 [[SEXT]]
 ; CHECK-NEXT:    [[LOAD5:%.*]] = load double, ptr [[GEP]], align 8
-; CHECK-NEXT:    store double [[LOAD5]], ptr [[ARG]], align 8
-; CHECK-NEXT:    ret void
+; CHECK-NEXT:    ret double [[LOAD5]]
 ; CHECK:       [[LOOP_EXIT_LOOPEXIT]]:
 ; CHECK-NEXT:    br label %[[LOOP_EXIT:.*]]
 ; CHECK:       [[LOOP_EXIT_LOOPEXIT1]]:
 ; CHECK-NEXT:    br label %[[LOOP_EXIT]]
 ; CHECK:       [[LOOP_EXIT]]:
-; CHECK-NEXT:    ret void
+; CHECK-NEXT:    [[SEXT2:%.*]] = sext i32 [[ARG]] to i64
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr double, ptr [[ARG1]], i64 [[SEXT2]]
+; CHECK-NEXT:    [[LOAD6:%.*]] = load double, ptr [[GEP2]], align 8
+; CHECK-NEXT:    ret double [[LOAD6]]
 ;
 entry:
-  %load = load i32, ptr %arg, align 4
   br i1 %arg2, label %noloop.exit, label %loop.ph
 
 loop.ph:                                          ; preds = %entry
-  %sext7 = sext i32 %load to i64
+  %sext7 = sext i32 %arg to i64
   %gep8 = getelementptr i8, ptr %arg1, i64 8
   br label %loop.body
 
@@ -67,12 +69,14 @@ loop.body:                                        ; preds = %loop.body, %loop.ph
   br i1 %icmp, label %loop.exit, label %loop.body
 
 noloop.exit:                                      ; preds = %entry
-  %sext = sext i32 %load to i64
+  %sext = sext i32 %arg to i64
   %gep = getelementptr double, ptr %arg1, i64 %sext
   %load5 = load double, ptr %gep, align 8
-  store double %load5, ptr %arg, align 8
-  ret void
+  ret double %load5
 
 loop.exit:                                        ; preds = %loop.body
-  ret void
+  %sext2 = sext i32 %arg to i64
+  %gep2 = getelementptr double, ptr %arg1, i64 %sext2
+  %load6 = load double, ptr %gep2, align 8
+  ret double %load6
 }

>From 604c223cc1b46d39d5d342e7296b7ccd4681ed76 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 3 Jul 2024 15:45:19 +0100
Subject: [PATCH 2/2] LoopLoadElim: don't version single-iteration loops

It is unnecessary for LoopLoadElim to version single-iteration loops.
Don't call LoopVersioning when the backedge-taken count (BTC) is known to be 1.

Fixes #96656.
---
 .../Transforms/Scalar/LoopLoadElimination.cpp |  9 ++++---
 .../invalidate-laa-after-versioning.ll        | 25 ++++++++++---------
 llvm/test/Transforms/LoopLoadElim/pr96656.ll  | 25 +++----------------
 3 files changed, 21 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 489f12e689d31..058a749b9b703 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -598,10 +598,11 @@ class LoadEliminationForLoop {
       }
 
       // Point of no-return, start the transformation.  First, version the loop
-      // if necessary.
-
-      LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
-      LV.versionLoop();
+      // if it's not a single-iteration loop.
+      if (!PSE.getBackedgeTakenCount()->isOne()) {
+        LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
+        LV.versionLoop();
+      }
 
       // After versioning, some of the candidates' pointers could stop being
       // SCEVAddRecs. We need to filter them out.
diff --git a/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll b/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll
index 10e10653a431d..4d8a58feae750 100644
--- a/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll
+++ b/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll
@@ -12,7 +12,7 @@ define void @test(ptr %arg, i64 %arg1) {
 ; CHECK-NEXT:    br label [[INNER_1_LVER_CHECK:%.*]]
 ; CHECK:       inner.1.lver.check:
 ; CHECK-NEXT:    [[PTR_PHI:%.*]] = phi ptr [ [[ARG:%.*]], [[BB:%.*]] ], [ @glob.1, [[OUTER_LATCH:%.*]] ]
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 3
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds double, ptr [[PTR_PHI]], i64 3
 ; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[ARG1:%.*]], 1
 ; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label [[INNER_1_PH_LVER_ORIG:%.*]], label [[INNER_1_PH:%.*]]
 ; CHECK:       inner.1.ph.lver.orig:
@@ -28,7 +28,7 @@ define void @test(ptr %arg, i64 %arg1) {
 ; CHECK-NEXT:    [[TMP29_LVER_ORIG:%.*]] = load double, ptr [[GEP_4_LVER_ORIG]], align 8
 ; CHECK-NEXT:    [[PTR_IV_1_NEXT_LVER_ORIG]] = getelementptr inbounds double, ptr [[PTR_IV_1_LVER_ORIG]], i64 1
 ; CHECK-NEXT:    [[IV_NEXT_LVER_ORIG]] = add nuw nsw i64 [[IV_1_LVER_ORIG]], 1
-; CHECK-NEXT:    [[C_1_LVER_ORIG:%.*]] = icmp eq i64 [[IV_1_LVER_ORIG]], 1
+; CHECK-NEXT:    [[C_1_LVER_ORIG:%.*]] = icmp eq i64 [[IV_1_LVER_ORIG]], 2
 ; CHECK-NEXT:    br i1 [[C_1_LVER_ORIG]], label [[INNER_1_EXIT_LOOPEXIT:%.*]], label [[INNER_1_LVER_ORIG]]
 ; CHECK:       inner.1.ph:
 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR_PHI]], i64 16
@@ -46,7 +46,7 @@ define void @test(ptr %arg, i64 %arg1) {
 ; CHECK-NEXT:    [[TMP29:%.*]] = load double, ptr [[GEP_4]], align 8
 ; CHECK-NEXT:    [[PTR_IV_1_NEXT]] = getelementptr inbounds double, ptr [[PTR_IV_1]], i64 1
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV_1]], 1
-; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i64 [[IV_1]], 1
+; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i64 [[IV_1]], 2
 ; CHECK-NEXT:    br i1 [[C_1]], label [[INNER_1_EXIT_LOOPEXIT1:%.*]], label [[INNER_1]]
 ; CHECK:       inner.1.exit.loopexit:
 ; CHECK-NEXT:    [[LCSSA_PTR_IV_1_PH:%.*]] = phi ptr [ [[PTR_IV_1_LVER_ORIG]], [[INNER_1_LVER_ORIG]] ]
@@ -72,7 +72,8 @@ define void @test(ptr %arg, i64 %arg1) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[INDVAR_LCSSA]], 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], 24
 ; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[TMP1]]
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[GEP_7]], [[GEP_1]]
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[PTR_PHI]], i64 32
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[GEP_7]], [[SCEVGEP4]]
 ; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[PTR_PHI]], [[SCEVGEP3]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[INNER_3_PH_LVER_ORIG:%.*]], label [[INNER_3_PH:%.*]]
@@ -86,13 +87,13 @@ define void @test(ptr %arg, i64 %arg1) {
 ; CHECK-NEXT:    [[GEP_9_LVER_ORIG:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 [[IV_2_LVER_ORIG]]
 ; CHECK-NEXT:    [[TMP18_LVER_ORIG:%.*]] = load double, ptr [[GEP_9_LVER_ORIG]], align 8
 ; CHECK-NEXT:    [[IV_2_NEXT_LVER_ORIG]] = add nuw nsw i64 [[IV_2_LVER_ORIG]], 1
-; CHECK-NEXT:    [[C_2_LVER_ORIG:%.*]] = icmp eq i64 [[IV_2_LVER_ORIG]], 1
+; CHECK-NEXT:    [[C_2_LVER_ORIG:%.*]] = icmp eq i64 [[IV_2_LVER_ORIG]], 2
 ; CHECK-NEXT:    br i1 [[C_2_LVER_ORIG]], label [[OUTER_LATCH_LOOPEXIT:%.*]], label [[INNER_3_LVER_ORIG]]
 ; CHECK:       inner.3.ph:
-; CHECK-NEXT:    [[LOAD_INITIAL5:%.*]] = load double, ptr [[PTR_PHI]], align 8
+; CHECK-NEXT:    [[LOAD_INITIAL6:%.*]] = load double, ptr [[PTR_PHI]], align 8
 ; CHECK-NEXT:    br label [[INNER_3:%.*]]
 ; CHECK:       inner.3:
-; CHECK-NEXT:    [[STORE_FORWARDED6:%.*]] = phi double [ [[LOAD_INITIAL5]], [[INNER_3_PH]] ], [ 0.000000e+00, [[INNER_3]] ]
+; CHECK-NEXT:    [[STORE_FORWARDED7:%.*]] = phi double [ [[LOAD_INITIAL6]], [[INNER_3_PH]] ], [ 0.000000e+00, [[INNER_3]] ]
 ; CHECK-NEXT:    [[IV_2:%.*]] = phi i64 [ 0, [[INNER_3_PH]] ], [ [[IV_2_NEXT:%.*]], [[INNER_3]] ]
 ; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr inbounds double, ptr [[GEP_6]], i64 [[IV_2]]
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_7]], align 8
@@ -100,11 +101,11 @@ define void @test(ptr %arg, i64 %arg1) {
 ; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 [[IV_2]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = load double, ptr [[GEP_9]], align 8
 ; CHECK-NEXT:    [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 1
-; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i64 [[IV_2]], 1
-; CHECK-NEXT:    br i1 [[C_2]], label [[OUTER_LATCH_LOOPEXIT4:%.*]], label [[INNER_3]]
+; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i64 [[IV_2]], 2
+; CHECK-NEXT:    br i1 [[C_2]], label [[OUTER_LATCH_LOOPEXIT5:%.*]], label [[INNER_3]]
 ; CHECK:       outer.latch.loopexit:
 ; CHECK-NEXT:    br label [[OUTER_LATCH]]
-; CHECK:       outer.latch.loopexit4:
+; CHECK:       outer.latch.loopexit5:
 ; CHECK-NEXT:    br label [[OUTER_LATCH]]
 ; CHECK:       outer.latch:
 ; CHECK-NEXT:    br label [[INNER_1_LVER_CHECK]]
@@ -128,7 +129,7 @@ inner.1:
   %tmp29 = load double, ptr %gep.4, align 8
   %ptr.iv.1.next = getelementptr inbounds double, ptr %ptr.iv.1, i64 1
   %iv.next = add nuw nsw i64 %iv.1, 1
-  %c.1 = icmp eq i64 %iv.1, 1
+  %c.1 = icmp eq i64 %iv.1, 2
   br i1 %c.1, label %inner.1.exit, label %inner.1
 
 inner.1.exit:                                              ; preds = %bb22
@@ -155,7 +156,7 @@ inner.3:                                             ; preds = %bb14, %bb10
   %gep.9 = getelementptr double, ptr %ptr.phi, i64 %iv.2
   %tmp18 = load double, ptr %gep.9, align 8
   %iv.2.next = add nuw nsw i64 %iv.2, 1
-  %c.2 = icmp eq i64 %iv.2, 1
+  %c.2 = icmp eq i64 %iv.2, 2
   br i1 %c.2, label %outer.latch, label %inner.3
 
 outer.latch:
diff --git a/llvm/test/Transforms/LoopLoadElim/pr96656.ll b/llvm/test/Transforms/LoopLoadElim/pr96656.ll
index c6b336c0db8d3..8e3f2f1f48e9a 100644
--- a/llvm/test/Transforms/LoopLoadElim/pr96656.ll
+++ b/llvm/test/Transforms/LoopLoadElim/pr96656.ll
@@ -6,26 +6,11 @@ define void @single_iteration_versioning(ptr %arg, ptr %arg1, i1 %arg2) {
 ; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]], i1 [[ARG2:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[ARG]], align 4
-; CHECK-NEXT:    br i1 [[ARG2]], label %[[NOLOOP_EXIT:.*]], label %[[LOOP_LVER_CHECK:.*]]
-; CHECK:       [[LOOP_LVER_CHECK]]:
+; CHECK-NEXT:    br i1 [[ARG2]], label %[[NOLOOP_EXIT:.*]], label %[[LOOP_PH:.*]]
+; CHECK:       [[LOOP_PH]]:
 ; CHECK-NEXT:    [[SEXT7:%.*]] = sext i32 [[LOAD]] to i64
 ; CHECK-NEXT:    [[GEP8:%.*]] = getelementptr i8, ptr [[ARG1]], i64 8
 ; CHECK-NEXT:    [[GEP9:%.*]] = getelementptr i8, ptr [[ARG1]], i64 16
-; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[LOAD]], 1
-; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label %[[LOOP_PH_LVER_ORIG:.*]], label %[[LOOP_PH:.*]]
-; CHECK:       [[LOOP_PH_LVER_ORIG]]:
-; CHECK-NEXT:    br label %[[LOOP_LVER_ORIG:.*]]
-; CHECK:       [[LOOP_LVER_ORIG]]:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ 0, %[[LOOP_PH_LVER_ORIG]] ], [ [[ADD:%.*]], %[[LOOP_LVER_ORIG]] ]
-; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[PHI]], [[SEXT7]]
-; CHECK-NEXT:    [[GEP10:%.*]] = getelementptr double, ptr [[GEP8]], i64 [[MUL]]
-; CHECK-NEXT:    [[LOAD11:%.*]] = load double, ptr [[GEP10]], align 8
-; CHECK-NEXT:    [[GEP13_LVER_ORIG:%.*]] = getelementptr double, ptr [[GEP9]], i64 [[MUL]]
-; CHECK-NEXT:    store double [[LOAD11]], ptr [[GEP13_LVER_ORIG]], align 8
-; CHECK-NEXT:    [[ADD]] = add i64 [[PHI]], 1
-; CHECK-NEXT:    [[ICMP_LVER_ORIG:%.*]] = icmp eq i64 [[PHI]], 1
-; CHECK-NEXT:    br i1 [[ICMP_LVER_ORIG]], label %[[EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[LOOP_LVER_ORIG]]
-; CHECK:       [[LOOP_PH]]:
 ; CHECK-NEXT:    [[LOAD_INITIAL:%.*]] = load double, ptr [[GEP8]], align 8
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
@@ -38,17 +23,13 @@ define void @single_iteration_versioning(ptr %arg, ptr %arg1, i1 %arg2) {
 ; CHECK-NEXT:    store double [[STORE_FORWARDED]], ptr [[GEP13]], align 8
 ; CHECK-NEXT:    [[ADD1]] = add i64 [[PHI1]], 1
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i64 [[PHI1]], 1
-; CHECK-NEXT:    br i1 [[ICMP]], label %[[EXIT_LOOPEXIT_LOOPEXIT1:.*]], label %[[LOOP]]
+; CHECK-NEXT:    br i1 [[ICMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]]
 ; CHECK:       [[NOLOOP_EXIT]]:
 ; CHECK-NEXT:    [[SEXT2:%.*]] = sext i32 [[LOAD]] to i64
 ; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr double, ptr [[ARG1]], i64 [[SEXT2]]
 ; CHECK-NEXT:    [[LOAD6:%.*]] = load double, ptr [[GEP2]], align 8
 ; CHECK-NEXT:    store double [[LOAD6]], ptr [[ARG]], align 8
 ; CHECK-NEXT:    br label %[[EXIT:.*]]
-; CHECK:       [[EXIT_LOOPEXIT_LOOPEXIT]]:
-; CHECK-NEXT:    br label %[[EXIT_LOOPEXIT:.*]]
-; CHECK:       [[EXIT_LOOPEXIT_LOOPEXIT1]]:
-; CHECK-NEXT:    br label %[[EXIT_LOOPEXIT]]
 ; CHECK:       [[EXIT_LOOPEXIT]]:
 ; CHECK-NEXT:    br label %[[EXIT]]
 ; CHECK:       [[EXIT]]:



More information about the llvm-commits mailing list