[llvm] [LV] Fix emission of debug message in legality check (PR #101924)

Madhur Amilkanthwar via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 04:51:34 PDT 2024


https://github.com/madhur13490 updated https://github.com/llvm/llvm-project/pull/101924

>From 6c3d29e019ca1c3b04a0acb3ad45757f0cd493e6 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Wed, 31 Jul 2024 22:17:56 +0530
Subject: [PATCH 1/4] [LV] Fix emission of debug message in legality check

The successful-vectorization message is emitted even
after "Result" is false.  "Result" = false indicates
failure of one of the legality checks, and thus the
successful message should not be printed.
---
 .../Vectorize/LoopVectorizationLegality.cpp    | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 66a779da8c25bc..7545f23960ff44 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1451,10 +1451,12 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
   // Check whether the loop-related control flow in the loop nest is expected by
   // vectorizer.
   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
-    if (DoExtraAnalysis)
+    if (DoExtraAnalysis) {
+      LLVM_DEBUG(dbgs() << "LV legality check failed: loop nest");
       Result = false;
-    else
+    } else {
       return false;
+    }
   }
 
   // We need to have a loop header.
@@ -1519,11 +1521,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
-  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
-                    << (LAI->getRuntimePointerChecking()->Need
-                            ? " (with a runtime bound check)"
-                            : "")
-                    << "!\n");
 
   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
@@ -1538,6 +1535,13 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
     else
       return false;
   }
+  if (Result) {
+    LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
+                      << (LAI->getRuntimePointerChecking()->Need
+                              ? " (with a runtime bound check)"
+                              : "")
+                      << "!\n");
+  }
 
   // Okay! We've done all the tests. If any have failed, return false. Otherwise
   // we can vectorize, and at this point we don't have any other mem analysis

>From c159433de950aa7d6de6c2ecf69392e78485bece Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Thu, 8 Aug 2024 21:23:14 +0530
Subject: [PATCH 2/4] Add test

---
 .../LoopVectorize/check-no-vectorize.ll       | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll

diff --git a/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
new file mode 100644
index 00000000000000..361200563e1a43
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
@@ -0,0 +1,36 @@
+; This test asserts that we don't emit both
+; successful and unsuccessful message about vectorization.
+
+; RUN: opt -passes=loop-vectorize -debug -disable-output -pass-remarks-missed=loop-vectorize %s 2>&1 | FileCheck %s
+; CHECK-NOT: LV: We can vectorize this loop
+; CHECK: LV: Not vectorizing: Cannot prove legality
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+@a = dso_local global [32000 x i32] zeroinitializer, align 4
+@b = dso_local global [32000 x i32] zeroinitializer, align 4
+
+define dso_local void @foo() local_unnamed_addr {
+entry:
+  %.pre = load i32, ptr getelementptr inbounds (i8, ptr @a, i64 4), align 4
+  %.pre17 = load i32, ptr @a, align 4
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %0 = phi i32 [ %.pre17, %entry ], [ %add6, %for.body ]
+  %1 = phi i32 [ %.pre, %entry ], [ %2, %for.body ]
+  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [32000 x i32], ptr @a, i64 0, i64 %indvars.iv
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %arrayidx2 = getelementptr inbounds [32000 x i32], ptr @a, i64 0, i64 %indvars.iv.next
+  %2 = load i32, ptr %arrayidx2, align 4
+  %add3 = add nsw i32 %2, %1
+  %add6 = add nsw i32 %add3, %0
+  store i32 %add6, ptr %arrayidx, align 4
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 31999
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}

>From e3e75028d4bdc3fc832d0deefc3b180e00f7bbb7 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Wed, 21 Aug 2024 16:06:18 +0530
Subject: [PATCH 3/4] Address review comments

---
 .../Vectorize/LoopVectorizationLegality.cpp    | 18 ++++++++++--------
 .../LoopVectorize/check-no-vectorize.ll        |  8 ++++----
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 7545f23960ff44..f1e36f5b8e7a2f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1521,12 +1521,21 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
-
   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
 
+  if (Result) {
+    LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
+                      << (LAI->getRuntimePointerChecking()->Need
+                              ? " (with a runtime bound check)"
+                              : "")
+                      << "!\n");
+  }
+
   if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
+    LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
+                         "due to SCEVThreshold");
     reportVectorizationFailure("Too many SCEV checks needed",
         "Too many SCEV assumptions need to be made and checked at runtime",
         "TooManySCEVRunTimeChecks", ORE, TheLoop);
@@ -1535,13 +1544,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
     else
       return false;
   }
-  if (Result) {
-    LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
-                      << (LAI->getRuntimePointerChecking()->Need
-                              ? " (with a runtime bound check)"
-                              : "")
-                      << "!\n");
-  }
 
   // Okay! We've done all the tests. If any have failed, return false. Otherwise
   // we can vectorize, and at this point we don't have any other mem analysis
diff --git a/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
index 361200563e1a43..83880da4a7a469 100644
--- a/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
+++ b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
@@ -4,14 +4,14 @@
 ; RUN: opt -passes=loop-vectorize -debug -disable-output -pass-remarks-missed=loop-vectorize %s 2>&1 | FileCheck %s
 ; CHECK-NOT: LV: We can vectorize this loop
 ; CHECK: LV: Not vectorizing: Cannot prove legality
+; CHECK-NOT: LV: We can vectorize this loop
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
-target triple = "aarch64-unknown-linux-gnu"
 
-@a = dso_local global [32000 x i32] zeroinitializer, align 4
-@b = dso_local global [32000 x i32] zeroinitializer, align 4
+@a = global [32000 x i32] zeroinitializer, align 4
+@b = global [32000 x i32] zeroinitializer, align 4
 
-define dso_local void @foo() local_unnamed_addr {
+define void @foo() {
 entry:
   %.pre = load i32, ptr getelementptr inbounds (i8, ptr @a, i64 4), align 4
   %.pre17 = load i32, ptr @a, align 4

>From 01a81a07b0d4a532f2e3bba4f4caee0a0d9fdf07 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Sat, 24 Aug 2024 12:02:01 +0530
Subject: [PATCH 4/4] Address review comments

---
 .../Vectorize/LoopVectorizationLegality.cpp         |  2 +-
 .../Transforms/LoopVectorize/check-no-vectorize.ll  | 13 ++++++-------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index f1e36f5b8e7a2f..ff5bd2ab5dddd6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1452,7 +1452,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
   // vectorizer.
   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
     if (DoExtraAnalysis) {
-      LLVM_DEBUG(dbgs() << "LV legality check failed: loop nest");
+      LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
       Result = false;
     } else {
       return false;
diff --git a/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
index 83880da4a7a469..f64775ded1ea1f 100644
--- a/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
+++ b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
@@ -1,28 +1,27 @@
 ; This test asserts that we don't emit both
 ; successful and unsuccessful message about vectorization.
 
-; RUN: opt -passes=loop-vectorize -debug -disable-output -pass-remarks-missed=loop-vectorize %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; RUN: opt -passes=loop-vectorize -debug -disable-output < %s 2>&1 | FileCheck %s
 ; CHECK-NOT: LV: We can vectorize this loop
 ; CHECK: LV: Not vectorizing: Cannot prove legality
 ; CHECK-NOT: LV: We can vectorize this loop
 
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
-
 @a = global [32000 x i32] zeroinitializer, align 4
 @b = global [32000 x i32] zeroinitializer, align 4
 
 define void @foo() {
 entry:
-  %.pre = load i32, ptr getelementptr inbounds (i8, ptr @a, i64 4), align 4
-  %.pre17 = load i32, ptr @a, align 4
+  %load_a_gep = load i32, ptr getelementptr inbounds (i8, ptr @a, i64 4), align 4
+  %val_a = load i32, ptr @a, align 4
   br label %for.body
 
 for.cond.cleanup:                                 ; preds = %for.body
   ret void
 
 for.body:                                         ; preds = %entry, %for.body
-  %0 = phi i32 [ %.pre17, %entry ], [ %add6, %for.body ]
-  %1 = phi i32 [ %.pre, %entry ], [ %2, %for.body ]
+  %0 = phi i32 [ %val_a, %entry ], [ %add6, %for.body ]
+  %1 = phi i32 [ %load_a_gep, %entry ], [ %2, %for.body ]
   %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds [32000 x i32], ptr @a, i64 0, i64 %indvars.iv
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1



More information about the llvm-commits mailing list