[llvm] [LV] Fix emission of debug message in legality check (PR #101924)
Madhur Amilkanthwar via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 04:51:34 PDT 2024
https://github.com/madhur13490 updated https://github.com/llvm/llvm-project/pull/101924
>From 6c3d29e019ca1c3b04a0acb3ad45757f0cd493e6 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Wed, 31 Jul 2024 22:17:56 +0530
Subject: [PATCH 1/4] [LV] Fix emission of debug message in legality check
The successful-vectorization message is emitted even
after "Result" has been set to false. "Result" = false
indicates that one of the legality checks failed, and
thus the success message should not be printed.
---
.../Vectorize/LoopVectorizationLegality.cpp | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 66a779da8c25bc..7545f23960ff44 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1451,10 +1451,12 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
// Check whether the loop-related control flow in the loop nest is expected by
// vectorizer.
if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
- if (DoExtraAnalysis)
+ if (DoExtraAnalysis) {
+ LLVM_DEBUG(dbgs() << "LV legality check failed: loop nest");
Result = false;
- else
+ } else {
return false;
+ }
}
// We need to have a loop header.
@@ -1519,11 +1521,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return false;
}
- LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
- << (LAI->getRuntimePointerChecking()->Need
- ? " (with a runtime bound check)"
- : "")
- << "!\n");
unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
@@ -1538,6 +1535,13 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
else
return false;
}
+ if (Result) {
+ LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
+ << (LAI->getRuntimePointerChecking()->Need
+ ? " (with a runtime bound check)"
+ : "")
+ << "!\n");
+ }
// Okay! We've done all the tests. If any have failed, return false. Otherwise
// we can vectorize, and at this point we don't have any other mem analysis
>From c159433de950aa7d6de6c2ecf69392e78485bece Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Thu, 8 Aug 2024 21:23:14 +0530
Subject: [PATCH 2/4] Add test
---
.../LoopVectorize/check-no-vectorize.ll | 36 +++++++++++++++++++
1 file changed, 36 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
diff --git a/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
new file mode 100644
index 00000000000000..361200563e1a43
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
@@ -0,0 +1,36 @@
+; This test asserts that we don't emit both
+; successful and unsuccessful message about vectorization.
+
+; RUN: opt -passes=loop-vectorize -debug -disable-output -pass-remarks-missed=loop-vectorize %s 2>&1 | FileCheck %s
+; CHECK-NOT: LV: We can vectorize this loop
+; CHECK: LV: Not vectorizing: Cannot prove legality
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+ at a = dso_local global [32000 x i32] zeroinitializer, align 4
+ at b = dso_local global [32000 x i32] zeroinitializer, align 4
+
+define dso_local void @foo() local_unnamed_addr {
+entry:
+ %.pre = load i32, ptr getelementptr inbounds (i8, ptr @a, i64 4), align 4
+ %.pre17 = load i32, ptr @a, align 4
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %0 = phi i32 [ %.pre17, %entry ], [ %add6, %for.body ]
+ %1 = phi i32 [ %.pre, %entry ], [ %2, %for.body ]
+ %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds [32000 x i32], ptr @a, i64 0, i64 %indvars.iv
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds [32000 x i32], ptr @a, i64 0, i64 %indvars.iv.next
+ %2 = load i32, ptr %arrayidx2, align 4
+ %add3 = add nsw i32 %2, %1
+ %add6 = add nsw i32 %add3, %0
+ store i32 %add6, ptr %arrayidx, align 4
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 31999
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
>From e3e75028d4bdc3fc832d0deefc3b180e00f7bbb7 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Wed, 21 Aug 2024 16:06:18 +0530
Subject: [PATCH 3/4] Address review comments
---
.../Vectorize/LoopVectorizationLegality.cpp | 18 ++++++++++--------
.../LoopVectorize/check-no-vectorize.ll | 8 ++++----
2 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 7545f23960ff44..f1e36f5b8e7a2f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1521,12 +1521,21 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return false;
}
-
unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
+ if (Result) {
+ LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
+ << (LAI->getRuntimePointerChecking()->Need
+ ? " (with a runtime bound check)"
+ : "")
+ << "!\n");
+ }
+
if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
+ LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
+ "due to SCEVThreshold");
reportVectorizationFailure("Too many SCEV checks needed",
"Too many SCEV assumptions need to be made and checked at runtime",
"TooManySCEVRunTimeChecks", ORE, TheLoop);
@@ -1535,13 +1544,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
else
return false;
}
- if (Result) {
- LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
- << (LAI->getRuntimePointerChecking()->Need
- ? " (with a runtime bound check)"
- : "")
- << "!\n");
- }
// Okay! We've done all the tests. If any have failed, return false. Otherwise
// we can vectorize, and at this point we don't have any other mem analysis
diff --git a/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
index 361200563e1a43..83880da4a7a469 100644
--- a/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
+++ b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
@@ -4,14 +4,14 @@
; RUN: opt -passes=loop-vectorize -debug -disable-output -pass-remarks-missed=loop-vectorize %s 2>&1 | FileCheck %s
; CHECK-NOT: LV: We can vectorize this loop
; CHECK: LV: Not vectorizing: Cannot prove legality
+; CHECK-NOT: LV: We can vectorize this loop
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
-target triple = "aarch64-unknown-linux-gnu"
- at a = dso_local global [32000 x i32] zeroinitializer, align 4
- at b = dso_local global [32000 x i32] zeroinitializer, align 4
+ at a = global [32000 x i32] zeroinitializer, align 4
+ at b = global [32000 x i32] zeroinitializer, align 4
-define dso_local void @foo() local_unnamed_addr {
+define void @foo() {
entry:
%.pre = load i32, ptr getelementptr inbounds (i8, ptr @a, i64 4), align 4
%.pre17 = load i32, ptr @a, align 4
>From 01a81a07b0d4a532f2e3bba4f4caee0a0d9fdf07 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Sat, 24 Aug 2024 12:02:01 +0530
Subject: [PATCH 4/4] Address review comments
---
.../Vectorize/LoopVectorizationLegality.cpp | 2 +-
.../Transforms/LoopVectorize/check-no-vectorize.ll | 13 ++++++-------
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index f1e36f5b8e7a2f..ff5bd2ab5dddd6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1452,7 +1452,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
// vectorizer.
if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
if (DoExtraAnalysis) {
- LLVM_DEBUG(dbgs() << "LV legality check failed: loop nest");
+ LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
Result = false;
} else {
return false;
diff --git a/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
index 83880da4a7a469..f64775ded1ea1f 100644
--- a/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
+++ b/llvm/test/Transforms/LoopVectorize/check-no-vectorize.ll
@@ -1,28 +1,27 @@
; This test asserts that we don't emit both
; successful and unsuccessful message about vectorization.
-; RUN: opt -passes=loop-vectorize -debug -disable-output -pass-remarks-missed=loop-vectorize %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; RUN: opt -passes=loop-vectorize -debug -disable-output < %s 2>&1 | FileCheck %s
; CHECK-NOT: LV: We can vectorize this loop
; CHECK: LV: Not vectorizing: Cannot prove legality
; CHECK-NOT: LV: We can vectorize this loop
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
-
@a = global [32000 x i32] zeroinitializer, align 4
@b = global [32000 x i32] zeroinitializer, align 4
define void @foo() {
entry:
- %.pre = load i32, ptr getelementptr inbounds (i8, ptr @a, i64 4), align 4
- %.pre17 = load i32, ptr @a, align 4
+ %load_a_gep = load i32, ptr getelementptr inbounds (i8, ptr @a, i64 4), align 4
+ %val_a = load i32, ptr @a, align 4
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
- %0 = phi i32 [ %.pre17, %entry ], [ %add6, %for.body ]
- %1 = phi i32 [ %.pre, %entry ], [ %2, %for.body ]
+ %0 = phi i32 [ %val_a, %entry ], [ %add6, %for.body ]
+ %1 = phi i32 [ %load_a_gep, %entry ], [ %2, %for.body ]
%indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds [32000 x i32], ptr @a, i64 0, i64 %indvars.iv
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
More information about the llvm-commits
mailing list