[llvm] LSV: forbid load-cycles when vectorizing; fix bug (PR #104815)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 12:04:07 PDT 2024
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/104815
From 6bfb08b3559bfbfce6643e588bcd6a832aa0c96d Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 19 Aug 2024 16:53:31 +0100
Subject: [PATCH 1/3] LSV: forbid load-cycles when vectorizing; fix bug
Reject a zero SCEV difference in getConstantOffset, thereby refusing to
vectorize chains containing load-load cycles, which would otherwise
crash LoadStoreVectorizer during instruction reordering.
Fixes #37865.
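To make the cycle concrete, here is a standalone C++ model of the
reproducer (illustrative only, not part of the patch): the second
load's address is computed from the first load's value, so a single
wide load covering both elements would have to execute before its own
address operand is available.

#include <cstdint>
#include <cstdio>

// Mirrors @load_cycle from the test below. The urem always yields 0,
// but at the IR level the address of the second load still depends on
// the value produced by the first load.
static int32_t load_cycle(const int32_t *x) {
  int32_t load_x_1 = x[1];       // first load
  uint32_t rem = load_x_1 % 1;   // address computation uses load_x_1
  int32_t load_x_2 = x[rem * 2]; // second load: address depends on rem
  return load_x_2 + load_x_1;
}

int main() {
  int32_t x[2] = {7, 35};
  printf("%d\n", load_cycle(x)); // prints 42
}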
---
.../Vectorize/LoadStoreVectorizer.cpp | 6 ++++++
.../LoadStoreVectorizer/AArch64/pr37865.ll | 17 +++++++++++++----
2 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index c35ea431296b70..385349b338af93 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1499,6 +1499,12 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
// Try to compute B - A.
const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
+ if (DistScev->isZero()) {
+ // A load in the chain is dependent on another load in the chain, and
+ // attempting to vectorize this chain would create a cycle.
+ LLVM_DEBUG(dbgs() << "LSV: SCEV diff is zero; not vectorizing\n");
+ return std::nullopt;
+ }
if (DistScev != SE.getCouldNotCompute()) {
LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
ConstantRange DistRange = SE.getSignedRange(DistScev);
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll b/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
index 833e70814c2917..bf5d8a555bd908 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
@@ -1,9 +1,18 @@
-; REQUIRES: asserts
-; RUN: not --crash opt -mtriple=aarch64 -passes=load-store-vectorizer \
-; RUN: -disable-output %s 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=aarch64 -passes=load-store-vectorizer -S %s | FileCheck %s
define i32 @load_cycle(ptr %x) {
-; CHECK: Unexpected cycle while re-ordering instructions
+; CHECK-LABEL: define i32 @load_cycle(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[GEP_X_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 0, i32 1
+; CHECK-NEXT: [[LOAD_X_1:%.*]] = load i32, ptr [[GEP_X_1]], align 4
+; CHECK-NEXT: [[REM:%.*]] = urem i32 [[LOAD_X_1]], 1
+; CHECK-NEXT: [[GEP_X_2:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[REM]], i32 0
+; CHECK-NEXT: [[LOAD_X_2:%.*]] = load i32, ptr [[GEP_X_2]], align 4
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[LOAD_X_2]], [[LOAD_X_1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
entry:
%gep.x.1 = getelementptr inbounds [2 x i32], ptr %x, i32 0, i32 1
%load.x.1 = load i32, ptr %gep.x.1
From 4465fef283cc626ec26a8352cd06d450ae5f5659 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 19 Aug 2024 18:11:43 +0100
Subject: [PATCH 2/3] LSV: address review
---
llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp | 1 +
llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 385349b338af93..187a6d3641dead 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1505,6 +1505,7 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
LLVM_DEBUG(dbgs() << "LSV: SCEV diff is zero; not vectorizing\n");
return std::nullopt;
}
+
if (DistScev != SE.getCouldNotCompute()) {
LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
ConstantRange DistRange = SE.getSignedRange(DistScev);
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll b/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
index bf5d8a555bd908..d657ea03cff82e 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=aarch64 -passes=load-store-vectorizer -S %s | FileCheck %s
+; LSV was attempting to vectorize this earlier, but crashed while re-ordering
+; instructions due to the load-load cycle. Now, the chain is no longer a
+; candidate for vectorization.
+
define i32 @load_cycle(ptr %x) {
; CHECK-LABEL: define i32 @load_cycle(
; CHECK-SAME: ptr [[X:%.*]]) {
From cb2ab8578ebe44bf6157ceb64605aee0c8155cee Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 20 Aug 2024 19:36:49 +0100
Subject: [PATCH 3/3] LSV: redo patch

Redo the fix: drop the zero-SCEV-diff check in getConstantOffset, and
instead have isSafeToMove walk the use-def chain of ChainElem, refusing
to reorder a pair of loads where one transitively depends on the other.
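As a standalone sketch of the technique (illustrative types and names,
not LLVM API): starting from the chain element, follow operand edges;
if the chain's first element is reachable, the pair is dependent and
must not be reordered. A visited set bounds the walk here; the patch
itself instead guarantees termination by skipping PHI nodes, since
use-def cycles in LLVM IR only pass through PHIs.

#include <unordered_set>
#include <vector>

struct Inst {
  std::vector<Inst *> Operands; // use-def edges (PHIs already excluded)
};

// Returns true if Elem transitively depends on Begin through operand
// edges, i.e. moving Elem above Begin would violate a data dependence.
static bool dependsOn(Inst *Elem, Inst *Begin) {
  std::vector<Inst *> Worklist{Elem};
  std::unordered_set<Inst *> Visited;
  while (!Worklist.empty()) {
    Inst *I = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(I).second)
      continue; // already expanded this node
    for (Inst *Op : I->Operands) {
      if (Op == Begin)
        return true;
      Worklist.push_back(Op);
    }
  }
  return false;
}

int main() {
  Inst Begin, Mid{{&Begin}}, Elem{{&Mid}};
  return dependsOn(&Elem, &Begin) ? 0 : 1; // dependent: returns 0
}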
---
.../Vectorize/LoadStoreVectorizer.cpp | 34 +++++++---
.../LoadStoreVectorizer/AArch64/pr37865.ll | 62 ++++++++++++++++++-
2 files changed, 85 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 187a6d3641dead..888bedf700d7fc 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -996,10 +996,32 @@ bool Vectorizer::isSafeToMove(
LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
<< *ChainBegin << ")\n");
- assert(isa<LoadInst>(ChainElem) == IsLoadChain);
+ assert(isa<LoadInst>(ChainElem) == IsLoadChain &&
+ isa<LoadInst>(ChainBegin) == IsLoadChain);
+
if (ChainElem == ChainBegin)
return true;
+ if constexpr (IsLoadChain) {
+ // If ChainElem depends on ChainBegin, they're not safe to reorder.
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.emplace_back(ChainElem);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ for (Use &O : I->operands()) {
+ if (isa<PHINode>(O))
+ continue;
+ if (auto *J = dyn_cast<Instruction>(O)) {
+ if (J == ChainBegin) {
+ LLVM_DEBUG(dbgs() << "LSV: dependent loads; not safe to reorder\n");
+ return false;
+ }
+ Worklist.emplace_back(J);
+ }
+ }
+ }
+ }
+
// Invariant loads can always be reordered; by definition they are not
// clobbered by stores.
if (isInvariantLoad(ChainElem))
@@ -1028,7 +1050,8 @@ bool Vectorizer::isSafeToMove(
if (!I->mayReadOrWriteMemory())
continue;
- // Loads can be reordered with other loads.
+ // Loads can be reordered with other loads. We've already checked that
+ // they're independent.
if (IsLoadChain && isa<LoadInst>(I))
continue;
@@ -1499,13 +1522,6 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
// Try to compute B - A.
const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
- if (DistScev->isZero()) {
- // A load in the chain is dependent on another load in the chain, and
- // attempting to vectorize this chain would create a cycle.
- LLVM_DEBUG(dbgs() << "LSV: SCEV diff is zero; not vectorizing\n");
- return std::nullopt;
- }
-
if (DistScev != SE.getCouldNotCompute()) {
LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
ConstantRange DistRange = SE.getSignedRange(DistScev);
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll b/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
index d657ea03cff82e..0beca8c15305f6 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AArch64/pr37865.ll
@@ -2,8 +2,8 @@
; RUN: opt -mtriple=aarch64 -passes=load-store-vectorizer -S %s | FileCheck %s
; LSV was attempting to vectorize this earlier, but crashed while re-ordering
-; instructions due to the load-load cycle. Now, the chain is no longer a
-; candidate for vectorization.
+; instructions due to the load-load cycle. Now, the candidate loads are no
+; longer considered safe for reordering.
define i32 @load_cycle(ptr %x) {
; CHECK-LABEL: define i32 @load_cycle(
@@ -26,3 +26,61 @@ entry:
%ret = add i32 %load.x.2, %load.x.1
ret i32 %ret
}
+
+define i32 @load_cycle2(ptr %x, i32 %y) {
+; CHECK-LABEL: define i32 @load_cycle2(
+; CHECK-SAME: ptr [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[GEP_X_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[Y]], i32 1
+; CHECK-NEXT: [[LOAD_X_1:%.*]] = load i32, ptr [[GEP_X_1]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[LOAD_X_1]], 2
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[Y]], [[MUL]]
+; CHECK-NEXT: [[SUB_1:%.*]] = sub i32 [[ADD]], [[LOAD_X_1]]
+; CHECK-NEXT: [[SUB_2:%.*]] = sub i32 [[SUB_1]], [[LOAD_X_1]]
+; CHECK-NEXT: [[GEP_X_2:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[SUB_2]], i32 0
+; CHECK-NEXT: [[LOAD_X_2:%.*]] = load i32, ptr [[GEP_X_2]], align 4
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[LOAD_X_2]], [[LOAD_X_1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+entry:
+ %gep.x.1 = getelementptr inbounds [2 x i32], ptr %x, i32 %y, i32 1
+ %load.x.1 = load i32, ptr %gep.x.1
+ %mul = mul i32 %load.x.1, 2
+ %add = add i32 %y, %mul
+ %sub.1 = sub i32 %add, %load.x.1
+ %sub.2 = sub i32 %sub.1, %load.x.1
+ %gep.x.2 = getelementptr inbounds [2 x i32], ptr %x, i32 %sub.2, i32 0
+ %load.x.2 = load i32, ptr %gep.x.2
+ %ret = add i32 %load.x.2, %load.x.1
+ ret i32 %ret
+}
+
+ at global.1 = global i32 0
+ at global.2 = global [1 x [3 x i32]] zeroinitializer
+
+define i16 @load_cycle3() {
+; CHECK-LABEL: define i16 @load_cycle3() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LOAD_1:%.*]] = load i32, ptr @global.1, align 4
+; CHECK-NEXT: [[UREM_1:%.*]] = urem i32 [[LOAD_1]], 1
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 [[UREM_1]]
+; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds [3 x i32], ptr [[GEP_1]], i32 0, i32 2
+; CHECK-NEXT: [[LOAD_2:%.*]] = load i32, ptr [[GEP_2]], align 4
+; CHECK-NEXT: [[UREM_2:%.*]] = urem i32 [[LOAD_2]], 1
+; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 [[UREM_2]]
+; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds [3 x i32], ptr [[GEP_3]], i32 0, i32 1
+; CHECK-NEXT: [[LOAD_3:%.*]] = load i32, ptr [[GEP_4]], align 4
+; CHECK-NEXT: ret i16 0
+;
+entry:
+ %load.1 = load i32, ptr @global.1
+ %urem.1 = urem i32 %load.1, 1
+ %gep.1 = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 %urem.1
+ %gep.2 = getelementptr inbounds [3 x i32], ptr %gep.1, i32 0, i32 2
+ %load.2 = load i32, ptr %gep.2
+ %urem.2 = urem i32 %load.2, 1
+ %gep.3 = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 %urem.2
+ %gep.4 = getelementptr inbounds [3 x i32], ptr %gep.3, i32 0, i32 1
+ %load.3 = load i32, ptr %gep.4
+ ret i16 0
+}