[llvm-branch-commits] [llvm] ac593de - [LoopReroll] Fix rerolling loop with extra instructions
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu May 6 17:10:40 PDT 2021
Author: KAWASHIMA Takahiro
Date: 2021-05-06T17:10:04-07:00
New Revision: ac593de16cc5282630ce44dd8378ae5b7b91644c
URL: https://github.com/llvm/llvm-project/commit/ac593de16cc5282630ce44dd8378ae5b7b91644c
DIFF: https://github.com/llvm/llvm-project/commit/ac593de16cc5282630ce44dd8378ae5b7b91644c.diff
LOG: [LoopReroll] Fix rerolling loop with extra instructions
Fixes PR47627
This fix suppresses rerolling a loop which has an unrerollable
instruction.
Sample IR for the explanation below:
```
define void @foo([2 x i32]* nocapture %a) {
entry:
br label %loop
loop:
; base instruction
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
; unrerollable instructions
%stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %indvar, i64 0
store i32 999, i32* %stptrx, align 4
; extra simple arithmetic operations, used by root instructions
%plus20 = add nuw nsw i64 %indvar, 20
%plus10 = add nuw nsw i64 %indvar, 10
; root instruction 0
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
%value0 = load i32, i32* %ldptr0, align 4
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
store i32 %value0, i32* %stptr0, align 4
; root instruction 1
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
%value1 = load i32, i32* %ldptr1, align 4
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
store i32 %value1, i32* %stptr1, align 4
; loop-increment and latch
%indvar.next = add nuw nsw i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 5
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
```
In the loop rerolling pass, `%indvar` and `%indvar.next` are appended
to the `LoopIncs` vector in the `LoopReroll::DAGRootTracker::findRoots`
function.
Before this fix, the two instructions under the `unrerollable instructions`
comment above were marked as `IL_All` at the end of the
`LoopReroll::DAGRootTracker::collectUsedInstructions` function,
along with the instructions under the `extra simple arithmetic operations`
and `loop-increment and latch` comments. This is incorrect
because `IL_All` means that the instruction should be executed in all
iterations of the rerolled loop, but the `store` instruction should
not be.
This fix aborts rerolling when an instruction that may have side effects
does not belong to the def-use chain of any root instruction or reduction.
See https://bugs.llvm.org/show_bug.cgi?id=47627 for more information.
(cherry picked from commit d9a9c992d190dd6645ea911b66cf0cadba0dadc3)
Added:
llvm/test/Transforms/LoopReroll/extra_instr.ll
Modified:
llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index b3bae47e96de2..65a6205f03020 100644
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1081,6 +1081,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
DenseSet<Instruction*> V;
collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
for (auto *I : V) {
+ if (I->mayHaveSideEffects()) {
+ LLVM_DEBUG(dbgs() << "LRR: Aborting - "
+ << "An instruction which does not belong to any root "
+ << "sets must not have side effects: " << *I);
+ return false;
+ }
Uses[I].set(IL_All);
}
diff --git a/llvm/test/Transforms/LoopReroll/extra_instr.ll b/llvm/test/Transforms/LoopReroll/extra_instr.ll
new file mode 100644
index 0000000000000..aae29079ade7f
--- /dev/null
+++ b/llvm/test/Transforms/LoopReroll/extra_instr.ll
@@ -0,0 +1,268 @@
+; RUN: opt -S -loop-reroll %s | FileCheck %s
+target triple = "aarch64--linux-gnu"
+
+define void @rerollable1([2 x i32]* nocapture %a) {
+entry:
+ br label %loop
+
+loop:
+
+; CHECK-LABEL: loop:
+; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv
+; CHECK-NEXT: [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
+; CHECK-NEXT: store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4
+
+ ; base instruction
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+
+ ; NO unrerollable instructions
+
+ ; extra simple arithmetic operations, used by root instructions
+ %plus20 = add nuw nsw i64 %iv, 20
+ %plus10 = add nuw nsw i64 %iv, 10
+
+ ; root instruction 0
+ %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
+ %value0 = load i32, i32* %ldptr0, align 4
+ %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
+ store i32 %value0, i32* %stptr0, align 4
+
+ ; root instruction 1
+ %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
+ %value1 = load i32, i32* %ldptr1, align 4
+ %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
+ store i32 %value1, i32* %stptr1, align 4
+
+ ; loop-increment
+ %iv.next = add nuw nsw i64 %iv, 1
+
+ ; latch
+ %exitcond = icmp eq i64 %iv.next, 5
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @unrerollable1([2 x i32]* nocapture %a) {
+entry:
+ br label %loop
+
+loop:
+
+; CHECK-LABEL: loop:
+; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
+; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
+
+ ; base instruction
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+
+ ; unrerollable instructions using %iv
+ %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
+ store i32 999, i32* %stptrx, align 4
+
+ ; extra simple arithmetic operations, used by root instructions
+ %plus20 = add nuw nsw i64 %iv, 20
+ %plus10 = add nuw nsw i64 %iv, 10
+
+ ; root instruction 0
+ %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
+ %value0 = load i32, i32* %ldptr0, align 4
+ %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
+ store i32 %value0, i32* %stptr0, align 4
+
+ ; root instruction 1
+ %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
+ %value1 = load i32, i32* %ldptr1, align 4
+ %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
+ store i32 %value1, i32* %stptr1, align 4
+
+ ; loop-increment
+ %iv.next = add nuw nsw i64 %iv, 1
+
+ ; latch
+ %exitcond = icmp eq i64 %iv.next, 5
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @unrerollable2([2 x i32]* nocapture %a) {
+entry:
+ br label %loop
+
+loop:
+
+; CHECK-LABEL: loop:
+; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
+; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
+; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
+
+ ; base instruction
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+
+ ; loop-increment
+ %iv.next = add nuw nsw i64 %iv, 1
+
+ ; unrerollable instructions using %iv.next
+ %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
+ store i32 999, i32* %stptrx, align 4
+
+ ; extra simple arithmetic operations, used by root instructions
+ %plus20 = add nuw nsw i64 %iv, 20
+ %plus10 = add nuw nsw i64 %iv, 10
+
+ ; root instruction 0
+ %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
+ %value0 = load i32, i32* %ldptr0, align 4
+ %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
+ store i32 %value0, i32* %stptr0, align 4
+
+ ; root instruction 1
+ %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
+ %value1 = load i32, i32* %ldptr1, align 4
+ %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
+ store i32 %value1, i32* %stptr1, align 4
+
+ ; latch
+ %exitcond = icmp eq i64 %iv.next, 5
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define dso_local void @rerollable2() {
+entry:
+ br label %loop
+
+loop:
+
+; CHECK-LABEL: loop:
+; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
+; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
+
+ ; induction variable
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+
+ ; scale instruction
+ %iv.mul3 = mul nuw nsw i32 %iv, 3
+
+ ; extra simple arithmetic operations, used by root instructions
+ %iv.scaled = add nuw nsw i32 %iv.mul3, 20
+
+ ; NO unrerollable instructions
+
+ ; root set 1
+
+ ; base instruction
+ %iv.scaled.div5 = udiv i32 %iv.scaled, 5
+ tail call void @bar(i32 %iv.scaled.div5)
+ ; root instruction 0
+ %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
+ %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
+ tail call void @bar(i32 %iv.scaled.add1.div5)
+ ; root instruction 2
+ %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
+ %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
+ tail call void @bar(i32 %iv.scaled.add2.div5)
+
+ ; root set 2
+
+ ; base instruction
+ %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
+ %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
+ tail call void @bar(i32 %iv.scaled.add4.div5)
+ ; root instruction 0
+ %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
+ %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
+ tail call void @bar(i32 %iv.scaled.add5.div5)
+ ; root instruction 2
+ %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
+ %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
+ tail call void @bar(i32 %iv.scaled.add6.div5)
+
+ ; loop-increment
+ %iv.next = add nuw nsw i32 %iv, 1
+
+ ; latch
+ %cmp = icmp ult i32 %iv.next, 3
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define dso_local void @unrerollable3() {
+entry:
+ br label %loop
+
+loop:
+
+; CHECK-LABEL: loop:
+; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3
+; CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20
+; CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7
+; CHECK-NEXT: tail call void @bar(i32 %iv.mul7)
+
+ ; induction variable
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+
+ ; scale instruction
+ %iv.mul3 = mul nuw nsw i32 %iv, 3
+
+ ; extra simple arithmetic operations, used by root instructions
+ %iv.scaled = add nuw nsw i32 %iv.mul3, 20
+
+ ; unrerollable instructions using %iv
+ %iv.mul7 = mul nuw nsw i32 %iv, 7
+ tail call void @bar(i32 %iv.mul7)
+
+ ; root set 1
+
+ ; base instruction
+ %iv.scaled.div5 = udiv i32 %iv.scaled, 5
+ tail call void @bar(i32 %iv.scaled.div5)
+ ; root instruction 0
+ %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
+ %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
+ tail call void @bar(i32 %iv.scaled.add1.div5)
+ ; root instruction 2
+ %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
+ %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
+ tail call void @bar(i32 %iv.scaled.add2.div5)
+
+ ; root set 2
+
+ ; base instruction
+ %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
+ %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
+ tail call void @bar(i32 %iv.scaled.add4.div5)
+ ; root instruction 0
+ %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
+ %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
+ tail call void @bar(i32 %iv.scaled.add5.div5)
+ ; root instruction 2
+ %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
+ %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
+ tail call void @bar(i32 %iv.scaled.add6.div5)
+
+ ; loop-increment
+ %iv.next = add nuw nsw i32 %iv, 1
+
+ ; latch
+ %cmp = icmp ult i32 %iv.next, 3
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+declare dso_local void @bar(i32)
More information about the llvm-branch-commits
mailing list