[llvm] b636e7d - [NFC][PhaseOrdering] Add a test demonstrating pitfalls of common code hoisting on loop rotation
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 16 13:54:03 PDT 2020
Author: Roman Lebedev
Date: 2020-07-16T23:53:26+03:00
New Revision: b636e7d1fc61635d214edc81fd98b3717add8aef
URL: https://github.com/llvm/llvm-project/commit/b636e7d1fc61635d214edc81fd98b3717add8aef
DIFF: https://github.com/llvm/llvm-project/commit/b636e7d1fc61635d214edc81fd98b3717add8aef.diff
LOG: [NFC][PhaseOrdering] Add a test demonstrating pitfalls of common code hoisting on loop rotation
Depending on the -rotation-max-header-size=?,
hoisting common code early makes loop rotation impossible.
Added:
llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll
new file mode 100644
index 000000000000..1d8cce6879e9
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll
@@ -0,0 +1,224 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -O3 -rotation-max-header-size=0 -S < %s | FileCheck %s --check-prefixes=HOIST,THR0,FALLBACK0
+; RUN: opt -passes='default<O3>' -rotation-max-header-size=0 -S < %s | FileCheck %s --check-prefixes=HOIST,THR0,FALLBACK1
+
+; RUN: opt -O3 -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefixes=HOIST,THR1,FALLBACK2
+; RUN: opt -passes='default<O3>' -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefixes=HOIST,THR1,FALLBACK3
+
+; RUN: opt -O3 -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=HOIST,THR2,FALLBACK4
+; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=HOIST,THR2,FALLBACK5
+
+; RUN: opt -O3 -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_OLDPM,FALLBACK6
+; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_NEWPM,FALLBACK7
+
+; RUN: opt -O3 -rotation-max-header-size=4 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_OLDPM,FALLBACK8
+; RUN: opt -passes='default<O3>' -rotation-max-header-size=4 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_NEWPM,FALLBACK9
+
+; This example is produced from very basic C code:
+;
+; void f0();
+; void f1();
+; void f2();
+;
+; void loop(int width) {
+; if(width < 1)
+; return;
+; for(int i = 0; i < width - 1; ++i) {
+; f0();
+; f1();
+; }
+; f0();
+; f2();
+; }
+
+; We have a choice here. We can either
+; * hoist the f0() call into the loop header,
+; * which potentially makes loop rotation unprofitable since loop header might
+; have grown above certain threshold, and such unrotated loops will be
+; ignored by LoopVectorizer, preventing vectorization
+; * or loop rotation will succeed, resulting in some weird PHIs that will also
+; harm vectorization
+; * or not hoist f0() call before performing loop rotation,
+; at the cost of potential code bloat and/or potentially successfully rotating
+; the loops, vectorizing them at the cost of compile time.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @f0() ; called in the loop body and again after the loop in @_Z4loopi
+declare void @f1() ; called only in the loop body of @_Z4loopi
+declare void @f2() ; called only after the loop in @_Z4loopi
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) ; applied to the loop counter's stack slot before the loop
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) ; applied to the loop counter's stack slot on loop exit
+
+define void @_Z4loopi(i32 %width) { ; unoptimized IR for the C function loop(int width) quoted in the comment at the top of this test
+; HOIST-LABEL: @_Z4loopi(
+; HOIST-NEXT: entry:
+; HOIST-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
+; HOIST-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
+; HOIST: for.cond.preheader:
+; HOIST-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
+; HOIST-NEXT: br label [[FOR_COND:%.*]]
+; HOIST: for.cond:
+; HOIST-NEXT: [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ]
+; HOIST-NEXT: tail call void @f0()
+; HOIST-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]]
+; HOIST-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; HOIST: for.cond.cleanup:
+; HOIST-NEXT: tail call void @f2()
+; HOIST-NEXT: br label [[RETURN]]
+; HOIST: for.body:
+; HOIST-NEXT: tail call void @f1()
+; HOIST-NEXT: [[INC]] = add nuw i32 [[I_0]], 1
+; HOIST-NEXT: br label [[FOR_COND]]
+; HOIST: return:
+; HOIST-NEXT: ret void
+;
+; ROTATED_LATER_OLDPM-LABEL: @_Z4loopi(
+; ROTATED_LATER_OLDPM-NEXT: entry:
+; ROTATED_LATER_OLDPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
+; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
+; ROTATED_LATER_OLDPM: for.cond.preheader:
+; ROTATED_LATER_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
+; ROTATED_LATER_OLDPM-NEXT: tail call void @f0()
+; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
+; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
+; ROTATED_LATER_OLDPM: for.cond.cleanup:
+; ROTATED_LATER_OLDPM-NEXT: tail call void @f2()
+; ROTATED_LATER_OLDPM-NEXT: br label [[RETURN]]
+; ROTATED_LATER_OLDPM: for.body:
+; ROTATED_LATER_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ]
+; ROTATED_LATER_OLDPM-NEXT: tail call void @f1()
+; ROTATED_LATER_OLDPM-NEXT: [[INC]] = add nuw i32 [[I_04]], 1
+; ROTATED_LATER_OLDPM-NEXT: tail call void @f0()
+; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
+; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+; ROTATED_LATER_OLDPM: return:
+; ROTATED_LATER_OLDPM-NEXT: ret void
+;
+; ROTATED_LATER_NEWPM-LABEL: @_Z4loopi(
+; ROTATED_LATER_NEWPM-NEXT: entry:
+; ROTATED_LATER_NEWPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
+; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
+; ROTATED_LATER_NEWPM: for.cond.preheader:
+; ROTATED_LATER_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
+; ROTATED_LATER_NEWPM-NEXT: tail call void @f0()
+; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
+; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE:%.*]]
+; ROTATED_LATER_NEWPM: for.cond.preheader.for.body_crit_edge:
+; ROTATED_LATER_NEWPM-NEXT: [[INC_1:%.*]] = add nuw i32 0, 1
+; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY:%.*]]
+; ROTATED_LATER_NEWPM: for.cond.cleanup:
+; ROTATED_LATER_NEWPM-NEXT: tail call void @f2()
+; ROTATED_LATER_NEWPM-NEXT: br label [[RETURN]]
+; ROTATED_LATER_NEWPM: for.body:
+; ROTATED_LATER_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE]] ]
+; ROTATED_LATER_NEWPM-NEXT: tail call void @f1()
+; ROTATED_LATER_NEWPM-NEXT: tail call void @f0()
+; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
+; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
+; ROTATED_LATER_NEWPM: for.body.for.body_crit_edge:
+; ROTATED_LATER_NEWPM-NEXT: [[INC_0]] = add nuw i32 [[INC_PHI]], 1
+; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY]]
+; ROTATED_LATER_NEWPM: return:
+; ROTATED_LATER_NEWPM-NEXT: ret void
+;
+; ROTATE_OLDPM-LABEL: @_Z4loopi(
+; ROTATE_OLDPM-NEXT: entry:
+; ROTATE_OLDPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
+; ROTATE_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
+; ROTATE_OLDPM: for.cond.preheader:
+; ROTATE_OLDPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
+; ROTATE_OLDPM-NEXT: tail call void @f0()
+; ROTATE_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; ROTATE_OLDPM: for.body.preheader:
+; ROTATE_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
+; ROTATE_OLDPM-NEXT: br label [[FOR_BODY:%.*]]
+; ROTATE_OLDPM: for.cond.cleanup:
+; ROTATE_OLDPM-NEXT: tail call void @f2()
+; ROTATE_OLDPM-NEXT: br label [[RETURN]]
+; ROTATE_OLDPM: for.body:
+; ROTATE_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; ROTATE_OLDPM-NEXT: tail call void @f1()
+; ROTATE_OLDPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1
+; ROTATE_OLDPM-NEXT: tail call void @f0()
+; ROTATE_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
+; ROTATE_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+; ROTATE_OLDPM: return:
+; ROTATE_OLDPM-NEXT: ret void
+;
+; ROTATE_NEWPM-LABEL: @_Z4loopi(
+; ROTATE_NEWPM-NEXT: entry:
+; ROTATE_NEWPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
+; ROTATE_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
+; ROTATE_NEWPM: for.cond.preheader:
+; ROTATE_NEWPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
+; ROTATE_NEWPM-NEXT: tail call void @f0()
+; ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; ROTATE_NEWPM: for.body.preheader:
+; ROTATE_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
+; ROTATE_NEWPM-NEXT: [[INC_1:%.*]] = add nuw nsw i32 0, 1
+; ROTATE_NEWPM-NEXT: br label [[FOR_BODY:%.*]]
+; ROTATE_NEWPM: for.cond.cleanup:
+; ROTATE_NEWPM-NEXT: tail call void @f2()
+; ROTATE_NEWPM-NEXT: br label [[RETURN]]
+; ROTATE_NEWPM: for.body:
+; ROTATE_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ]
+; ROTATE_NEWPM-NEXT: tail call void @f1()
+; ROTATE_NEWPM-NEXT: tail call void @f0()
+; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
+; ROTATE_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
+; ROTATE_NEWPM: for.body.for.body_crit_edge:
+; ROTATE_NEWPM-NEXT: [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1
+; ROTATE_NEWPM-NEXT: br label [[FOR_BODY]]
+; ROTATE_NEWPM: return:
+; ROTATE_NEWPM-NEXT: ret void
+;
+entry: ; spill the argument to a stack slot and test the early-return condition
+ %width.addr = alloca i32, align 4 ; stack slot holding a copy of %width
+ %i = alloca i32, align 4 ; stack slot for the loop counter i
+ store i32 %width, i32* %width.addr, align 4
+ %i1 = load i32, i32* %width.addr, align 4
+ %cmp = icmp slt i32 %i1, 1 ; width < 1 (signed)
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; width < 1, so return without calling f0, f1 or f2
+ br label %return
+
+if.end: ; width >= 1, so set up the loop counter
+ %i2 = bitcast i32* %i to i8*
+ call void @llvm.lifetime.start.p0i8(i64 4, i8* %i2) ; lifetime of the 4-byte counter slot begins
+ store i32 0, i32* %i, align 4 ; i = 0
+ br label %for.cond
+
+for.cond: ; loop header, reached from if.end and from the for.inc back edge
+ %i3 = load i32, i32* %i, align 4
+ %i4 = load i32, i32* %width.addr, align 4
+ %sub = sub nsw i32 %i4, 1 ; width - 1
+ %cmp1 = icmp slt i32 %i3, %sub ; i < width - 1 (signed)
+ br i1 %cmp1, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; loop exit, the counter slot is no longer needed
+ %i5 = bitcast i32* %i to i8*
+ call void @llvm.lifetime.end.p0i8(i64 4, i8* %i5) ; lifetime of the 4-byte counter slot ends
+ br label %for.end
+
+for.body: ; loop body, f0() followed by f1()
+ call void @f0() ; the same call is repeated in for.end, which makes it the common code this test is about
+ call void @f1()
+ br label %for.inc
+
+for.inc: ; ++i, then take the back edge to the loop header
+ %i6 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %i6, 1 ; i + 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; code that follows the loop in the C source, f0() then f2()
+ call void @f0()
+ call void @f2()
+ br label %return
+
+return: ; single exit shared by the early return (if.then) and the post-loop path (for.end)
+ ret void
+}
More information about the llvm-commits
mailing list