[llvm] r361610 - StructurizeCFG: Relax uniformity checks.
Neil Henning via llvm-commits
llvm-commits at lists.llvm.org
Fri May 24 01:59:17 PDT 2019
Author: sheredom
Date: Fri May 24 01:59:17 2019
New Revision: 361610
URL: http://llvm.org/viewvc/llvm-project?rev=361610&view=rev
Log:
StructurizeCFG: Relax uniformity checks.
This change relaxes the checks in hasOnlyUniformBranches so that, when the
new -structurizecfg-relaxed-uniform-regions option is enabled (it is off by
default), a region is considered uniform if:
1. All conditional branches that are direct children are uniform.
2. And either:
a. All sub-regions are uniform.
b. There is at most one conditional branch among the direct
children.
Differential Revision: https://reviews.llvm.org/D62198
Modified:
llvm/trunk/lib/Transforms/Scalar/StructurizeCFG.cpp
llvm/trunk/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
Modified: llvm/trunk/lib/Transforms/Scalar/StructurizeCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/StructurizeCFG.cpp?rev=361610&r1=361609&r2=361610&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/StructurizeCFG.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/StructurizeCFG.cpp Fri May 24 01:59:17 2019
@@ -62,6 +62,11 @@ static cl::opt<bool> ForceSkipUniformReg
cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
cl::init(false));
+static cl::opt<bool>
+ RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
+ cl::desc("Allow relaxed uniform region checks"),
+ cl::init(false));
+
// Definition of the complex types used in this pass.
using BBValuePair = std::pair<BasicBlock *, Value *>;
@@ -936,6 +941,11 @@ void StructurizeCFG::rebuildSSA() {
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
const LegacyDivergenceAnalysis &DA) {
+ // Bool for if all sub-regions are uniform.
+ bool SubRegionsAreUniform = true;
+ // Count of how many direct children are conditional.
+ unsigned ConditionalDirectChildren = 0;
+
for (auto E : R->elements()) {
if (!E->isSubRegion()) {
auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
@@ -944,6 +954,10 @@ static bool hasOnlyUniformBranches(Regio
if (!DA.isUniform(Br))
return false;
+
+ // One of our direct children is conditional.
+ ConditionalDirectChildren++;
+
LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
<< " has uniform terminator\n");
} else {
@@ -961,12 +975,25 @@ static bool hasOnlyUniformBranches(Regio
if (!Br || !Br->isConditional())
continue;
- if (!Br->getMetadata(UniformMDKindID))
- return false;
+ if (!Br->getMetadata(UniformMDKindID)) {
+ // Early exit if we cannot have relaxed uniform regions.
+ if (!RelaxedUniformRegions)
+ return false;
+
+ SubRegionsAreUniform = false;
+ break;
+ }
}
}
}
- return true;
+
+ // Our region is uniform if:
+ // 1. All conditional branches that are direct children are uniform (checked
+ // above).
+ // 2. And either:
+ // a. All sub-regions are uniform.
+ // b. There is one or less conditional branches among the direct children.
+ return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
}
/// Run the transformation for each region found
Modified: llvm/trunk/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll?rev=361610&r1=361609&r2=361610&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll (original)
+++ llvm/trunk/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll Fri May 24 01:59:17 2019
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=amdgcn-- -S -o - -structurizecfg -structurizecfg-skip-uniform-regions < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -S -o - -structurizecfg -structurizecfg-skip-uniform-regions -structurizecfg-relaxed-uniform-regions < %s | FileCheck %s
define amdgpu_cs void @uniform(i32 inreg %v) {
; CHECK-LABEL: @uniform(
@@ -76,6 +76,112 @@ if:
br label %end
end:
+ ret void
+}
+
+define amdgpu_cs void @uniform_branch_to_nonuniform_subregions(i32 addrspace(4)* %ptr, i32 inreg %data) {
+; CHECK-LABEL: @uniform_branch_to_nonuniform_subregions(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[DATA:%.*]], 42
+; CHECK-NEXT: br i1 [[C]], label [[UNIFORM_FOR_BODY:%.*]], label [[FOR_BODY:%.*]], !structurizecfg.uniform !0
+; CHECK: for.body:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FLOW1:%.*]] ]
+; CHECK-NEXT: [[CC:%.*]] = icmp ult i32 [[I]], 4
+; CHECK-NEXT: br i1 [[CC]], label [[MID_LOOP:%.*]], label [[FLOW1]]
+; CHECK: mid.loop:
+; CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[CC2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: br i1 [[CC2]], label [[END_LOOP:%.*]], label [[FLOW2:%.*]]
+; CHECK: Flow1:
+; CHECK-NEXT: [[TMP0]] = phi i32 [ [[TMP2:%.*]], [[FLOW2]] ], [ undef, [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP3:%.*]], [[FLOW2]] ], [ true, [[FOR_BODY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: end.loop:
+; CHECK-NEXT: [[I_INC:%.*]] = add i32 [[I]], 1
+; CHECK-NEXT: br label [[FLOW2]]
+; CHECK: Flow2:
+; CHECK-NEXT: [[TMP2]] = phi i32 [ [[I_INC]], [[END_LOOP]] ], [ undef, [[MID_LOOP]] ]
+; CHECK-NEXT: [[TMP3]] = phi i1 [ false, [[END_LOOP]] ], [ true, [[MID_LOOP]] ]
+; CHECK-NEXT: br label [[FLOW1]]
+; CHECK: for.end:
+; CHECK-NEXT: br i1 [[CC]], label [[IF:%.*]], label [[FLOW:%.*]]
+; CHECK: if:
+; CHECK-NEXT: br label [[FLOW]]
+; CHECK: uniform.for.body:
+; CHECK-NEXT: [[UNIFORM_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP4:%.*]], [[FLOW4:%.*]] ]
+; CHECK-NEXT: [[UNIFORM_CC:%.*]] = icmp ult i32 [[UNIFORM_I]], 4
+; CHECK-NEXT: br i1 [[UNIFORM_CC]], label [[UNIFORM_MID_LOOP:%.*]], label [[FLOW4]]
+; CHECK: uniform.mid.loop:
+; CHECK-NEXT: [[UNIFORM_V:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[UNIFORM_CC2:%.*]] = icmp eq i32 [[UNIFORM_V]], 0
+; CHECK-NEXT: br i1 [[UNIFORM_CC2]], label [[UNIFORM_END_LOOP:%.*]], label [[FLOW5:%.*]]
+; CHECK: Flow4:
+; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP6:%.*]], [[FLOW5]] ], [ undef, [[UNIFORM_FOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW5]] ], [ true, [[UNIFORM_FOR_BODY]] ]
+; CHECK-NEXT: br i1 [[TMP5]], label [[UNIFORM_FOR_END:%.*]], label [[UNIFORM_FOR_BODY]]
+; CHECK: uniform.end.loop:
+; CHECK-NEXT: [[UNIFORM_I_INC:%.*]] = add i32 [[UNIFORM_I]], 1
+; CHECK-NEXT: br label [[FLOW5]]
+; CHECK: Flow5:
+; CHECK-NEXT: [[TMP6]] = phi i32 [ [[UNIFORM_I_INC]], [[UNIFORM_END_LOOP]] ], [ undef, [[UNIFORM_MID_LOOP]] ]
+; CHECK-NEXT: [[TMP7]] = phi i1 [ false, [[UNIFORM_END_LOOP]] ], [ true, [[UNIFORM_MID_LOOP]] ]
+; CHECK-NEXT: br label [[FLOW4]]
+; CHECK: uniform.for.end:
+; CHECK-NEXT: br i1 [[UNIFORM_CC]], label [[UNIFORM_IF:%.*]], label [[FLOW3:%.*]]
+; CHECK: uniform.if:
+; CHECK-NEXT: br label [[FLOW3]]
+; CHECK: Flow:
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: Flow3:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %c = icmp eq i32 %data, 42
+ br i1 %c, label %uniform.for.body, label %for.body
+
+for.body:
+ %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+ %cc = icmp ult i32 %i, 4
+ br i1 %cc, label %mid.loop, label %for.end
+
+mid.loop:
+ %v = call i32 @llvm.amdgcn.workitem.id.x()
+ %cc2 = icmp eq i32 %v, 0
+ br i1 %cc2, label %end.loop, label %for.end
+
+end.loop:
+ %i.inc = add i32 %i, 1
+ br label %for.body
+
+for.end:
+ br i1 %cc, label %if, label %end
+
+if:
+ br label %end
+
+uniform.for.body:
+ %uniform.i = phi i32 [0, %entry], [%uniform.i.inc, %uniform.end.loop]
+ %uniform.cc = icmp ult i32 %uniform.i, 4
+ br i1 %uniform.cc, label %uniform.mid.loop, label %uniform.for.end
+
+uniform.mid.loop:
+ %uniform.v = call i32 @llvm.amdgcn.workitem.id.x()
+ %uniform.cc2 = icmp eq i32 %uniform.v, 0
+ br i1 %uniform.cc2, label %uniform.end.loop, label %uniform.for.end
+
+uniform.end.loop:
+ %uniform.i.inc = add i32 %uniform.i, 1
+ br label %uniform.for.body
+
+uniform.for.end:
+ br i1 %uniform.cc, label %uniform.if, label %end
+
+uniform.if:
+ br label %end
+
+end:
ret void
}
More information about the llvm-commits
mailing list