[llvm] r350532 - AMDGPU: test for uniformity of branch instruction, not its condition
Rhys Perry via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 7 07:52:28 PST 2019
Author: pendingchaos
Date: Mon Jan 7 07:52:28 2019
New Revision: 350532
URL: http://llvm.org/viewvc/llvm-project?rev=350532&view=rev
Log:
AMDGPU: test for uniformity of branch instruction, not its condition
Summary:
If a branch instruction is marked as divergent by propagation rule 2 in
DivergencePropagator::exploreSyncDependency() but its condition is uniform,
that branch would incorrectly be assumed to be uniform.
Reviewers: arsenm, tstellar
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D56331
Added:
llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp?rev=350532&r1=350531&r2=350532&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp Mon Jan 7 07:52:28 2019
@@ -117,14 +117,8 @@ bool AMDGPUAnnotateUniformValues::isClob
}
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
- if (I.isUnconditional())
- return;
-
- Value *Cond = I.getCondition();
- if (!DA->isUniform(Cond))
- return;
-
- setUniformMetadata(I.getParent()->getTerminator());
+ if (DA->isUniform(&I))
+ setUniformMetadata(I.getParent()->getTerminator());
}
void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Modified: llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp?rev=350532&r1=350531&r2=350532&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp Mon Jan 7 07:52:28 2019
@@ -155,7 +155,7 @@ bool SIAnnotateControlFlow::doInitializa
/// Is the branch condition uniform or did the StructurizeCFG pass
/// consider it as such?
bool SIAnnotateControlFlow::isUniform(BranchInst *T) {
- return DA->isUniform(T->getCondition()) ||
+ return DA->isUniform(T) ||
T->getMetadata("structurizecfg.uniform") != nullptr;
}
Added: llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll?rev=350532&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll Mon Jan 7 07:52:28 2019
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+
+; This module creates a divergent branch. The branch is marked as divergent by
+; the divergence analysis, but the condition is not. This test ensures that the
+; divergence of the branch itself is tested, not that of its condition, so that
+; the branch is correctly emitted as divergent.
+
+target triple = "amdgcn-mesa-mesa3d"
+
+define amdgpu_ps void @main(i32, float) {
+; CHECK-LABEL: main:
+; CHECK: ; %bb.0: ; %start
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: s_mov_b32 m0, s0
+; CHECK-NEXT: s_mov_b64 s[4:5], 0
+; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
+; CHECK-NEXT: v_cmp_nlt_f32_e64 s[0:1], 0, v0
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
+; CHECK-NEXT: BB0_1: ; %loop
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1
+; CHECK-NEXT: s_and_b64 vcc, exec, vcc
+; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
+; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec
+; CHECK-NEXT: s_cbranch_vccz BB0_5
+; CHECK-NEXT: ; %bb.2: ; %endif1
+; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: s_mov_b64 s[6:7], -1
+; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1]
+; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
+; CHECK-NEXT: ; mask branch BB0_4
+; CHECK-NEXT: BB0_3: ; %endif2
+; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
+; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
+; CHECK-NEXT: BB0_4: ; %Flow1
+; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
+; CHECK-NEXT: s_branch BB0_6
+; CHECK-NEXT: BB0_5: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: ; implicit-def: $vgpr1
+; CHECK-NEXT: BB0_6: ; %Flow
+; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7]
+; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9]
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_cbranch_execnz BB0_1
+; CHECK-NEXT: ; %bb.7: ; %Flow2
+; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; this is the divergent branch with the condition not marked as divergent
+; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
+; CHECK-NEXT: ; mask branch BB0_9
+; CHECK-NEXT: BB0_8: ; %if1
+; CHECK-NEXT: v_sqrt_f32_e32 v1, v0
+; CHECK-NEXT: BB0_9: ; %endloop
+; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
+; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
+; CHECK-NEXT: s_endpgm
+start:
+ %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
+ br label %loop
+
+loop:
+ %v1 = phi i32 [ 0, %start ], [ %v5, %endif2 ]
+ %v2 = icmp ugt i32 %v1, 31
+ br i1 %v2, label %if1, label %endif1
+
+if1:
+ %v3 = call float @llvm.sqrt.f32(float %v0)
+ br label %endloop
+
+endif1:
+ %v4 = fcmp ogt float %v0, 0.000000e+00
+ br i1 %v4, label %endloop, label %endif2
+
+endif2:
+ %v5 = add i32 %v1, 1
+ br label %loop
+
+endloop:
+ %v6 = phi float [ %v3, %if1 ], [ 0.0, %endif1 ]
+ call void @llvm.amdgcn.exp.v4f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
+ ret void
+}
+
+declare float @llvm.sqrt.f32(float) #1
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
+declare void @llvm.amdgcn.exp.v4f32(i32, i32, float, float, float, float, i1, i1) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
More information about the llvm-commits
mailing list