[llvm] 0c8a218 - [AMDGPU] improve fragile test for divergent branches

Thu Feb 27 10:01:46 PST 2020

Author: Sameer Sahasrabuddhe
Date: 2020-02-27T23:31:03+05:30
New Revision: 0c8a21879872c8ca8aa5966263f9d67207b3108e

URL: https://github.com/llvm/llvm-project/commit/0c8a21879872c8ca8aa5966263f9d67207b3108e
DIFF: https://github.com/llvm/llvm-project/commit/0c8a21879872c8ca8aa5966263f9d67207b3108e.diff

LOG: [AMDGPU] improve fragile test for divergent branches

Summary:
The affected LIT test intends to test the correct use of divergence
analysis to detect a divergent branch with a uniform predicate. The
passes involved are LLVM IR passes, but the test runs llc and tries to
match against generated ISA, which makes it hard to demonstrate that
the intended behavior was really tested. This change replaces it with a
test that invokes opt on the required passes and then checks for the
appropriate changes in the LLVM IR.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D75267

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 55841aa66f35..a9a91803b08e 100644

--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -1,97 +1,78 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+; RUN: opt --amdgpu-annotate-uniform -S %s |  FileCheck %s -check-prefix=UNIFORM
+; RUN: opt --amdgpu-annotate-uniform --si-annotate-control-flow -S %s |  FileCheck %s -check-prefix=CONTROLFLOW
 
-; This module creates a divergent branch. The branch is marked as divergent by
-; the divergence analysis but the condition is not. This test ensures that the
-; divergence of the branch is tested, not its condition, so that branch is
-; correctly emitted as divergent.
+; This module creates a divergent branch in block Flow2. The branch is
+; marked as divergent by the divergence analysis but the condition is
+; not. This test ensures that the divergence of the branch is tested,
+; not its condition, so that branch is correctly emitted as divergent.
 
 target triple = "amdgcn-mesa-mesa3d"
 
-define amdgpu_ps void @main(i32, float) {
-; CHECK-LABEL: main:
-; CHECK:       ; %bb.0: ; %start
-; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
-; CHECK-NEXT:    s_mov_b32 m0, s0
-; CHECK-NEXT:    s_mov_b32 s0, 0
-; CHECK-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
-; CHECK-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; CHECK-NEXT:    s_mov_b64 s[2:3], 0
-; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
-; CHECK-NEXT:    ; implicit-def: $sgpr4_sgpr5
-; CHECK-NEXT:    s_branch BB0_3
-; CHECK-NEXT:  BB0_1: ; %Flow1
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
-; CHECK-NEXT:    s_mov_b64 s[8:9], 0
-; CHECK-NEXT:  BB0_2: ; %Flow
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_and_b64 s[10:11], exec, s[6:7]
-; CHECK-NEXT:    s_or_b64 s[2:3], s[10:11], s[2:3]
-; CHECK-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
-; CHECK-NEXT:    s_and_b64 s[8:9], s[8:9], exec
-; CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
-; CHECK-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; CHECK-NEXT:    s_cbranch_execz BB0_6
-; CHECK-NEXT:  BB0_3: ; %loop
-; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
-; CHECK-NEXT:    s_cmp_lt_u32 s0, 32
-; CHECK-NEXT:    s_mov_b64 s[8:9], -1
-; CHECK-NEXT:    s_cbranch_scc0 BB0_2
-; CHECK-NEXT:  ; %bb.4: ; %endif1
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_mov_b64 s[6:7], -1
-; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_execz BB0_1
-; CHECK-NEXT:  ; %bb.5: ; %endif2
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_add_i32 s0, s0, 1
-; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
-; CHECK-NEXT:    s_branch BB0_1
-; CHECK-NEXT:  BB0_6: ; %Flow2
-; CHECK-NEXT:    s_or_b64 exec, exec, s[2:3]
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[4:5]
-; CHECK-NEXT:  ; %bb.7: ; %if1
-; CHECK-NEXT:    v_sqrt_f32_e32 v1, v0
-; CHECK-NEXT:  ; %bb.8: ; %endloop
-; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1]
-; CHECK-NEXT:    exp mrt0 v1, v1, v1, v1 done vm
-; CHECK-NEXT:    s_endpgm
-
-; this is the divergent branch with the condition not marked as divergent
+define amdgpu_ps void @main(i32 %0, float %1) {
 start:
   %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
   br label %loop
 
-loop:
-  %v1 = phi i32 [ 0, %start ], [ %v5, %endif2 ]
+loop:                                             ; preds = %Flow, %start
+  %v1 = phi i32 [ 0, %start ], [ %6, %Flow ]
   %v2 = icmp ugt i32 %v1, 31
-  br i1 %v2, label %if1, label %endif1
+  %2 = xor i1 %v2, true
+  br i1 %2, label %endif1, label %Flow
 
-if1:
+Flow1:                                            ; preds = %endif2, %endif1
+  %3 = phi i32 [ %v5, %endif2 ], [ undef, %endif1 ]
+  %4 = phi i1 [ false, %endif2 ], [ true, %endif1 ]
+  br label %Flow
+
+; UNIFORM-LABEL: Flow2:
+; UNIFORM-NEXT: br i1 %8, label %if1, label %endloop
+; UNIFORM-NOT: !amdgpu.uniform
+; UNIFORM: if1:
+
+; CONTROLFLOW-LABEL: Flow2:
+; CONTROLFLOW-NEXT:  call void @llvm.amdgcn.end.cf.i64(i64 %{{.*}})
+; CONTROLFLOW-NEXT:  [[IF:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %{{.*}})
+; CONTROLFLOW-NEXT:  [[COND:%.*]] = extractvalue { i1, i64 } [[IF]], 0
+; CONTROLFLOW-NEXT:  %{{.*}} = extractvalue { i1, i64 } [[IF]], 1
+; CONTROLFLOW-NEXT:  br i1 [[COND]], label %if1, label %endloop
+
+Flow2:                                            ; preds = %Flow
+  br i1 %8, label %if1, label %endloop
+
+if1:                                              ; preds = %Flow2
   %v3 = call float @llvm.sqrt.f32(float %v0)
   br label %endloop
 
-endif1:
+endif1:                                           ; preds = %loop
   %v4 = fcmp ogt float %v0, 0.000000e+00
-  br i1 %v4, label %endloop, label %endif2
+  %5 = xor i1 %v4, true
+  br i1 %5, label %endif2, label %Flow1
 
-endif2:
+Flow:                                             ; preds = %Flow1, %loop
+  %6 = phi i32 [ %3, %Flow1 ], [ undef, %loop ]
+  %7 = phi i1 [ %4, %Flow1 ], [ true, %loop ]
+  %8 = phi i1 [ false, %Flow1 ], [ true, %loop ]
+  br i1 %7, label %Flow2, label %loop
+
+endif2:                                           ; preds = %endif1
   %v5 = add i32 %v1, 1
-  br label %loop
+  br label %Flow1
 
-endloop:
-  %v6 = phi float [ %v3, %if1 ], [ 0.0, %endif1 ]
-  call void @llvm.amdgcn.exp.v4f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
+endloop:                                          ; preds = %if1, %Flow2
+  %v6 = phi float [ 0.000000e+00, %Flow2 ], [ %v3, %if1 ]
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
   ret void
 }
 
-declare float @llvm.sqrt.f32(float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare void @llvm.amdgcn.exp.v4f32(i32, i32, float, float, float, float, i1, i1) #0
+; Function Attrs: nounwind readnone speculatable willreturn
+declare float @llvm.sqrt.f32(float) #0
+
+; Function Attrs: nounwind readnone speculatable
+declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #1
+
+; Function Attrs: inaccessiblememonly nounwind writeonly
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #2
 
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone speculatable willreturn }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { inaccessiblememonly nounwind writeonly }