[llvm] r288744 - AMDGPU: Don't require structured CFG

Mon Dec 5 17:02:51 PST 2016

Author: arsenm
Date: Mon Dec  5 19:02:51 2016
New Revision: 288744

URL: http://llvm.org/viewvc/llvm-project?rev=288744&view=rev
Log:
AMDGPU: Don't require structured CFG

The structured CFG is just an aid to inserting exec
mask modification instructions; once that is done,
we don't really need it anymore. We also
do not analyze blocks with terminators that
modify exec, so this should only impact
true branches.

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll
    llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll
    llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
    llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
    llvm/trunk/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
    llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll
    llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
    llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
    llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Dec  5 19:02:51 2016
@@ -162,7 +162,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine
                       FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
     TLOF(createTLOF(getTargetTriple())),
     IntrinsicInfo() {
-  setRequiresStructuredCFG(true);
   initAsmInfo();
 }
 
@@ -191,7 +190,9 @@ R600TargetMachine::R600TargetMachine(con
                                      TargetOptions Options,
                                      Optional<Reloc::Model> RM,
                                      CodeModel::Model CM, CodeGenOpt::Level OL)
-  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
+  setRequiresStructuredCFG(true);
+}
 
 const R600Subtarget *R600TargetMachine::getSubtargetImpl(
   const Function &F) const {

Modified: llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll Mon Dec  5 19:02:51 2016
@@ -8,13 +8,15 @@
 ; GCNNOOPT: v_writelane_b32
 ; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
 
+
 ; GCN: ; BB#1
 ; GCNNOOPT: v_readlane_b32
 ; GCNNOOPT: v_readlane_b32
 ; GCN: buffer_store_dword
-; GCN: s_endpgm
+; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; TODO: This waitcnt can be eliminated
 
-; GCN: {{^}}[[END]]
+; GCN: {{^}}[[END]]:
 ; GCN: s_endpgm
 define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) #0 {
   %cmp = icmp ne i32 %val, 0
@@ -35,9 +37,10 @@ end:
 ; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]]
 
 ; GCN: buffer_store_dword
-; GCN: s_endpgm
+; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; TODO: This waitcnt can be eliminated
 
-; GCN: {{^}}[[END]]
+; GCN: {{^}}[[END]]:
 ; GCN: s_endpgm
 define void @test_brcc_i1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i1 %val) #0 {
   %cmp0 = icmp ne i1 %val, 0

Modified: llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll Mon Dec  5 19:02:51 2016
@@ -12,9 +12,10 @@
 ; GCN: s_cbranch_vccnz
 
 ; GCN: one{{$}}
-; SI:  v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
+; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
+; SI: s_branch
+; VI: buffer_store_short
+; VI: s_endpgm
 
 ; GCN: two{{$}}
 ; SI:  v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
@@ -47,17 +48,19 @@ two:
 ; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
 ; SI:  v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
 ; VI:  v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
-; GCN: s_cbranch_vccnz
+; SI: s_cbranch_vccz
+; VI: s_cbranch_vccnz
 
-; GCN: one{{$}}
-; VI:  v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x3800{{$}}
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
+; VI: one{{$}}
+; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}}
 
 ; GCN: two{{$}}
 ; SI:  v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
-; GCN: buffer_store_short v[[B_F16]]
-; GCN: s_endpgm
+
+; SI: one{{$}}
+; SI: buffer_store_short v[[A_F16]]
+; SI: s_endpgm
+
 define void @br_cc_f16_imm_a(
     half addrspace(1)* %r,
     half addrspace(1)* %b) {
@@ -87,8 +90,6 @@ two:
 
 ; GCN: one{{$}}
 ; SI:  v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
 
 ; GCN: two{{$}}
 ; VI:  v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}

Modified: llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll Mon Dec  5 19:02:51 2016
@@ -475,14 +475,13 @@ ret:
 
 ; GCN-LABEL: {{^}}long_branch_hang:
 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
-; GCN-NEXT: s_cbranch_scc1 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: s_branch  [[SHORTB:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_scc0 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
 
-; GCN-NEXT: [[LONG_BR_0]]:
 ; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
 ; GCN: s_setpc_b64
 
-; GCN: [[SHORTB]]:
+; GCN-NEXT: [[LONG_BR_0]]:
 ; GCN-DAG: v_cmp_lt_i32
 ; GCN-DAG: v_cmp_gt_i32
 ; GCN: s_cbranch_vccnz
@@ -492,7 +491,6 @@ ret:
 
 ; GCN: [[LONG_BR_DEST0]]
 ; GCN: v_cmp_ne_u32_e32
-; GCN-NEXT: ; implicit-def
 ; GCN-NEXT: s_cbranch_vccz
 ; GCN: s_setpc_b64
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll Mon Dec  5 19:02:51 2016
@@ -506,11 +506,13 @@ bb:
 bb1:
   %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
   %tmp3 = extractelement <4 x float> %tmp2, i32 undef
+  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
   br label %bb7
 
 bb4:
   %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
   %tmp6 = extractelement <4 x float> %tmp5, i32 undef
+  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
   br label %bb7
 
 bb7:
@@ -554,11 +556,13 @@ bb:
 bb1:                                              ; preds = %bb
   %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
   %tmp3 = insertelement <4 x float> %tmp2, float %val0, i32 undef
+  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp3) #0 ; Prevent block optimize out
   br label %bb7
 
 bb4:                                              ; preds = %bb
   %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
   %tmp6 = insertelement <4 x float> %tmp5, float %val0, i32 undef
+  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp6) #0 ; Prevent block optimize out
   br label %bb7
 
 bb7:                                              ; preds = %bb4, %bb1
@@ -745,6 +749,8 @@ bb8:
 }
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare void @llvm.amdgcn.s.barrier() #2
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind convergent }

Modified: llvm/trunk/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll Mon Dec  5 19:02:51 2016
@@ -1,8 +1,5 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-; FIXME: Enabling critical edge splitting will fix this.
-; XFAIL: *
-
 ; Make sure that m0 is not reinitialized in the loop.
 
 ; GCN-LABEL: {{^}}copy_local_to_global_loop_m0_init:
@@ -12,7 +9,9 @@
 ; GCN: s_mov_b32 m0, -1
 
 ; GCN: BB0_2:
+; GCN-NOT: m0
 ; GCN: ds_read_b32
+; GCN-NOT: m0
 ; GCN: buffer_store_dword
 
 ; GCN: s_cbranch_scc0 BB0_2

Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll Mon Dec  5 19:02:51 2016
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ;
 ;
 ; Most SALU instructions ignore control flow, so we need to make sure
@@ -9,7 +9,9 @@
 ; about instructions in different blocks overwriting each other.
 ; SI-LABEL: {{^}}sgpr_if_else_salu_br:
 ; SI: s_add
-; SI: s_add
+; SI: s_branch
+
+; SI: s_sub
 
 define void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
 entry:
@@ -17,6 +19,45 @@ entry:
   br i1 %0, label %if, label %else
 
 if:
+  %1 = sub i32 %b, %c
+  br label %endif
+
+else:
+  %2 = add i32 %d, %e
+  br label %endif
+
+endif:
+  %3 = phi i32 [%1, %if], [%2, %else]
+  %4 = add i32 %3, %a
+  store i32 %4, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}sgpr_if_else_salu_br_opt:
+; SI: s_cmp_lg_u32
+; SI: s_cbranch_scc0 [[IF:BB[0-9]+_[0-9]+]]
+
+; SI: ; BB#1: ; %else
+; SI: s_load_dword [[LOAD0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xe
+; SI: s_load_dword [[LOAD1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xf
+; SI-NOT: add
+; SI: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
+
+; SI: [[IF]]: ; %if
+; SI: s_load_dword [[LOAD0]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_load_dword [[LOAD1]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-NOT: add
+
+; SI: [[ENDIF]]: ; %endif
+; SI: s_add_i32 s{{[0-9]+}}, [[LOAD0]], [[LOAD1]]
+; SI: buffer_store_dword
+; SI-NEXT: s_endpgm
+define void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+entry:
+  %0 = icmp eq i32 %a, 0
+  br i1 %0, label %if, label %else
+
+if:
   %1 = add i32 %b, %c
   br label %endif
 
@@ -67,7 +108,7 @@ endif:
 ; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]]
 ; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
 
-; SI: BB2_2:
+; SI: BB{{[0-9]+}}_2:
 ; SI: buffer_load_dword [[AVAL:v[0-9]+]]
 ; SI: v_cmp_eq_u32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
 ; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]

Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll Mon Dec  5 19:02:51 2016
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
 
 ; This test checks that no VGPR to SGPR copies are created by the register
 ; allocator.
@@ -223,8 +223,15 @@ declare i32 @llvm.SI.packf16(float, floa
 ; an assertion failure.
 
 ; CHECK-LABEL: {{^}}sample_v3:
-; CHECK: image_sample
-; CHECK: image_sample
+; CHECK: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 11
+; CHECK: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 13
+; CHECK: s_branch
+
+; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 5
+; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 7
+
+; CHECK: BB{{[0-9]+_[0-9]+}}:
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
 ; CHECK: exp
 ; CHECK: s_endpgm
 define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
@@ -241,14 +248,14 @@ entry:
   br i1 %tmp27, label %if, label %else
 
 if:                                               ; preds = %entry
-  %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> zeroinitializer, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 11, i32 13>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %val.if.0 = extractelement <4 x float> %val.if, i32 0
   %val.if.1 = extractelement <4 x float> %val.if, i32 1
   %val.if.2 = extractelement <4 x float> %val.if, i32 2
   br label %endif
 
 else:                                             ; preds = %entry
-  %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1, i32 0>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 5, i32 7>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
   %val.else.0 = extractelement <4 x float> %val.else, i32 0
   %val.else.1 = extractelement <4 x float> %val.else, i32 1
   %val.else.2 = extractelement <4 x float> %val.else, i32 2
@@ -317,9 +324,15 @@ ENDIF69:
 
 ; This test checks that image_sample resource descriptors aren't loaded into
 ; vgprs.  The verifier will fail if this happens.
-; CHECK-LABEL:{{^}}sample_rsrc:
-; CHECK: image_sample
-; CHECK: image_sample
+; CHECK-LABEL:{{^}}sample_rsrc
+
+; CHECK: s_cmp_eq_u32
+; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]]
+
+; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
+
+; [[END]]:
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
 ; CHECK: s_endpgm
 define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
 bb:

Modified: llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll Mon Dec  5 19:02:51 2016
@@ -308,10 +308,8 @@ end:
 ; CHECK: s_mov_b64 exec, 0
 
 ; CHECK: [[SKIPKILL]]:
-; CHECK: v_cmp_nge_f32
-; CHECK-NEXT: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
-
-; CHECK: [[UNREACHABLE]]:
+; CHECK: v_cmp_nge_f32_e32 vcc
+; CHECK-NEXT: BB#3: ; %bb5
 ; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
 define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
 bb:

Modified: llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll Mon Dec  5 19:02:51 2016
@@ -197,15 +197,15 @@ if.end:
 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
 ; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
 
-; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
-; GCN: buffer_store_dword [[TWO]]
+; GCN: v_mov_b32_e32 [[IMM_REG:v[0-9]+]], 2
 ; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
 
 ; GCN: [[IF_LABEL]]:
-; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
-; GCN: buffer_store_dword [[ONE]]
+; GCN-NEXT: v_mov_b32_e32 [[IMM_REG]], 1
+
+; GCN-NEXT: [[ENDIF_LABEL]]:
+; GCN: buffer_store_dword [[IMM_REG]]
 
-; GCN: [[ENDIF_LABEL]]:
 ; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
 ; GCN: buffer_store_dword [[THREE]]
 ; GCN: s_endpgm