[llvm] r288744 - AMDGPU: Don't required structured CFG
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 5 17:02:51 PST 2016
Author: arsenm
Date: Mon Dec 5 19:02:51 2016
New Revision: 288744
URL: http://llvm.org/viewvc/llvm-project?rev=288744&view=rev
Log:
AMDGPU: Don't required structured CFG
The structured CFG is just an aid to inserting exec
mask modification instructions, once that is done
we don't really need it anymore. We also
do not analyze blocks with terminators that
modify exec, so this should only be impacting
true branches.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll
llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
llvm/trunk/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll
llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Dec 5 19:02:51 2016
@@ -162,7 +162,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine
FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
TLOF(createTLOF(getTargetTriple())),
IntrinsicInfo() {
- setRequiresStructuredCFG(true);
initAsmInfo();
}
@@ -191,7 +190,9 @@ R600TargetMachine::R600TargetMachine(con
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
+ setRequiresStructuredCFG(true);
+}
const R600Subtarget *R600TargetMachine::getSubtargetImpl(
const Function &F) const {
Modified: llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll Mon Dec 5 19:02:51 2016
@@ -8,13 +8,15 @@
; GCNNOOPT: v_writelane_b32
; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
+
; GCN: ; BB#1
; GCNNOOPT: v_readlane_b32
; GCNNOOPT: v_readlane_b32
; GCN: buffer_store_dword
-; GCN: s_endpgm
+; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; TODO: This waitcnt can be eliminated
-; GCN: {{^}}[[END]]
+; GCN: {{^}}[[END]]:
; GCN: s_endpgm
define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) #0 {
%cmp = icmp ne i32 %val, 0
@@ -35,9 +37,10 @@ end:
; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]]
; GCN: buffer_store_dword
-; GCN: s_endpgm
+; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; TODO: This waitcnt can be eliminated
-; GCN: {{^}}[[END]]
+; GCN: {{^}}[[END]]:
; GCN: s_endpgm
define void @test_brcc_i1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i1 %val) #0 {
%cmp0 = icmp ne i1 %val, 0
Modified: llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/br_cc.f16.ll Mon Dec 5 19:02:51 2016
@@ -12,9 +12,10 @@
; GCN: s_cbranch_vccnz
; GCN: one{{$}}
-; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
+; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
+; SI: s_branch
+; VI: buffer_store_short
+; VI: s_endpgm
; GCN: two{{$}}
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
@@ -47,17 +48,19 @@ two:
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
-; GCN: s_cbranch_vccnz
+; SI: s_cbranch_vccz
+; VI: s_cbranch_vccnz
-; GCN: one{{$}}
-; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x3800{{$}}
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
+; VI: one{{$}}
+; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}}
; GCN: two{{$}}
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
-; GCN: buffer_store_short v[[B_F16]]
-; GCN: s_endpgm
+
+; SI: one{{$}}
+; SI: buffer_store_short v[[A_F16]]
+; SI: s_endpgm
+
define void @br_cc_f16_imm_a(
half addrspace(1)* %r,
half addrspace(1)* %b) {
@@ -87,8 +90,6 @@ two:
; GCN: one{{$}}
; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
; GCN: two{{$}}
; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll Mon Dec 5 19:02:51 2016
@@ -475,14 +475,13 @@ ret:
; GCN-LABEL: {{^}}long_branch_hang:
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
-; GCN-NEXT: s_cbranch_scc1 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: s_branch [[SHORTB:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_scc0 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
-; GCN-NEXT: [[LONG_BR_0]]:
; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
; GCN: s_setpc_b64
-; GCN: [[SHORTB]]:
+; GCN-NEXT: [[LONG_BR_0]]:
; GCN-DAG: v_cmp_lt_i32
; GCN-DAG: v_cmp_gt_i32
; GCN: s_cbranch_vccnz
@@ -492,7 +491,6 @@ ret:
; GCN: [[LONG_BR_DEST0]]
; GCN: v_cmp_ne_u32_e32
-; GCN-NEXT: ; implicit-def
; GCN-NEXT: s_cbranch_vccz
; GCN: s_setpc_b64
Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll Mon Dec 5 19:02:51 2016
@@ -506,11 +506,13 @@ bb:
bb1:
%tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
%tmp3 = extractelement <4 x float> %tmp2, i32 undef
+ call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
br label %bb7
bb4:
%tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
%tmp6 = extractelement <4 x float> %tmp5, i32 undef
+ call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
br label %bb7
bb7:
@@ -554,11 +556,13 @@ bb:
bb1: ; preds = %bb
%tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
%tmp3 = insertelement <4 x float> %tmp2, float %val0, i32 undef
+ call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp3) #0 ; Prevent block optimize out
br label %bb7
bb4: ; preds = %bb
%tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
%tmp6 = insertelement <4 x float> %tmp5, float %val0, i32 undef
+ call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp6) #0 ; Prevent block optimize out
br label %bb7
bb7: ; preds = %bb4, %bb1
@@ -745,6 +749,8 @@ bb8:
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare void @llvm.amdgcn.s.barrier() #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind convergent }
Modified: llvm/trunk/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll Mon Dec 5 19:02:51 2016
@@ -1,8 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; FIXME: Enabling critical edge splitting will fix this.
-; XFAIL: *
-
; Make sure that m0 is not reinitialized in the loop.
; GCN-LABEL: {{^}}copy_local_to_global_loop_m0_init:
@@ -12,7 +9,9 @@
; GCN: s_mov_b32 m0, -1
; GCN: BB0_2:
+; GCN-NOT: m0
; GCN: ds_read_b32
+; GCN-NOT: m0
; GCN: buffer_store_dword
; GCN: s_cbranch_scc0 BB0_2
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-control-flow.ll Mon Dec 5 19:02:51 2016
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
;
;
; Most SALU instructions ignore control flow, so we need to make sure
@@ -9,7 +9,9 @@
; about instructions in different blocks overwriting each other.
; SI-LABEL: {{^}}sgpr_if_else_salu_br:
; SI: s_add
-; SI: s_add
+; SI: s_branch
+
+; SI: s_sub
define void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
@@ -17,6 +19,45 @@ entry:
br i1 %0, label %if, label %else
if:
+ %1 = sub i32 %b, %c
+ br label %endif
+
+else:
+ %2 = add i32 %d, %e
+ br label %endif
+
+endif:
+ %3 = phi i32 [%1, %if], [%2, %else]
+ %4 = add i32 %3, %a
+ store i32 %4, i32 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}sgpr_if_else_salu_br_opt:
+; SI: s_cmp_lg_u32
+; SI: s_cbranch_scc0 [[IF:BB[0-9]+_[0-9]+]]
+
+; SI: ; BB#1: ; %else
+; SI: s_load_dword [[LOAD0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xe
+; SI: s_load_dword [[LOAD1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xf
+; SI-NOT: add
+; SI: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
+
+; SI: [[IF]]: ; %if
+; SI: s_load_dword [[LOAD0]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_load_dword [[LOAD1]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-NOT: add
+
+; SI: [[ENDIF]]: ; %endif
+; SI: s_add_i32 s{{[0-9]+}}, [[LOAD0]], [[LOAD1]]
+; SI: buffer_store_dword
+; SI-NEXT: s_endpgm
+define void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+entry:
+ %0 = icmp eq i32 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
%1 = add i32 %b, %c
br label %endif
@@ -67,7 +108,7 @@ endif:
; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]]
; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
-; SI: BB2_2:
+; SI: BB{{[0-9]+}}_2:
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
; SI: v_cmp_eq_u32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll Mon Dec 5 19:02:51 2016
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; This test checks that no VGPR to SGPR copies are created by the register
; allocator.
@@ -223,8 +223,15 @@ declare i32 @llvm.SI.packf16(float, floa
; an assertion failure.
; CHECK-LABEL: {{^}}sample_v3:
-; CHECK: image_sample
-; CHECK: image_sample
+; CHECK: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 11
+; CHECK: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 13
+; CHECK: s_branch
+
+; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 5
+; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 7
+
+; CHECK: BB{{[0-9]+_[0-9]+}}:
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
; CHECK: exp
; CHECK: s_endpgm
define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
@@ -241,14 +248,14 @@ entry:
br i1 %tmp27, label %if, label %else
if: ; preds = %entry
- %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> zeroinitializer, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 11, i32 13>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%val.if.0 = extractelement <4 x float> %val.if, i32 0
%val.if.1 = extractelement <4 x float> %val.if, i32 1
%val.if.2 = extractelement <4 x float> %val.if, i32 2
br label %endif
else: ; preds = %entry
- %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1, i32 0>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 5, i32 7>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%val.else.0 = extractelement <4 x float> %val.else, i32 0
%val.else.1 = extractelement <4 x float> %val.else, i32 1
%val.else.2 = extractelement <4 x float> %val.else, i32 2
@@ -317,9 +324,15 @@ ENDIF69:
; This test checks that image_sample resource descriptors aren't loaded into
; vgprs. The verifier will fail if this happens.
-; CHECK-LABEL:{{^}}sample_rsrc:
-; CHECK: image_sample
-; CHECK: image_sample
+; CHECK-LABEL:{{^}}sample_rsrc
+
+; CHECK: s_cmp_eq_u32
+; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]]
+
+; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
+
+; [[END]]:
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
; CHECK: s_endpgm
define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
Modified: llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll Mon Dec 5 19:02:51 2016
@@ -308,10 +308,8 @@ end:
; CHECK: s_mov_b64 exec, 0
; CHECK: [[SKIPKILL]]:
-; CHECK: v_cmp_nge_f32
-; CHECK-NEXT: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
-
-; CHECK: [[UNREACHABLE]]:
+; CHECK: v_cmp_nge_f32_e32 vcc
+; CHECK-NEXT: BB#3: ; %bb5
; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
bb:
Modified: llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll?rev=288744&r1=288743&r2=288744&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll Mon Dec 5 19:02:51 2016
@@ -197,15 +197,15 @@ if.end:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
-; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
-; GCN: buffer_store_dword [[TWO]]
+; GCN: v_mov_b32_e32 [[IMM_REG:v[0-9]+]], 2
; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: [[IF_LABEL]]:
-; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
-; GCN: buffer_store_dword [[ONE]]
+; GCN-NEXT: v_mov_b32_e32 [[IMM_REG]], 1
+
+; GCN-NEXT: [[ENDIF_LABEL]]:
+; GCN: buffer_store_dword [[IMM_REG]]
-; GCN: [[ENDIF_LABEL]]:
; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
; GCN: buffer_store_dword [[THREE]]
; GCN: s_endpgm
More information about the llvm-commits
mailing list