[llvm] r309185 - [AMDGPU] Optimize SI_IF lowering for simple if regions
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 26 14:29:15 PDT 2017
Author: rampitec
Date: Wed Jul 26 14:29:15 2017
New Revision: 309185
URL: http://llvm.org/viewvc/llvm-project?rev=309185&view=rev
Log:
[AMDGPU] Optimize SI_IF lowering for simple if regions
Currently SI_IF results in an s_and_saveexec_b64 followed by an s_xor_b64.
The xor is used to extract only the changed bits. In the case of a simple
if region, where the only use of that value is in the SI_END_CF that
restores the old exec mask, we can omit the xor and instead or the exec
mask with the original exec value saved by the s_and_saveexec_b64.
Differential Revision: https://reviews.llvm.org/D35861
Modified:
llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/trunk/test/CodeGen/AMDGPU/branch-condition-and.ll
llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
llvm/trunk/test/CodeGen/AMDGPU/i1-copy-phi.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
llvm/trunk/test/CodeGen/AMDGPU/ret_jump.ll
llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll
llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp Wed Jul 26 14:29:15 2017
@@ -149,9 +149,19 @@ void SILowerControlFlow::emitIf(MachineI
MachineOperand &ImpDefSCC = MI.getOperand(4);
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
+ // If there is only one use of save exec register and that use is SI_END_CF,
+ // we can optimize SI_IF by returning the full saved exec mask instead of
+ // just cleared bits.
+ bool SimpleIf = false;
+ auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
+ SimpleIf = U != MRI->use_instr_nodbg_end() &&
+ std::next(U) == MRI->use_instr_nodbg_end() &&
+ U->getOpcode() == AMDGPU::SI_END_CF;
+
// Add an implicit def of exec to discourage scheduling VALU after this which
// will interfere with trying to form s_and_saveexec_b64 later.
- unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned CopyReg = SimpleIf ? SaveExecReg
+ : MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
MachineInstr *CopyExec =
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
.addReg(AMDGPU::EXEC)
@@ -166,11 +176,14 @@ void SILowerControlFlow::emitIf(MachineI
.addReg(Cond.getReg());
setImpSCCDefDead(*And, true);
- MachineInstr *Xor =
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
- .addReg(Tmp)
- .addReg(CopyReg);
- setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
+ MachineInstr *Xor = nullptr;
+ if (!SimpleIf) {
+ Xor =
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
+ .addReg(Tmp)
+ .addReg(CopyReg);
+ setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
+ }
// Use a copy that is a terminator to get correct spill code placement it with
// fast regalloc.
@@ -194,7 +207,8 @@ void SILowerControlFlow::emitIf(MachineI
// register.
LIS->ReplaceMachineInstrInMaps(MI, *And);
- LIS->InsertMachineInstrInMaps(*Xor);
+ if (!SimpleIf)
+ LIS->InsertMachineInstrInMaps(*Xor);
LIS->InsertMachineInstrInMaps(*SetExec);
LIS->InsertMachineInstrInMaps(*NewBr);
@@ -207,7 +221,8 @@ void SILowerControlFlow::emitIf(MachineI
LIS->removeInterval(SaveExecReg);
LIS->createAndComputeVirtRegInterval(SaveExecReg);
LIS->createAndComputeVirtRegInterval(Tmp);
- LIS->createAndComputeVirtRegInterval(CopyReg);
+ if (!SimpleIf)
+ LIS->createAndComputeVirtRegInterval(CopyReg);
}
void SILowerControlFlow::emitElse(MachineInstr &MI) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/branch-condition-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/branch-condition-and.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/branch-condition-and.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/branch-condition-and.ll Wed Jul 26 14:29:15 2017
@@ -14,7 +14,6 @@
; GCN-DAG: v_cmp_lt_f32_e32 vcc,
; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[OTHERCC]]
; GCN: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; GCN: s_xor_b64 {{s\[[0-9]+:[0-9]+\]}}, exec, [[SAVED]]
; GCN: ; mask branch [[BB5:BB[0-9]+_[0-9]+]]
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %bb4
Modified: llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/branch-relaxation.ll Wed Jul 26 14:29:15 2017
@@ -141,7 +141,6 @@ bb3:
; GCN: buffer_load_dword
; GCN: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN: s_xor_b64 [[SAVE]], exec, [[SAVE]]
; GCN: v_nop_e64
; GCN: v_nop_e64
@@ -385,7 +384,6 @@ bb3:
; GCN-LABEL: {{^}}uniform_inside_divergent:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN-NEXT: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_cbranch_execnz [[IF:BB[0-9]+_[0-9]+]]
@@ -436,7 +434,6 @@ endif:
; GCN-LABEL: {{^}}analyze_mask_branch:
; GCN: v_cmp_lt_f32_e32 vcc
; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN-NEXT: s_xor_b64 [[MASK]], exec, [[MASK]]
; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_cbranch_execz [[BRANCH_SKIP:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_branch [[LOOP_BODY:BB[0-9]+_[0-9]+]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll Wed Jul 26 14:29:15 2017
@@ -20,7 +20,6 @@
; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
-; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
; Spill saved exec
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
@@ -101,7 +100,6 @@ endif:
; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
-; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
; Spill load
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill
Modified: llvm/trunk/test/CodeGen/AMDGPU/i1-copy-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/i1-copy-phi.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/i1-copy-phi.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/i1-copy-phi.ll Wed Jul 26 14:29:15 2017
@@ -4,11 +4,9 @@
; SI-LABEL: {{^}}br_i1_phi:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; SI: s_and_saveexec_b64
-; SI: s_xor_b64
; SI: v_mov_b32_e32 [[REG]], -1{{$}}
; SI: v_cmp_ne_u32_e32 vcc, 0, [[REG]]
; SI: s_and_saveexec_b64
-; SI: s_xor_b64
; SI: s_endpgm
define amdgpu_kernel void @br_i1_phi(i32 %arg) {
bb:
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll Wed Jul 26 14:29:15 2017
@@ -137,7 +137,6 @@ define amdgpu_kernel void @test_div_fmas
; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc:
; SI: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc
-; SI: s_xor_b64 [[SAVE]], exec, [[SAVE]]
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
; SI: v_cmp_ne_u32_e32 vcc, 0, [[LOAD]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll Wed Jul 26 14:29:15 2017
@@ -81,7 +81,6 @@
; GCN-NEXT: s_or_b64 exec, exec
; GCN: v_cmp_ne_u32_e32 vcc, 0
; GCN-NEXT: s_and_saveexec_b64
-; GCN-NEXT: s_xor_b64
; GCN: ; %exit0
; GCN: buffer_store_dword
Modified: llvm/trunk/test/CodeGen/AMDGPU/ret_jump.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ret_jump.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ret_jump.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ret_jump.ll Wed Jul 26 14:29:15 2017
@@ -11,7 +11,6 @@
; GCN-NEXT: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN-NEXT: s_xor_b64 [[XOR_EXEC:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_EXEC]]
; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
; GCN: BB{{[0-9]+_[0-9]+}}: ; %unreachable.bb
@@ -60,7 +59,6 @@ ret.bb:
; GCN: ; BB#{{[0-9]+}}: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN-NEXT: s_xor_b64 [[XOR_EXEC:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_EXEC]]
; GCN-NEXT: ; mask branch [[FLOW1:BB[0-9]+_[0-9]+]]
; GCN-NEXT: ; %unreachable.bb
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll Wed Jul 26 14:29:15 2017
@@ -3,7 +3,6 @@
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator:
; GCN: v_cmp_eq_u32
; GCN: s_and_saveexec_b64
-; GCN: s_xor_b64
; GCN: ; mask branch [[RET:BB[0-9]+_[0-9]+]]
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %unreachable
@@ -31,7 +30,6 @@ ret:
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator_swap_block_order:
; GCN: v_cmp_ne_u32
; GCN: s_and_saveexec_b64
-; GCN: s_xor_b64
; GCN: ; mask branch [[RETURN:BB[0-9]+_[0-9]+]]
; GCN-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %unreachable
Modified: llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll Wed Jul 26 14:29:15 2017
@@ -202,7 +202,6 @@ exit:
; CHECK-LABEL: {{^}}test_kill_divergent_loop:
; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
-; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: s_cbranch_execz [[EXIT]]
@@ -337,7 +336,6 @@ bb7:
; CHECK-LABEL: {{^}}if_after_kill_block:
; CHECK: ; BB#0:
; CHECK: s_and_saveexec_b64
-; CHECK: s_xor_b64
; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]
; CHECK: v_cmpx_le_f32_e32 vcc, 0,
@@ -347,7 +345,6 @@ bb7:
; CHECK: v_cmp_neq_f32_e32 vcc, 0,
; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
-; CHECK: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, exec
; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
; CHECK-NOT: branch
Modified: llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll Wed Jul 26 14:29:15 2017
@@ -10,7 +10,6 @@ target triple="amdgcn--"
; CHECK: v_mbcnt_lo_u32_b32_e64
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; BB0_1:
; CHECK: s_load_dword s0, s[0:1], 0xa
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
Modified: llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll Wed Jul 26 14:29:15 2017
@@ -303,7 +303,6 @@ done:
; GCN-LABEL: {{^}}uniform_inside_divergent:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]]
; GCN: s_endpgm
@@ -335,7 +334,6 @@ endif:
; GCN: [[IF_LABEL]]:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
define amdgpu_kernel void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
@@ -360,7 +358,6 @@ endif:
; GCN-LABEL: {{^}}divergent_if_uniform_if:
; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: s_or_b64 exec, exec, [[MASK]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll Wed Jul 26 14:29:15 2017
@@ -5,7 +5,6 @@
; CHECK-LABEL: {{^}}test1:
; CHECK: v_cmp_ne_u32_e32 vcc, 0
; CHECK: s_and_saveexec_b64
-; CHECK-NEXT: s_xor_b64
; CHECK-NEXT: ; mask branch
; CHECK-NEXT: s_cbranch_execz BB{{[0-9]+_[0-9]+}}
; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader
@@ -35,7 +34,6 @@ out:
; CHECK-LABEL: {{^}}test2:
; CHECK: s_and_saveexec_b64
-; CHECK-NEXT: s_xor_b64
; CHECK-NEXT: ; mask branch
; CHECK-NEXT: s_cbranch_execz
define amdgpu_kernel void @test2(i32 addrspace(1)* %out, i32 %a, i32 %b) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll?rev=309185&r1=309184&r2=309185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll Wed Jul 26 14:29:15 2017
@@ -18,7 +18,6 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
; SI: v_mov_b32_e32 v{{[0-9]}}, -1
; SI: s_and_saveexec_b64
-; SI-NEXT: s_xor_b64
; SI-NEXT: ; mask branch
; v_mov should be after exec modification
@@ -66,8 +65,7 @@ end:
; SI-LABEL: {{^}}simple_test_v_if:
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
-; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
-; SI: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
+; SI-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
; SI-NEXT: BB{{[0-9]+_[0-9]+}}:
; SI: buffer_store_dword
@@ -94,8 +92,7 @@ exit:
; SI-LABEL: {{^}}simple_test_v_if_ret_else_ret:
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
-; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
-; SI: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
+; SI-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
; SI-NEXT: BB{{[0-9]+_[0-9]+}}:
; SI: buffer_store_dword
@@ -160,8 +157,8 @@ exit:
; SI-LABEL: {{^}}simple_test_v_loop:
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
-; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
-; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
+; SI-NEXT: ; mask branch
+; SI-NEXT: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
@@ -202,8 +199,8 @@ exit:
; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
; SI: v_cmp_lt_i32_e32 vcc
; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
-; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
-; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
+; SI-NEXT: ; mask branch
+; SI-NEXT: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
; Initialize inner condition to false
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb10.preheader
More information about the llvm-commits
mailing list