[llvm] 008e65a - [AMDGPU] Fix emitIfBreak CF lowering: use a temp reg to make the register coalescer's life easier.
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 26 08:01:49 PST 2019
Author: vpykhtin
Date: 2019-11-26T18:59:37+03:00
New Revision: 008e65a7bfb320bf197a04ff6427da84f8d38b76
URL: https://github.com/llvm/llvm-project/commit/008e65a7bfb320bf197a04ff6427da84f8d38b76
DIFF: https://github.com/llvm/llvm-project/commit/008e65a7bfb320bf197a04ff6427da84f8d38b76.diff
LOG: [AMDGPU] Fix emitIfBreak CF lowering: use a temp reg to make the register coalescer's life easier.
Differential revision: https://reviews.llvm.org/D70405
Added:
Modified:
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
llvm/test/CodeGen/AMDGPU/loop_break.ll
llvm/test/CodeGen/AMDGPU/multilevel-break.ll
llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
llvm/test/CodeGen/AMDGPU/valu-i1.ll
llvm/test/CodeGen/AMDGPU/wave32.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 6f9abd3a8d9b..bf052dc3c930 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -372,12 +372,15 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
// exit" mask.
MachineInstr *And = nullptr, *Or = nullptr;
if (!SkipAnding) {
- And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), Dst)
+ Register AndReg = MRI->createVirtualRegister(BoolRC);
+ And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
.addReg(Exec)
.add(MI.getOperand(1));
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
- .addReg(Dst)
+ .addReg(AndReg)
.add(MI.getOperand(2));
+ if (LIS)
+ LIS->createAndComputeVirtRegInterval(AndReg);
} else
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.add(MI.getOperand(1))
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 0dec67ad340c..895539c00bce 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -16,29 +16,28 @@ define amdgpu_ps void @main(i32, float) {
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
; CHECK-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; CHECK-NEXT: s_mov_b64 s[4:5], 0
+; CHECK-NEXT: s_mov_b64 s[2:3], 0
; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
-; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT: ; implicit-def: $sgpr4_sgpr5
; CHECK-NEXT: s_branch BB0_3
; CHECK-NEXT: BB0_1: ; %Flow1
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
-; CHECK-NEXT: s_mov_b64 s[10:11], 0
+; CHECK-NEXT: s_mov_b64 s[8:9], 0
; CHECK-NEXT: BB0_2: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7]
-; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5]
-; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT: s_and_b64 s[4:5], s[10:11], exec
-; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
-; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9]
-; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7]
+; CHECK-NEXT: s_or_b64 s[2:3], s[10:11], s[2:3]
+; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
+; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec
+; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3]
; CHECK-NEXT: s_cbranch_execz BB0_6
; CHECK-NEXT: BB0_3: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
; CHECK-NEXT: s_cmp_lt_u32 s0, 32
-; CHECK-NEXT: s_mov_b64 s[10:11], -1
+; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: s_cbranch_scc0 BB0_2
; CHECK-NEXT: ; %bb.4: ; %endif1
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
@@ -53,9 +52,9 @@ define amdgpu_ps void @main(i32, float) {
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
; CHECK-NEXT: s_branch BB0_1
; CHECK-NEXT: BB0_6: ; %Flow2
-; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
+; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
; CHECK-NEXT: ; mask branch BB0_8
; CHECK-NEXT: BB0_7: ; %if1
; CHECK-NEXT: v_sqrt_f32_e32 v1, v0
@@ -63,6 +62,7 @@ define amdgpu_ps void @main(i32, float) {
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
; CHECK-NEXT: s_endpgm
+
; this is the divergent branch with the condition not marked as divergent
start:
%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
index fff1c22918ec..51d1c091ab91 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
@@ -3,11 +3,10 @@
; SI-LABEL: {{^}}i1_copy_from_loop:
;
-; SI: [[LOOP:BB0_[0-9]+]]: ; %Flow1
-; SI: s_or_b64 exec, exec, [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]]
; SI: ; %Flow
+; SI: s_or_b64 [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]]
; SI: s_and_b64 [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_MASK:s\[[0-9]+:[0-9]+\]]], exec
-; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], s[6:7], [[ACCUM_MASK]]
+; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[ACCUM_MASK]]
; SI: s_cbranch_execz [[FOR_END_LABEL:BB0_[0-9]+]]
; SI: ; %for.body
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll
index 46c4b1e6b3a1..684b183de690 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -40,10 +40,9 @@
; GCN: [[FLOW]]: ; %Flow
; GCN: ; in Loop: Header=BB0_1 Depth=1
-; GCN: s_and_b64 [[BROKEN_MASK]], exec, [[INNER_MASK]]
-; GCN: s_or_b64 [[BROKEN_MASK]], [[BROKEN_MASK]], [[ACCUM_MASK]]
-; GCN: s_mov_b64 [[ACCUM_MASK]], [[BROKEN_MASK]]
-; GCN: s_andn2_b64 exec, exec, [[BROKEN_MASK]]
+; GCN: s_and_b64 [[AND_MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]]
+; GCN-NEXT: s_or_b64 [[ACCUM_MASK]], [[AND_MASK]], [[ACCUM_MASK]]
+; GCN-NEXT: s_andn2_b64 exec, exec, [[ACCUM_MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]
; GCN: ; %bb.4: ; %bb9
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index 08d8ec0fba4d..5222ae56db87 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -25,22 +25,20 @@
; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2
-; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]]
+; GCN: s_or_b64 exec, exec, [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]]
; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]]
-; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
-; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]]
-; GCN: s_andn2_b64 exec, exec, [[TMP1]]
+; GCN: s_or_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[LEFT_OUTER]]
+; GCN: s_andn2_b64 exec, exec, [[LEFT_OUTER]]
; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]]
; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}}
-; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
+; GCN: s_mov_b64 [[LEFT_INNER]], 0{{$}}
; GCN: ; %Flow
; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]]
-; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]]
-; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]]
-; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]]
-; GCN: s_andn2_b64 exec, exec, [[TMP0]]
+; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]]
+; GCN: s_or_b64 [[LEFT_INNER]], [[TMP0]], [[LEFT_INNER]]
+; GCN: s_andn2_b64 exec, exec, [[LEFT_INNER]]
; GCN: s_cbranch_execz [[FLOW2]]
; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}}
@@ -82,17 +80,17 @@ ENDIF: ; preds = %LOOP
; OPT: llvm.amdgcn.end.cf
; GCN-LABEL: {{^}}multi_if_break_loop:
-; GCN: s_mov_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
+; GCN: s_mov_b64 [[SAVED_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %Flow4
-; GCN: s_and_b64 [[BROKEN_THREADS_MASK]], exec, [[BROKEN_THREADS_MASK]]
-; GCN: s_or_b64 [[BROKEN_THREADS_MASK]], [[BROKEN_THREADS_MASK]], [[SAVED:s\[[0-9]+:[0-9]+\]]]
-; GCN: s_andn2_b64 exec, exec, [[BROKEN_THREADS_MASK]]
+; GCN: s_and_b64 [[ANDTMP0:s\[[0-9]+:[0-9]+\]]], exec, {{s\[[0-9]+:[0-9]+\]}}
+; GCN: s_or_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], [[ANDTMP0]], [[SAVED_MASK]]
+; GCN: s_and_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, exec
+; GCN: s_andn2_b64 exec, exec, [[MASK1]]
; GCN-NEXT: s_cbranch_execz [[LOOP_EXIT:BB[0-9]+_[0-9]+]]
; GCN: ; %bb1{{$}}
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]],
-; GCN: s_mov_b64 [[SAVED]], [[BROKEN_THREADS_MASK]]
; GCN: ; %LeafBlock1
; GCN: v_cmp_eq_u32_e32 vcc, 1, [[LOAD0]]
@@ -122,7 +120,7 @@ ENDIF: ; preds = %LOOP
; GCN: s_branch [[LOOP]]
; GCN: [[LOOP_EXIT]]: ; %Flow6
-; GCN: s_or_b64 exec, exec, [[BROKEN_THREADS_MASK]]
+; GCN: s_or_b64 exec, exec, [[SAVED_MASK]]
define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
index 14d78fbef29e..23bb18e738f5 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
@@ -37,9 +37,8 @@ ENDIF:
; SI: ; %endif
; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop
-; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
; SI: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]]
-; SI: s_or_b64 [[LEFT]], [[TMP1]], [[TMP]]
+; SI: s_or_b64 [[LEFT]], [[TMP1]], [[LEFT]]
; SI: s_andn2_b64 exec, exec, [[LEFT]]
; SI: s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index ef17825024ed..ea74268dbe7c 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -223,9 +223,8 @@ exit:
; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]]
; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]]
; SI-NEXT: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]],
-; SI-NEXT: s_or_b64 [[TMP2:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[COND_STATE]]
-; SI-NEXT: s_mov_b64 [[COND_STATE]], [[TMP2]]
-; SI-NEXT: s_andn2_b64 exec, exec, [[TMP2]]
+; SI-NEXT: s_or_b64 [[COND_STATE]], [[TMP1]], [[COND_STATE]]
+; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]]
; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]]
; SI: [[LABEL_EXIT]]:
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 91a993181979..92808fec360f 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -243,14 +243,12 @@ bb13:
; GFX1032: s_or_b32 [[MASK1]], [[MASK1]], [[MASK0]]
; GFX1064: s_or_b64 [[MASK1]], [[MASK1]], [[MASK0]]
; GCN: BB{{.*}}: ; %Flow
-; GFX1032: s_and_b32 [[MASK0:s[0-9]+]], exec_lo, [[MASK1]]
-; GFX1064: s_and_b64 [[MASK0:s\[[0-9:]+\]]], exec, [[MASK1]]
-; GFX1032: s_or_b32 [[MASK0]], [[MASK0]], [[ACC:s[0-9]+]]
-; GFX1064: s_or_b64 [[MASK0]], [[MASK0]], [[ACC:s\[[0-9:]+\]]]
-; GFX1032: s_mov_b32 [[ACC]], [[MASK0]]
-; GFX1064: s_mov_b64 [[ACC]], [[MASK0]]
-; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[MASK0]]
-; GFX1064: s_andn2_b64 exec, exec, [[MASK0]]
+; GFX1032: s_and_b32 [[TMP0:s[0-9]+]], exec_lo, [[MASK1]]
+; GFX1064: s_and_b64 [[TMP0:s\[[0-9:]+\]]], exec, [[MASK1]]
+; GFX1032: s_or_b32 [[ACC:s[0-9]+]], [[TMP0]], [[ACC]]
+; GFX1064: s_or_b64 [[ACC:s\[[0-9:]+\]]], [[TMP0]], [[ACC]]
+; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[ACC]]
+; GFX1064: s_andn2_b64 exec, exec, [[ACC]]
; GCN: s_cbranch_execz
; GCN: BB{{.*}}:
; GCN: s_load_dword [[LOAD:s[0-9]+]]
More information about the llvm-commits
mailing list