[llvm] 56a5d78 - [AMDGPU] Disable optimizeEndCf at -O0

Christudasan Devadasan via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 17 23:50:40 PST 2022


Author: Christudasan Devadasan
Date: 2022-01-18T02:48:52-05:00
New Revision: 56a5d78893e62f348e329ea513f1fab28e869a10

URL: https://github.com/llvm/llvm-project/commit/56a5d78893e62f348e329ea513f1fab28e869a10
DIFF: https://github.com/llvm/llvm-project/commit/56a5d78893e62f348e329ea513f1fab28e869a10.diff

LOG: [AMDGPU] Disable optimizeEndCf at -O0

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D116819

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
    llvm/test/CodeGen/AMDGPU/collapse-endcf.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 6ec37b32d0a68..e1018bdfde469 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -56,6 +56,7 @@
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -90,6 +91,8 @@ class SILowerControlFlow : public MachineFunctionPass {
   unsigned OrSaveExecOpc;
   unsigned Exec;
 
+  bool EnableOptimizeEndCf = false;
+
   bool hasKill(const MachineBasicBlock *Begin, const MachineBasicBlock *End);
 
   void emitIf(MachineInstr &MI);
@@ -579,7 +582,7 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
 void SILowerControlFlow::optimizeEndCf() {
   // If the only instruction immediately following this END_CF is an another
   // END_CF in the only successor we can avoid emitting exec mask restore here.
-  if (!RemoveRedundantEndcf)
+  if (!EnableOptimizeEndCf)
     return;
 
   for (MachineInstr *MI : reverse(LoweredEndCf)) {
@@ -807,6 +810,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TII = ST.getInstrInfo();
   TRI = &TII->getRegisterInfo();
+  EnableOptimizeEndCf =
+      RemoveRedundantEndcf && MF.getTarget().getOptLevel() > CodeGenOpt::None;
 
   // This doesn't actually need LiveIntervals, but we can preserve them.
   LIS = getAnalysisIfAvailable<LiveIntervals>();

diff  --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 718fbdc952add..23879e5cb7685 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -1,5 +1,8 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
+; Check that the endcf collapse is disabled at -O0, even when -amdgpu-remove-redundant-endcf is passed.
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -O0 -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN-O0 %s
+
 ; GCN-LABEL: {{^}}simple_nested_if:
 ; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
 ; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
@@ -11,7 +14,34 @@
 ; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
 ; GCN: ds_write_b32
 ; GCN: s_endpgm
-
+;
+; GCN-O0-LABEL: {{^}}simple_nested_if:
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0:      store_dword
+; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0:      ds_write_b32
+; GCN-O0:      s_endpgm
+;
 define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -49,6 +79,38 @@ bb.outer.end:                                     ; preds = %bb.outer.then, %bb.
 ; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
 ; GCN: ds_write_b32
 ; GCN: s_endpgm
+;
+; GCN-O0-LABEL: {{^}}uncollapsable_nested_if:
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0:      store_dword
+; GCN-O0-NEXT: s_branch [[ENDIF_INNER]]
+; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_branch [[LAST_BB:.LBB[0-9_]+]]
+; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0:      s_branch [[ENDIF_OUTER]]
+; GCN-O0-NEXT: {{^}}[[LAST_BB]]:
+; GCN-O0:      ds_write_b32
+; GCN-O0:      s_endpgm
+;
 define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -95,6 +157,48 @@ bb.outer.end:                                     ; preds = %bb.inner.then, %bb
 ; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
 ; GCN: ds_write_b32
 ; GCN: s_endpgm
+;
+; GCN-O0-LABEL: {{^}}nested_if_if_else:
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
+; GCN-O0-NEXT: s_branch [[TEMP_BB:.LBB[0-9_]+]]
+; GCN-O0-NEXT: {{^}}[[THEN_INNER]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[THEN_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[THEN_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0:      store_dword
+; GCN-O0-NEXT: s_branch [[ENDIF_INNER]]
+; GCN-O0-NEXT: {{^}}[[TEMP_BB]]:
+; GCN-O0:      s_branch [[THEN_INNER]]
+; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0:      ds_write_b32
+; GCN-O0:      s_endpgm
+;
 define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -151,6 +255,61 @@ bb.outer.end:                                        ; preds = %bb, %bb.then, %b
 ; GCN:      s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]]
 ; GCN:      ds_write_b32
 ; GCN:      s_endpgm
+;
+; GCN-O0-LABEL: {{^}}nested_if_else_if:
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
+; GCN-O0-NEXT: s_branch [[INNER_IF_OUTER_ELSE:.LBB[0-9_]+]]
+; GCN-O0-NEXT: {{^}}[[THEN_OUTER]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0:      store_dword
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0:      store_dword
+; GCN-O0-NEXT: s_branch [[FLOW1]]
+; GCN-O0-NEXT: {{^}}[[INNER_IF_OUTER_ELSE]]
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0:      store_dword
+; GCN-O0-NEXT: {{^}}[[THEN_OUTER_FLOW]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_IF_OUTER_ELSE_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_IF_OUTER_ELSE_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_branch [[THEN_OUTER]]
+; GCN-O0-NEXT: {{^}}[[FLOW1]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[ELSE_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[ELSE_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_2_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_2_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0:      ds_write_b32
+; GCN-O0:      s_endpgm
+;
 define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -195,6 +354,23 @@ bb.outer.end:
 ; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
 ; GCN:      s_barrier
 ; GCN-NEXT: s_endpgm
+;
+; GCN-O0-LABEL: {{^}}s_endpgm_unsafe_barrier:
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0:      store_dword
+; GCN-O0-NEXT: {{^}}[[ENDIF]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0:      s_barrier
+; GCN-O0:      s_endpgm
+;
 define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -238,6 +414,75 @@ bb.end:                                           ; preds = %bb.then, %bb
 ; GCN: buffer_store_dword
 ; GCN: buffer_store_dword
 ; GCN: s_setpc_b64
+;
+; GCN-O0-LABEL: {{^}}scc_liveness:
+; GCN-O0-COUNT-2: buffer_store_dword
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1:[0-9]+]]
+; GCN-O0: [[INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
+; GCN-O0: buffer_load_dword
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_1:[0-9]+]]
+; GCN-O0:      s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_mov_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]]
+; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1]]
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[FLOW2:.LBB[0-9_]+]]
+; GCN-O0: {{^}}[[FLOW2]]:
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_1]]
+; GCN-O0:      s_branch [[FLOW:.LBB[0-9_]+]]
+; GCN-O0: {{^}}[[FLOW]]:
+; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_1:[0-9]+]]
+; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execz [[FLOW3:.LBB[0-9_]+]]
+; GCN-O0:      ; %bb.{{[0-9]+}}:
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_0:[0-9]+]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_1:[0-9]+]]
+; GCN-O0: {{^}}[[FLOW3]]:
+; GCN-O0-COUNT-4: buffer_load_dword
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_1]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_1]]
+; GCN-O0:      s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GCN-O0-COUNT-2: s_mov_b64
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
+; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
+; GCN-O0-COUNT-4: buffer_store_dword
+; GCN-O0:      s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
+; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]]
+; GCN-O0:      ; %bb.{{[0-9]+}}:
+; GCN-O0-COUNT-4: buffer_store_dword
+; GCN-O0:     s_setpc_b64
+;
 define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 bb:
   br label %bb1


        


More information about the llvm-commits mailing list