[PATCH] D93511: [Sink] Process basic blocks with a single successor

Thu Dec 17 21:29:59 PST 2020

MaskRay created this revision.
MaskRay added reviewers: arsenm, cfang, zsrkmyn.
Herald added subscribers: kerbowa, hiraditya, tpr, nhaehnle, jvesely.
MaskRay requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

The early-exit condition in ProcessBlock (skipping blocks with fewer than
two successors) seems unnecessary. The updated AMDGPU tests generally
have fewer instructions, but it is not particularly clear what they are
intended to test.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D93511

Files:
  llvm/lib/Transforms/Scalar/Sink.cpp
  llvm/test/CodeGen/AMDGPU/andorbitset.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
  llvm/test/CodeGen/AMDGPU/operand-folding.ll
  llvm/test/Transforms/Sink/single-succ.ll


Index: llvm/test/Transforms/Sink/single-succ.ll
===================================================================

--- /dev/null
+++ llvm/test/Transforms/Sink/single-succ.ll
@@ -0,0 +1,17 @@
+; RUN: opt -S < %s -passes=sink | FileCheck %s
+
+; CHECK-LABEL: else:
+; CHECK-NEXT:    %l = load i32, i32* %a, align 4
+; CHECK-NEXT:    ret i32 %l
+
+define i32 @single_succ(i1 %b, i32* %a) {
+entry:
+  %l = load i32, i32* %a, align 4
+  br label %if
+if:
+  br i1 %b, label %then, label %else
+then:
+  ret i32 42
+else:
+  ret i32 %l
+}
Index: llvm/test/CodeGen/AMDGPU/operand-folding.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -126,7 +126,7 @@
 
 ; There should be exact one folding on the same operand.
 ; CHECK-LABEL: {{^}}no_extra_fold_on_same_opnd
-; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; CHECK-NOT: %bb.1:
 ; CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define void @no_extra_fold_on_same_opnd() #1 {
 entry:
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
@@ -207,8 +207,7 @@
 
 ;GCN-LABEL: {{^}}s_buffer_load_index_across_bb:
 ;GCN-NOT: s_waitcnt;
-;GCN: v_or_b32
-;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
 define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
 main_body:
   %tmp = shl i32 %index, 4
@@ -224,10 +223,7 @@
 
 ;GCN-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
 ;GCN-NOT: s_waitcnt;
-;GCN: v_or_b32
-;GCN: v_or_b32
-;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
-;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+;GCN: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
 define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
 main_body:
   %tmp = shl i32 %index, 4
Index: llvm/test/CodeGen/AMDGPU/andorbitset.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/andorbitset.ll
+++ llvm/test/CodeGen/AMDGPU/andorbitset.ll
@@ -50,7 +50,8 @@
 
 ; Make sure there's no verifier error with an undef source.
 ; SI-LABEL: {{^}}bitset_verifier_error:
-; SI: s_bitset0_b32 s{{[0-9]+}}, 31
+; SI-NOT:   %bb.1:
+; SI:       v_cmp_ge_f32_e64
 define void @bitset_verifier_error() local_unnamed_addr #0 {
 bb:
   %i = call float @llvm.fabs.f32(float undef) #0
Index: llvm/lib/Transforms/Scalar/Sink.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/Sink.cpp
+++ llvm/lib/Transforms/Scalar/Sink.cpp
@@ -178,9 +178,6 @@
 
 static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI,
                          AAResults &AA) {
-  // Can't sink anything out of a block that has less than two successors.
-  if (BB.getTerminator()->getNumSuccessors() <= 1) return false;
-
   // Don't bother sinking code out of unreachable blocks. In addition to being
   // unprofitable, it can also lead to infinite looping, because in an
   // unreachable loop there may be nowhere to stop.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D93511.312682.patch
Type: text/x-patch
Size: 3468 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201218/9b76adfb/attachment.bin>