[PATCH] D93511: [Sink] Process basic blocks with a single successor
Fangrui Song via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 17 21:29:59 PST 2020
MaskRay created this revision.
MaskRay added reviewers: arsenm, cfang, zsrkmyn.
Herald added subscribers: kerbowa, hiraditya, tpr, nhaehnle, jvesely.
MaskRay requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.
The early return in ProcessBlock for blocks with fewer than two successors
seems unnecessary. The updated AMDGPU tests generally have fewer instructions,
but it is not particularly clear what they intend to test.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D93511
Files:
llvm/lib/Transforms/Scalar/Sink.cpp
llvm/test/CodeGen/AMDGPU/andorbitset.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
llvm/test/CodeGen/AMDGPU/operand-folding.ll
llvm/test/Transforms/Sink/single-succ.ll
Index: llvm/test/Transforms/Sink/single-succ.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Sink/single-succ.ll
@@ -0,0 +1,17 @@
+; RUN: opt -S < %s -passes=sink | FileCheck %s
+
+; CHECK-LABEL: else:
+; CHECK-NEXT: %l = load i32, i32* %a, align 4
+; CHECK-NEXT: ret i32 %l
+
+define i32 @single_succ(i1 %b, i32* %a) {
+entry:
+ %l = load i32, i32* %a, align 4
+ br label %if
+if:
+ br i1 %b, label %then, label %else
+then:
+ ret i32 42
+else:
+ ret i32 %l
+}
Index: llvm/test/CodeGen/AMDGPU/operand-folding.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -126,7 +126,7 @@
; There should be exact one folding on the same operand.
; CHECK-LABEL: {{^}}no_extra_fold_on_same_opnd
-; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; CHECK-NOT: %bb.1:
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @no_extra_fold_on_same_opnd() #1 {
entry:
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
@@ -207,8 +207,7 @@
;GCN-LABEL: {{^}}s_buffer_load_index_across_bb:
;GCN-NOT: s_waitcnt;
-;GCN: v_or_b32
-;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
main_body:
%tmp = shl i32 %index, 4
@@ -224,10 +223,7 @@
;GCN-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
;GCN-NOT: s_waitcnt;
-;GCN: v_or_b32
-;GCN: v_or_b32
-;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
-;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
+;GCN: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8
define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
main_body:
%tmp = shl i32 %index, 4
Index: llvm/test/CodeGen/AMDGPU/andorbitset.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/andorbitset.ll
+++ llvm/test/CodeGen/AMDGPU/andorbitset.ll
@@ -50,7 +50,8 @@
; Make sure there's no verifier error with an undef source.
; SI-LABEL: {{^}}bitset_verifier_error:
-; SI: s_bitset0_b32 s{{[0-9]+}}, 31
+; SI-NOT: %bb.1:
+; SI: v_cmp_ge_f32_e64
define void @bitset_verifier_error() local_unnamed_addr #0 {
bb:
%i = call float @llvm.fabs.f32(float undef) #0
Index: llvm/lib/Transforms/Scalar/Sink.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/Sink.cpp
+++ llvm/lib/Transforms/Scalar/Sink.cpp
@@ -178,9 +178,6 @@
static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI,
AAResults &AA) {
- // Can't sink anything out of a block that has less than two successors.
- if (BB.getTerminator()->getNumSuccessors() <= 1) return false;
-
// Don't bother sinking code out of unreachable blocks. In addition to being
// unprofitable, it can also lead to infinite looping, because in an
// unreachable loop there may be nowhere to stop.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D93511.312682.patch
Type: text/x-patch
Size: 3468 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201218/9b76adfb/attachment.bin>
More information about the llvm-commits
mailing list