[llvm] d7a0569 - [AMDGPU] Move LowerSwitch pass to CodeGenPrepare.

Sat Jul 11 04:22:20 PDT 2020

Author: Christudasan Devadasan
Date: 2020-07-11T16:33:38+05:30
New Revision: d7a05698efcfa6c596bcaadd8d5154612990f8f3

URL: https://github.com/llvm/llvm-project/commit/d7a05698efcfa6c596bcaadd8d5154612990f8f3
DIFF: https://github.com/llvm/llvm-project/commit/d7a05698efcfa6c596bcaadd8d5154612990f8f3.diff

LOG: [AMDGPU] Move LowerSwitch pass to CodeGenPrepare.

The LowerSwitch pass can leave certain blocks unreachable from
the entry block. If not removed, these dead blocks can cause
undefined behavior in subsequent passes.
This caused a crash in the AMDGPU backend after instruction
selection when a PHI node had incoming values coming from
these unreachable blocks.

In the AMDGPU pass flow, the last invocation of UnreachableBlockElim
precedes the point where LowerSwitch is currently placed, so the
blocks it leaves behind miss the opportunity to be eliminated.
This patch ensures that the LowerSwitch pass gets inserted earlier
to make use of the existing unreachable block elimination pass.

Reviewed By: sameerds, arsenm

Differential Revision: https://reviews.llvm.org/D83584

Added: 
    llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 8604f5005eb2..b4b10835837c 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -787,10 +787,15 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
 
   if (EnableLoadStoreVectorizer)
     addPass(createLoadStoreVectorizerPass());
+
+  // The LowerSwitch pass may introduce unreachable blocks that can
+  // cause unexpected behavior in subsequent passes. Place it here
+  // so that these blocks get cleaned up by UnreachableBlockElim,
+  // which is inserted next in the pass flow.
+  addPass(createLowerSwitchPass());
 }
 
 bool AMDGPUPassConfig::addPreISel() {
-  addPass(createLowerSwitchPass());
   addPass(createFlattenCFGPass());
   return false;
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
new file mode 100644
index 000000000000..13c4dc80be15
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
+define void @test() #1 {
+  ; Clean up the unreachable blocks introduced by the LowerSwitch pass.
+  ; This test ensures that, in the pass flow, the UnreachableBlockElim
+  ; pass follows LowerSwitch. Otherwise, this testcase will crash
+  ; immediately after instruction selection due to the incomplete
+  ; PHI node in an MBB whose incoming values were never code generated.
+  ;
+  ; GCN-LABEL: name: test
+  ; GCN: bb.{{[0-9]+}}.entry:
+  ; GCN: bb.{{[0-9]+}}.entry.true.blk:
+  ; GCN: bb.{{[0-9]+}}.entry.false.blk:
+  ; GCN: bb.{{[0-9]+}}.switch.blk:
+
+  ; GCN-NOT: bb.{{[0-9]+}}.preheader.blk
+  ; GCN-NOT: bb.{{[0-9]+}}.pre.false.blk:
+  ; GCN-NOT: bb.{{[0-9]+}}.unreach.blk:
+  ; GCN-NOT: PHI
+
+  ; GCN: bb.{{[0-9]+}}.exit:
+  entry:
+    %idx = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+    br i1 undef, label %entry.true.blk, label %entry.false.blk
+
+  entry.true.blk:                                   ; preds = %entry
+    %exit.cmp = icmp ult i32 %idx, 3
+    br i1 %exit.cmp, label %switch.blk, label %exit
+
+  entry.false.blk:                                  ; preds = %entry
+    unreachable
+
+  switch.blk:                                       ; preds = %entry.true.blk
+    switch i32 %idx, label %preheader.blk [
+      i32 0, label %exit
+      i32 1, label %exit
+      i32 2, label %exit
+    ]
+
+  preheader.blk:                                    ; preds = %switch.blk
+    %pre.exit = icmp ult i32 %idx, 5
+    br i1 %pre.exit, label %unreach.blk, label %pre.false.blk
+
+  pre.false.blk:                                    ; preds = %preheader.blk
+    %call.pre.false = tail call i32 @func(i32 %idx) #0
+    br label %unreach.blk
+
+  unreach.blk:                                      ; preds = %preheader.blk, %pre.false.blk
+    %phi.val = phi i32 [ %call.pre.false, %pre.false.blk ], [ undef, %preheader.blk ]
+    store i32 %phi.val, i32* undef
+    unreachable
+
+  exit:                                             ; preds = %switch.blk
+    ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @func(i32)#0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }