[llvm] AMDGPU\GlobalISel: remove amdgpu-global-isel-risky-select flag (PR #83426)

Thu Feb 29 05:27:29 PST 2024

llvmbot wrote:



@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-amdgpu

Author: Petar Avramovic (petar-avramovic)

<details>
<summary>Changes</summary>

AMDGPUInstructionSelector should no longer attempt to select S1 G_PHIs.
Remove MIR test that attempts to inst-select divergent vcc(S1) G_PHI.
Lane mask merging algorithm for GlobalISel is now responsible for
selecting divergent S1 G_PHIs in AMDGPUGlobalISelDivergenceLowering.
Uniform S1 G_PHIs should be lowered to S32 G_PHIs in reg bank select
pass. In summary S1 G_PHIs should not reach AMDGPUInstructionSelector.

---
Full diff: https://github.com/llvm/llvm-project/pull/83426.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+6-14) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir (+1-55) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index aacc3590a5dbf9..b2c65e61b0097c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -34,12 +34,6 @@
 using namespace llvm;
 using namespace MIPatternMatch;
 
-static cl::opt<bool> AllowRiskySelect(
-  "amdgpu-global-isel-risky-select",
-  cl::desc("Allow GlobalISel to select cases that are likely to not work yet"),
-  cl::init(false),
-  cl::ReallyHidden);
-
 #define GET_GLOBALISEL_IMPL
 #define AMDGPUSubtarget GCNSubtarget
 #include "AMDGPUGenGlobalISel.inc"
@@ -211,14 +205,12 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
   const Register DefReg = I.getOperand(0).getReg();
   const LLT DefTy = MRI->getType(DefReg);
 
-  if (DefTy == LLT::scalar(1)) {
-    if (!AllowRiskySelect) {
-      LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");
-      return false;
-    }
-
-    LLVM_DEBUG(dbgs() << "Selecting risky boolean phi\n");
-  }
+  // S1 G_PHIs should not be selected in instruction-select, instead:
+  // - divergent S1 G_PHI should go through lane mask merging algorithm
+  //   and be fully inst-selected in AMDGPUGlobalISelDivergenceLowering
+  // - uniform S1 G_PHI should be lowered into S32 G_PHI in AMDGPURegBankSelect
+  if (DefTy == LLT::scalar(1))
+    return false;
 
   // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
index 312c6a3822ce4f..1855ede0483def 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
 
 define void @temporal_divergent_i1_phi(float %val, ptr %addr) {
 ; GFX10-LABEL: temporal_divergent_i1_phi:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll
index b21e6a729dbc22..1934958ea8f37c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
 
 define void @temporal_divergent_i32(float %val, ptr %addr) {
 ; GFX10-LABEL: temporal_divergent_i32:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
index c7d45f062d0d20..4bb9eb807e1568 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -amdgpu-global-isel-risky-select -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GCN
+# RUN: llc -mtriple=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GCN
 
 ---
 name:            g_phi_s32_ss_sbranch
@@ -321,60 +321,6 @@ body:             |
 
 ...
 
----
-name:            g_phi_vcc_s1_sbranch
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-machineFunctionInfo: {}
-body:             |
-  ; GCN-LABEL: name: g_phi_vcc_s1_sbranch
-  ; GCN: bb.0:
-  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
-  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec
-  ; GCN-NEXT:   S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
-  ; GCN-NEXT:   $scc = COPY [[COPY3]]
-  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
-  ; GCN-NEXT:   S_BRANCH %bb.2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.1:
-  ; GCN-NEXT:   successors: %bb.2(0x80000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec
-  ; GCN-NEXT:   S_BRANCH %bb.2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.2:
-  ; GCN-NEXT:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[V_CMP_EQ_U32_e64_]], %bb.0, [[V_CMP_EQ_U32_e64_1]], %bb.1
-  ; GCN-NEXT:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]]
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $sgpr2
-
-    %0:vgpr(s32) = COPY $vgpr0
-    %1:vgpr(s32) = COPY $vgpr1
-    %2:sgpr(s32) = COPY $sgpr2
-    %3:sgpr(s32) = G_CONSTANT i32 0
-    %4:vcc(s1) = G_ICMP intpred(eq), %0, %3
-    %5:sgpr(s32) = G_ICMP intpred(eq), %2(s32), %3
-    G_BRCOND %5, %bb.1
-    G_BR %bb.2
-
-  bb.1:
-    %6:vcc(s1) = G_ICMP intpred(eq), %1, %3
-    G_BR %bb.2
-
-  bb.2:
-    %7:vcc(s1) = G_PHI %4, %bb.0, %6, %bb.1
-    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %7
-
-...
-
 ---
 name:            phi_s32_ss_sbranch
 legalized:       true

``````````

</details>


https://github.com/llvm/llvm-project/pull/83426