[llvm] 452e0d9 - AMDGPU: Don't run mode switches with exec 0

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 2 10:47:54 PDT 2020


Author: Matt Arsenault
Date: 2020-06-02T13:47:48-04:00
New Revision: 452e0d9023ca9a747a3646a42cea13d66b689de7

URL: https://github.com/llvm/llvm-project/commit/452e0d9023ca9a747a3646a42cea13d66b689de7
DIFF: https://github.com/llvm/llvm-project/commit/452e0d9023ca9a747a3646a42cea13d66b689de7.diff

LOG: AMDGPU: Don't run mode switches with exec 0

These are scalar instructions that change vector instructions, so they
should not be executed without any active lanes.

The implementation of -amdgpu-skip-threshold also seems to be backwards
from expected, since decreasing it prevents removal.

Added: 
    llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-mode-def.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/test/CodeGen/AMDGPU/skip-if-dead.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5feed9d53bc7..9f954743d271 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3011,6 +3011,20 @@ bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
          Opcode == AMDGPU::DS_GWS_BARRIER;
 }
 
+bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) {
+  // Scan only the instruction descriptor's implicit-def list rather than doing
+  // the full operand and register alias search of modifiesRegister: few
+  // instructions touch MODE, always as an implicit def, and it has no aliases.
+  if (const MCPhysReg *ImpDef = MI.getDesc().getImplicitDefs()) {
+    for (; ImpDef && *ImpDef; ++ImpDef) {
+      if (*ImpDef == AMDGPU::MODE)
+        return true;
+    }
+  }
+
+  return false;
+}
+
 bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
   unsigned Opcode = MI.getOpcode();
 
@@ -3036,6 +3050,10 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
   if (MI.isCall() || MI.isInlineAsm())
     return true; // conservative assumption
 
+  // A mode change is a scalar operation that influences vector instructions.
+  if (modifiesModeRegister(MI))
+    return true;
+
   // These are like SALU instructions in terms of effects, so it's questionable
   // whether we should return true for those.
   //

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index c6d0349d3575..2665d70bface 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -691,6 +691,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
       return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
   }
 
+  /// Return true if the instruction modifies the mode register.
+  static bool modifiesModeRegister(const MachineInstr &MI);
+
   /// Whether we must prevent this instruction from executing with EXEC = 0.
   bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-mode-def.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-mode-def.mir
new file mode 100644
index 000000000000..3f2788d81899
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-mode-def.mir
@@ -0,0 +1,111 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=10 -verify-machineinstrs  %s -o - | FileCheck %s
+# Make sure mandatory skips are not removed around mode defs.
+# FIXME: -amdgpu-skip-threshold seems to be backwards.
+
+---
+
+name: need_skip_setreg_imm32_b32
+body: |
+  ; CHECK-LABEL: name: need_skip_setreg_imm32_b32
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
+  ; CHECK: bb.2:
+  ; CHECK:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+
+name: need_skip_setreg_b32
+body: |
+  ; CHECK-LABEL: name: need_skip_setreg_b32
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode
+  ; CHECK: bb.2:
+  ; CHECK:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    liveins: $sgpr0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+
+name: need_skip_denorm_mode
+body: |
+  ; CHECK-LABEL: name: need_skip_denorm_mode
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   S_DENORM_MODE 3, implicit-def $mode, implicit $mode
+  ; CHECK: bb.2:
+  ; CHECK:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    S_DENORM_MODE 3, implicit-def $mode, implicit $mode
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+
+name: need_skip_round_mode
+body: |
+  ; CHECK-LABEL: name: need_skip_round_mode
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   S_ROUND_MODE 3, implicit-def $mode, implicit $mode
+  ; CHECK: bb.2:
+  ; CHECK:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    S_ROUND_MODE 3, implicit-def $mode, implicit $mode
+
+  bb.2:
+    S_ENDPGM 0
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 7410d82fa128..e6799cda00ab 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -486,6 +486,24 @@ latch:
   ret void
 }
 
+; CHECK-LABEL: {{^}}skip_mode_switch:
+; CHECK: s_and_saveexec_b64
+; CHECK-NEXT: s_cbranch_execz
+; CHECK: s_setreg_imm32
+; CHECK: s_or_b64 exec, exec
+define void @skip_mode_switch(i32 %arg) {
+entry:
+  %cmp = icmp eq i32 %arg, 0
+  br i1 %cmp, label %bb.0, label %bb.1
+
+bb.0:
+  call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
+  br label %bb.1
+
+bb.1:
+  ret void
+}
+
 declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #2
 declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #2
 declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #3
@@ -494,6 +512,8 @@ declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, flo
 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare void @llvm.amdgcn.kill(i1) #0
 
+declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone speculatable }


        


More information about the llvm-commits mailing list