[llvm] fde8351 - [AMDGPU] Fix lowering of S_MOV_{B32,B64}_term

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 9 19:35:00 PST 2020


Author: Carl Ritson
Date: 2020-11-10T12:16:31+09:00
New Revision: fde8351743d5a7ee38ef8838adcfcd59f5ca6e4b

URL: https://github.com/llvm/llvm-project/commit/fde8351743d5a7ee38ef8838adcfcd59f5ca6e4b
DIFF: https://github.com/llvm/llvm-project/commit/fde8351743d5a7ee38ef8838adcfcd59f5ca6e4b.diff

LOG: [AMDGPU] Fix lowering of S_MOV_{B32,B64}_term

If the source operand of S_MOV_{B32,B64}_term is an immediate, the
instruction cannot be lowered to a COPY; it is lowered to the
corresponding S_MOV_{B32,B64} instead.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D90451

Added: 
    llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 0dd6c09a958c..6ba751fcdf9b 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -179,9 +179,14 @@ static unsigned getSaveExecOp(unsigned Opc) {
 // register allocation, so turn them back into normal instructions.
 static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
   switch (MI.getOpcode()) {
-  case AMDGPU::S_MOV_B64_term:
   case AMDGPU::S_MOV_B32_term: {
-    MI.setDesc(TII.get(AMDGPU::COPY));
+    bool RegSrc = MI.getOperand(1).isReg();
+    MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
+    return true;
+  }
+  case AMDGPU::S_MOV_B64_term: {
+    bool RegSrc = MI.getOperand(1).isReg();
+    MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64));
     return true;
   }
   case AMDGPU::S_XOR_B64_term: {

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
new file mode 100644
index 000000000000..0c45c7df30bc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-optimize-exec-masking -verify-machineinstrs  %s -o - | FileCheck %s
+
+---
+name: lower_term_opcodes
+tracksRegLiveness: false
+body: |
+  ; CHECK-LABEL: name: lower_term_opcodes
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   $sgpr0 = COPY $sgpr1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   $sgpr0 = S_MOV_B32 0
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   $sgpr0 = S_MOV_B32 &SYMBOL
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   $sgpr0_sgpr1 = COPY $sgpr2_sgpr3
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   $sgpr0_sgpr1 = S_MOV_B64 0
+  ; CHECK: bb.5:
+  ; CHECK:   successors: %bb.6(0x80000000)
+  ; CHECK:   $sgpr0_sgpr1 = S_MOV_B64 &SYMBOL
+  ; CHECK: bb.6:
+  ; CHECK:   successors: %bb.7(0x80000000)
+  ; CHECK:   $sgpr0 = S_XOR_B32 $sgpr1, $sgpr2, implicit-def $scc
+  ; CHECK: bb.7:
+  ; CHECK:   successors: %bb.8(0x80000000)
+  ; CHECK:   $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK: bb.8:
+  ; CHECK:   successors: %bb.9(0x80000000)
+  ; CHECK:   $sgpr0 = S_OR_B32 $sgpr1, $sgpr2, implicit-def $scc
+  ; CHECK: bb.9:
+  ; CHECK:   successors: %bb.10(0x80000000)
+  ; CHECK:   $sgpr0_sgpr1 = S_OR_B64 $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK: bb.10:
+  ; CHECK:   successors: %bb.11(0x80000000)
+  ; CHECK:   $sgpr0 = S_ANDN2_B32 $sgpr1, $sgpr2, implicit-def $scc
+  ; CHECK: bb.11:
+  ; CHECK:   $sgpr0_sgpr1 = S_ANDN2_B64 $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+  bb.0:
+    $sgpr0 = S_MOV_B32_term $sgpr1
+
+  bb.1:
+    $sgpr0 = S_MOV_B32_term 0
+
+  bb.3:
+    $sgpr0 = S_MOV_B32_term &SYMBOL
+
+  bb.4:
+    $sgpr0_sgpr1 = S_MOV_B64_term $sgpr2_sgpr3
+
+  bb.5:
+    $sgpr0_sgpr1 = S_MOV_B64_term 0
+
+  bb.6:
+    $sgpr0_sgpr1 = S_MOV_B64_term &SYMBOL
+
+  bb.7:
+    $sgpr0 = S_XOR_B32_term $sgpr1, $sgpr2, implicit-def $scc
+
+  bb.8:
+    $sgpr0_sgpr1 = S_XOR_B64_term $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+
+  bb.9:
+    $sgpr0 = S_OR_B32_term $sgpr1, $sgpr2, implicit-def $scc
+
+  bb.10:
+    $sgpr0_sgpr1 = S_OR_B64_term $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+
+  bb.11:
+    $sgpr0 = S_ANDN2_B32_term $sgpr1, $sgpr2, implicit-def $scc
+
+  bb.12:
+    $sgpr0_sgpr1 = S_ANDN2_B64_term $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+...


        


More information about the llvm-commits mailing list