[llvm] r318132 - AMDGPU: Fix producing saveexec when the copy is spilled

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 13 18:16:55 PST 2017


Author: arsenm
Date: Mon Nov 13 18:16:54 2017
New Revision: 318132

URL: http://llvm.org/viewvc/llvm-project?rev=318132&view=rev
Log:
AMDGPU: Fix producing saveexec when the copy is spilled

If the register holding the copy of exec was also spilled, the copy
before the spill was still deleted when the saveexec was formed. This
left a spill of an undefined register, which is a verifier error and a
miscompile. Check for other instructions that use the copy register.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp?rev=318132&r1=318131&r2=318132&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp Mon Nov 13 18:16:54 2017
@@ -10,7 +10,7 @@
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -276,6 +276,8 @@ bool SIOptimizeExecMasking::runOnMachine
         break;
       }
 
+      bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);
+
       if (J->modifiesRegister(CopyToExec, TRI)) {
         if (SaveExecInst) {
           DEBUG(dbgs() << "Multiple instructions modify "
@@ -288,7 +290,7 @@ bool SIOptimizeExecMasking::runOnMachine
         if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
           break;
 
-        if (J->readsRegister(CopyFromExec, TRI)) {
+        if (ReadsCopyFromExec) {
           SaveExecInst = &*J;
           DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
           continue;
@@ -296,6 +298,18 @@ bool SIOptimizeExecMasking::runOnMachine
           DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n');
           break;
         }
+      } else if (ReadsCopyFromExec && !SaveExecInst) {
+        // Make sure no other instruction is trying to use this copy, before it
+        // will be rewritten by the saveexec, i.e. hasOneUse. There may have
+        // been another use, such as an inserted spill. For example:
+        //
+        // %sgpr0_sgpr1 = COPY %exec
+        // spill %sgpr0_sgpr1
+        // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1
+        //
+        DEBUG(dbgs() << "Found second use of save inst candidate: "
+              << *J << '\n');
+        break;
       }
 
       if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {

Added: llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir?rev=318132&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir Mon Nov 13 18:16:54 2017
@@ -0,0 +1,144 @@
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -run-pass si-optimize-exec-masking -verify-machineinstrs %s
+--- |
+  define amdgpu_kernel void @undefined_physreg_sgpr_spill() #0 {
+    unreachable
+  }
+
+  define amdgpu_kernel void @undefined_physreg_sgpr_spill_reorder() #0 {
+    unreachable
+  }
+
+  attributes #0 = { nounwind "amdgpu-num-sgpr"="16" }
+
+...
+---
+
+# copy + s_and_b64 was turned into saveexec, deleting the copy,
+# leaving a spill of the undefined register.
+
+# CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill:
+# CHECK: %sgpr0_sgpr1 = COPY %exec, implicit-def %exec
+# CHECK-NEXT: SI_SPILL_S64_SAVE %sgpr0_sgpr1,
+# CHECK-NEXT: %sgpr2_sgpr3 = S_AND_B64 killed %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc
+# CHECK: %exec = COPY killed %sgpr2_sgpr3
+name:            undefined_physreg_sgpr_spill
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+liveins:
+  - { reg: '%vgpr0', virtual-reg: '' }
+  - { reg: '%sgpr4_sgpr5', virtual-reg: '' }
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
+      stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+constants:
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: %vgpr0, %sgpr4_sgpr5
+
+    %vgpr1_vgpr2 = COPY killed %sgpr4_sgpr5, implicit %exec
+    %vgpr1 = GLOBAL_LOAD_UBYTE killed %vgpr1_vgpr2, 0, 0, 0, implicit %exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(2)* undef`)
+    %vcc = V_CMP_NE_U32_e64 0, %vgpr0, implicit %exec
+    %sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed %vgpr1, implicit %exec
+    %vgpr1 = V_CNDMASK_B32_e64 0, -1, killed %sgpr0_sgpr1, implicit %exec
+    %sgpr0_sgpr1 = COPY %exec, implicit-def %exec
+    SI_SPILL_S64_SAVE %sgpr0_sgpr1, %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (store 8 into %stack.0, align 4)
+    %sgpr2_sgpr3 = S_AND_B64 killed %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc
+    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
+    SI_MASK_BRANCH %bb.2, implicit %exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.3(0x80000000)
+    liveins: %vgpr0, %vgpr1
+
+    %sgpr2_sgpr3 = S_MOV_B64 0
+    %vgpr2 = V_MOV_B32_e32 0, implicit %exec
+    %sgpr4_sgpr5 = IMPLICIT_DEF
+    S_BRANCH %bb.3
+
+  bb.2:
+    successors:
+
+    %sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (load 8 from %stack.0, align 4)
+    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
+
+  bb.3:
+    liveins: %vgpr0, %vgpr1, %vgpr2, %sgpr2_sgpr3, %sgpr4_sgpr5
+
+    %vcc = COPY %vgpr1
+    S_ENDPGM
+
+...
+---
+
+# Move spill to after future save instruction
+# CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill_reorder:
+# CHECK: %sgpr0_sgpr1 = COPY %exec, implicit-def %exec
+# CHECK: %exec = COPY killed %sgpr2_sgpr3
+
+# CHECK: %sgpr0_sgpr1 = COPY %exec, implicit-def %exec
+# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc
+# CHECK: SI_SPILL_S64_SAVE killed %sgpr0_sgpr1, %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (store 8 into %stack.0, align 4)
+# CHECK: %exec = COPY killed %sgpr2_sgpr3
+name:            undefined_physreg_sgpr_spill_reorder
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+liveins:
+  - { reg: '%vgpr0', virtual-reg: '' }
+  - { reg: '%sgpr4_sgpr5', virtual-reg: '' }
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
+      stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+constants:
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: %vgpr0, %sgpr4_sgpr5
+
+    %vgpr1_vgpr2 = COPY killed %sgpr4_sgpr5, implicit %exec
+    %vgpr1 = GLOBAL_LOAD_UBYTE killed %vgpr1_vgpr2, 0, 0, 0, implicit %exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(2)* undef`)
+    %vcc = V_CMP_NE_U32_e64 0, %vgpr0, implicit %exec
+    %sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed %vgpr1, implicit %exec
+    %vgpr1 = V_CNDMASK_B32_e64 0, -1, killed %sgpr0_sgpr1, implicit %exec
+    %sgpr0_sgpr1 = COPY %exec, implicit-def %exec
+    %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc
+    SI_SPILL_S64_SAVE killed %sgpr0_sgpr1, %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (store 8 into %stack.0, align 4)
+    %exec = S_MOV_B64_term killed %sgpr2_sgpr3
+    SI_MASK_BRANCH %bb.2, implicit %exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.3(0x80000000)
+    liveins: %vgpr0, %vgpr1
+
+    %sgpr2_sgpr3 = S_MOV_B64 0
+    %vgpr2 = V_MOV_B32_e32 0, implicit %exec
+    %sgpr4_sgpr5 = IMPLICIT_DEF
+    S_BRANCH %bb.3
+
+  bb.2:
+    successors:
+
+    %sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (load 8 from %stack.0, align 4)
+    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
+
+  bb.3:
+    liveins: %vgpr0, %vgpr1, %vgpr2, %sgpr2_sgpr3, %sgpr4_sgpr5
+
+    %vcc = COPY %vgpr1
+    S_ENDPGM
+
+...




More information about the llvm-commits mailing list