[llvm] r318132 - AMDGPU: Fix producing saveexec when the copy is spilled
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 13 18:16:55 PST 2017
Author: arsenm
Date: Mon Nov 13 18:16:54 2017
New Revision: 318132
URL: http://llvm.org/viewvc/llvm-project?rev=318132&view=rev
Log:
AMDGPU: Fix producing saveexec when the copy is spilled
If the register copied from exec was spilled, the copy
preceding the spill was deleted, leaving a spill of an
undefined register: a verifier error and a miscompile.
Check for other instructions that use the copied register.
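For context, the added check amounts to a single-use test on the exec copy
during the pass's forward scan. Below is a minimal sketch of that scan, not
the committed code: the helper name findSaveExecCandidate and the reduced
control flow are illustrative, and it assumes it lives inside
SIOptimizeExecMasking.cpp so the file's existing includes provide
MachineBasicBlock, MachineInstr, and TargetRegisterInfo.

// Illustrative sketch only: walk forward from the 'COPY %exec' looking for
// the instruction that both reads the copy and writes the register later
// copied back into exec (the saveexec candidate). The r318132 change is the
// extra bail-out when some *other* instruction, e.g. a spill inserted by the
// register allocator, reads the copy first: folding would delete the copy
// and leave that reader with an undefined register.
static MachineInstr *
findSaveExecCandidate(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator CopyMI, // the COPY from exec
                      unsigned CopyFromExec,              // dest of the exec copy
                      unsigned CopyToExec,                // reg later copied to exec
                      const TargetRegisterInfo *TRI) {
  MachineInstr *SaveExecInst = nullptr;
  for (auto J = std::next(CopyMI), JE = MBB.end(); J != JE; ++J) {
    bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);

    if (J->modifiesRegister(CopyToExec, TRI)) {
      if (SaveExecInst)
        return nullptr;           // more than one writer; give up
      if (!ReadsCopyFromExec)
        return nullptr;           // writer does not consume the exec copy
      SaveExecInst = &*J;         // e.g. %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, ...
      continue;
    }

    // The fix: any other reader of the copy before the saveexec candidate is
    // found (such as SI_SPILL_S64_SAVE) means the copy is not single-use, so
    // it cannot simply be rewritten into a saveexec.
    if (ReadsCopyFromExec && !SaveExecInst)
      return nullptr;
  }
  return SaveExecInst;
}

The real pass interleaves this scan with further checks (terminator handling,
opcode selection for the saveexec form), so the sketch only captures the part
relevant to this fix.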
Added:
llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp?rev=318132&r1=318131&r2=318132&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp Mon Nov 13 18:16:54 2017
@@ -10,7 +10,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -276,6 +276,8 @@ bool SIOptimizeExecMasking::runOnMachine
break;
}
+ bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);
+
if (J->modifiesRegister(CopyToExec, TRI)) {
if (SaveExecInst) {
DEBUG(dbgs() << "Multiple instructions modify "
@@ -288,7 +290,7 @@ bool SIOptimizeExecMasking::runOnMachine
if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
break;
- if (J->readsRegister(CopyFromExec, TRI)) {
+ if (ReadsCopyFromExec) {
SaveExecInst = &*J;
DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
continue;
@@ -296,6 +298,18 @@ bool SIOptimizeExecMasking::runOnMachine
DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n');
break;
}
+ } else if (ReadsCopyFromExec && !SaveExecInst) {
+ // Make sure no other instruction is trying to use this copy, before it
+ // will be rewritten by the saveexec, i.e. hasOneUse. There may have
+ // been another use, such as an inserted spill. For example:
+ //
+ // %sgpr0_sgpr1 = COPY %exec
+ // spill %sgpr0_sgpr1
+ // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1
+ //
+ DEBUG(dbgs() << "Found second use of save inst candidate: "
+ << *J << '\n');
+ break;
}
if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {
Added: llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir?rev=318132&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir Mon Nov 13 18:16:54 2017
@@ -0,0 +1,144 @@
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -run-pass si-optimize-exec-masking -verify-machineinstrs %s
+--- |
+ define amdgpu_kernel void @undefined_physreg_sgpr_spill() #0 {
+ unreachable
+ }
+
+ define amdgpu_kernel void @undefined_physreg_sgpr_spill_reorder() #0 {
+ unreachable
+ }
+
+ attributes #0 = { nounwind "amdgpu-num-sgpr"="16" }
+
+...
+---
+
+# copy + s_and_b64 was turned into saveexec, deleting the copy,
+# leaving a spill of the undefined register.
+
+# CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill:
+# CHECK: %sgpr0_sgpr1 = COPY %exec, implicit-def %exec
+# CHECK-NEXT: SI_SPILL_S64_SAVE %sgpr0_sgpr1,
+# CHECK-NEXT: %sgpr2_sgpr3 = S_AND_B64 killed %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc
+# CHECK: %exec = COPY killed %sgpr2_sgpr3
+name: undefined_physreg_sgpr_spill
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+liveins:
+ - { reg: '%vgpr0', virtual-reg: '' }
+ - { reg: '%sgpr4_sgpr5', virtual-reg: '' }
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
+ stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
+ di-variable: '', di-expression: '', di-location: '' }
+constants:
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: %vgpr0, %sgpr4_sgpr5
+
+ %vgpr1_vgpr2 = COPY killed %sgpr4_sgpr5, implicit %exec
+ %vgpr1 = GLOBAL_LOAD_UBYTE killed %vgpr1_vgpr2, 0, 0, 0, implicit %exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(2)* undef`)
+ %vcc = V_CMP_NE_U32_e64 0, %vgpr0, implicit %exec
+ %sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed %vgpr1, implicit %exec
+ %vgpr1 = V_CNDMASK_B32_e64 0, -1, killed %sgpr0_sgpr1, implicit %exec
+ %sgpr0_sgpr1 = COPY %exec, implicit-def %exec
+ SI_SPILL_S64_SAVE %sgpr0_sgpr1, %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (store 8 into %stack.0, align 4)
+ %sgpr2_sgpr3 = S_AND_B64 killed %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc
+ %exec = S_MOV_B64_term killed %sgpr2_sgpr3
+ SI_MASK_BRANCH %bb.2, implicit %exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.3(0x80000000)
+ liveins: %vgpr0, %vgpr1
+
+ %sgpr2_sgpr3 = S_MOV_B64 0
+ %vgpr2 = V_MOV_B32_e32 0, implicit %exec
+ %sgpr4_sgpr5 = IMPLICIT_DEF
+ S_BRANCH %bb.3
+
+ bb.2:
+ successors:
+
+ %sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (load 8 from %stack.0, align 4)
+ %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
+
+ bb.3:
+ liveins: %vgpr0, %vgpr1, %vgpr2, %sgpr2_sgpr3, %sgpr4_sgpr5
+
+ %vcc = COPY %vgpr1
+ S_ENDPGM
+
+...
+---
+
+# Move spill to after future save instruction
+# CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill_reorder:
+# CHECK: %sgpr0_sgpr1 = COPY %exec, implicit-def %exec
+# CHECK: %exec = COPY killed %sgpr2_sgpr3
+
+# CHECK: %sgpr0_sgpr1 = COPY %exec, implicit-def %exec
+# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc
+# CHECK: SI_SPILL_S64_SAVE killed %sgpr0_sgpr1, %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (store 8 into %stack.0, align 4)
+# CHECK: %exec = COPY killed %sgpr2_sgpr3
+name: undefined_physreg_sgpr_spill_reorder
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+liveins:
+ - { reg: '%vgpr0', virtual-reg: '' }
+ - { reg: '%sgpr4_sgpr5', virtual-reg: '' }
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
+ stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
+ di-variable: '', di-expression: '', di-location: '' }
+constants:
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: %vgpr0, %sgpr4_sgpr5
+
+ %vgpr1_vgpr2 = COPY killed %sgpr4_sgpr5, implicit %exec
+ %vgpr1 = GLOBAL_LOAD_UBYTE killed %vgpr1_vgpr2, 0, 0, 0, implicit %exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(2)* undef`)
+ %vcc = V_CMP_NE_U32_e64 0, %vgpr0, implicit %exec
+ %sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed %vgpr1, implicit %exec
+ %vgpr1 = V_CNDMASK_B32_e64 0, -1, killed %sgpr0_sgpr1, implicit %exec
+ %sgpr0_sgpr1 = COPY %exec, implicit-def %exec
+ %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def dead %scc
+ SI_SPILL_S64_SAVE killed %sgpr0_sgpr1, %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (store 8 into %stack.0, align 4)
+ %exec = S_MOV_B64_term killed %sgpr2_sgpr3
+ SI_MASK_BRANCH %bb.2, implicit %exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.3(0x80000000)
+ liveins: %vgpr0, %vgpr1
+
+ %sgpr2_sgpr3 = S_MOV_B64 0
+ %vgpr2 = V_MOV_B32_e32 0, implicit %exec
+ %sgpr4_sgpr5 = IMPLICIT_DEF
+ S_BRANCH %bb.3
+
+ bb.2:
+ successors:
+
+ %sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit %exec, implicit %sgpr8_sgpr9_sgpr10_sgpr11, implicit %sgpr13, implicit-def dead %m0 :: (load 8 from %stack.0, align 4)
+ %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
+
+ bb.3:
+ liveins: %vgpr0, %vgpr1, %vgpr2, %sgpr2_sgpr3, %sgpr4_sgpr5
+
+ %vcc = COPY %vgpr1
+ S_ENDPGM
+
+...