[llvm] r294691 - [AMDGPU] Override PSet for M0
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 9 18:08:00 PST 2017
Author: rampitec
Date: Thu Feb 9 20:07:58 2017
New Revision: 294691
URL: http://llvm.org/viewvc/llvm-project?rev=294691&view=rev
Log:
[AMDGPU] Override PSet for M0
This change returns empty PSet list for M0 register. Otherwise its
PSet as defined by tablegen is SReg_32. This results in incorrect
register pressure calculation every time an instruction uses M0.
Such uses count as SReg_32 PSet and inadequately increase pressure
on SGPRs.
Differential Revision: https://reviews.llvm.org/D29798
Added:
llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=294691&r1=294690&r2=294691&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Thu Feb 9 20:07:58 2017
@@ -1329,3 +1329,11 @@ unsigned SIRegisterInfo::getRegPressureS
return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
}
+
+const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
+ static const int Empty[] = { -1 };
+
+ if (hasRegUnit(AMDGPU::M0, RegUnit))
+ return Empty;
+ return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
+}
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=294691&r1=294690&r2=294691&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h Thu Feb 9 20:07:58 2017
@@ -211,6 +211,8 @@ public:
unsigned getRegPressureSetLimit(const MachineFunction &MF,
unsigned Idx) const override;
+ const int *getRegUnitPressureSets(unsigned RegUnit) const override;
+
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
Added: llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir?rev=294691&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir Thu Feb 9 20:07:58 2017
@@ -0,0 +1,57 @@
+# RUN: llc -march=amdgcn -misched=converge -run-pass machine-scheduler %s -o - -debug-only=misched 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Check there is no SReg_32 pressure created by DS_* instructions because of M0 use
+
+# CHECK: ScheduleDAGMILive::schedule starting
+# CHECK: SU({{.*}} = DS_READ_B32 {{.*}} %M0<imp-use>, %EXEC<imp-use>
+# CHECK: Pressure Diff : {{$}}
+# CHECK: SU({{.*}} DS_WRITE_B32
+
+---
+name: mo_pset
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_128 }
+ - { id: 1, class: sgpr_64 }
+ - { id: 2, class: sreg_32_xm0 }
+ - { id: 3, class: sgpr_32 }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: sreg_32_xm0_xexec }
+ - { id: 6, class: vgpr_32 }
+ - { id: 7, class: vgpr_32 }
+ - { id: 8, class: vgpr_32 }
+liveins:
+ - { reg: '%sgpr4_sgpr5', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0:
+ liveins: %sgpr4_sgpr5
+
+ %1 = COPY %sgpr4_sgpr5
+ %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+ %m0 = S_MOV_B32 -1
+ %7 = COPY %5
+ %6 = DS_READ_B32 %7, 0, 0, implicit %m0, implicit %exec
+ DS_WRITE_B32 %7, %6, 4, 0, implicit killed %m0, implicit %exec
+ S_ENDPGM
+
+...
More information about the llvm-commits
mailing list