[llvm-branch-commits] [llvm-branch] r324089 - Merging r323909:
Hans Wennborg via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 2 05:54:44 PST 2018
Author: hans
Date: Fri Feb 2 05:54:44 2018
New Revision: 324089
URL: http://llvm.org/viewvc/llvm-project?rev=324089&view=rev
Log:
Merging r323909:
------------------------------------------------------------------------
r323909 | mareko | 2018-01-31 21:18:11 +0100 (Wed, 31 Jan 2018) | 13 lines
AMDGPU: Fold inline offset for loads properly in moveToVALU on GFX9
Summary:
This enables load merging into x2, x4, which is driven by inline offsets.
6500 shaders are affected:
Code Size in affected shaders: -15.14 %
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D42078
------------------------------------------------------------------------
Modified:
llvm/branches/release_60/ (props changed)
llvm/branches/release_60/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/branches/release_60/test/CodeGen/AMDGPU/smrd.ll
Propchange: llvm/branches/release_60/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Feb 2 05:54:44 2018
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,321751,321789,321791,321806,321862,321870,321872,321878,321980,321991,321993-321994,322003,322016,322053,322056,322103,322106,322108,322123,322131,322223,322272,322313,322372,322473,322623,322644,322724,322767,322875,322878-322879,322900,322904-322905,322973,322993,323034,323155,323190,323307,323331,323355,323369,323371,323384,323469,323515,323536,323582,323671-323672,323706,323710,323759,323781,323810-323811,323813,323857,323907,323913,323915
+/llvm/trunk:155241,321751,321789,321791,321806,321862,321870,321872,321878,321980,321991,321993-321994,322003,322016,322053,322056,322103,322106,322108,322123,322131,322223,322272,322313,322372,322473,322623,322644,322724,322767,322875,322878-322879,322900,322904-322905,322973,322993,323034,323155,323190,323307,323331,323355,323369,323371,323384,323469,323515,323536,323582,323671-323672,323706,323710,323759,323781,323810-323811,323813,323857,323907,323909,323913,323915
Modified: llvm/branches/release_60/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=324089&r1=324088&r2=324089&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/branches/release_60/lib/Target/AMDGPU/SIInstrInfo.cpp Fri Feb 2 05:54:44 2018
@@ -3756,36 +3756,45 @@ void SIInstrInfo::moveToVALU(MachineInst
// FIXME: This isn't safe because the addressing mode doesn't work
// correctly if vaddr is negative.
//
- // FIXME: Handle v_add_u32 and VOP3 form. Also don't rely on immediate
- // being in src0.
- //
// FIXME: Should probably be done somewhere else, maybe SIFoldOperands.
//
// See if we can extract an immediate offset by recognizing one of these:
// V_ADD_I32_e32 dst, imm, src1
// V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
// V_ADD will be removed by "Remove dead machine instructions".
- if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
- const MachineOperand *Src =
- getNamedOperand(*Add, AMDGPU::OpName::src0);
-
- if (Src->isReg()) {
- auto Mov = MRI.getUniqueVRegDef(Src->getReg());
- if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
- Src = &Mov->getOperand(1);
- }
+ if (Add &&
+ (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
+ Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
+ static const unsigned SrcNames[2] = {
+ AMDGPU::OpName::src0,
+ AMDGPU::OpName::src1,
+ };
- if (Src) {
- if (Src->isImm())
- Offset = Src->getImm();
- else if (Src->isCImm())
- Offset = Src->getCImm()->getZExtValue();
- }
+ // Find a literal offset in one of source operands.
+ for (int i = 0; i < 2; i++) {
+ const MachineOperand *Src =
+ getNamedOperand(*Add, SrcNames[i]);
+
+ if (Src->isReg()) {
+ auto Mov = MRI.getUniqueVRegDef(Src->getReg());
+ if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
+ Src = &Mov->getOperand(1);
+ }
+
+ if (Src) {
+ if (Src->isImm())
+ Offset = Src->getImm();
+ else if (Src->isCImm())
+ Offset = Src->getCImm()->getZExtValue();
+ }
+
+ if (Offset && isLegalMUBUFImmOffset(Offset)) {
+ VAddr = getNamedOperand(*Add, SrcNames[!i]);
+ break;
+ }
- if (Offset && isLegalMUBUFImmOffset(Offset))
- VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
- else
Offset = 0;
+ }
}
BuildMI(*MBB, Inst, Inst.getDebugLoc(),
Modified: llvm/branches/release_60/test/CodeGen/AMDGPU/smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/AMDGPU/smrd.ll?rev=324089&r1=324088&r2=324089&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/AMDGPU/smrd.ll (original)
+++ llvm/branches/release_60/test/CodeGen/AMDGPU/smrd.ll Fri Feb 2 05:54:44 2018
@@ -194,11 +194,7 @@ main_body:
; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
; GCN-NEXT: %bb.
-
-; SICIVI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
-
-; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 0xfff, v0
-; GFX9-NEXT: buffer_load_dword v{{[0-9]}}, [[ADD]], s[0:3], 0 offen ;
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
main_body:
%off = add i32 %offset, 4095
@@ -244,16 +240,8 @@ main_body:
; GCN-LABEL: {{^}}smrd_vgpr_merged:
; GCN-NEXT: %bb.
-
-; SICIVI-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
-; SICIVI-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
-
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
+; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
+; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 {
main_body:
%a1 = add i32 %a, 4
More information about the llvm-branch-commits
mailing list