[PATCH] D42078: AMDGPU: Fold inline offset for loads properly in moveToVALU on GFX9

Marek Olšák via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 15 09:41:14 PST 2018


mareko created this revision.
mareko added reviewers: arsenm, nhaehnle.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, wdng, kzhuravl.

This enables load merging into x2, x4, which is driven by inline offsets.

6500 shaders are affected:
Code Size in affected shaders: -15.14 %


Repository:
  rL LLVM

https://reviews.llvm.org/D42078

Files:
  lib/Target/AMDGPU/SIInstrInfo.cpp
  test/CodeGen/AMDGPU/smrd.ll


Index: test/CodeGen/AMDGPU/smrd.ll
===================================================================
--- test/CodeGen/AMDGPU/smrd.ll
+++ test/CodeGen/AMDGPU/smrd.ll
@@ -194,11 +194,7 @@
 
 ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
 ; GCN-NEXT: %bb.
-
-; SICIVI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
-
-; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 0xfff, v0
-; GFX9-NEXT: buffer_load_dword v{{[0-9]}}, [[ADD]], s[0:3], 0 offen ;
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
 define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
 main_body:
   %off = add i32 %offset, 4095
@@ -244,16 +240,8 @@
 
 ; GCN-LABEL: {{^}}smrd_vgpr_merged:
 ; GCN-NEXT: %bb.
-
-; SICIVI-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
-; SICIVI-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
-
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
+; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
+; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
 define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 {
 main_body:
   %a1 = add i32 %a, 4
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3765,27 +3765,39 @@
       //   V_ADD_I32_e32 dst, imm, src1
       //   V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
       // V_ADD will be removed by "Remove dead machine instructions".
-      if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
-        const MachineOperand *Src =
-          getNamedOperand(*Add, AMDGPU::OpName::src0);
-
-        if (Src->isReg()) {
-          auto Mov = MRI.getUniqueVRegDef(Src->getReg());
-          if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
-            Src = &Mov->getOperand(1);
-        }
-
-        if (Src) {
-          if (Src->isImm())
-            Offset = Src->getImm();
-          else if (Src->isCImm())
-            Offset = Src->getCImm()->getZExtValue();
-        }
+      if (Add &&
+          (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
+           Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
+        static const unsigned SrcNames[2] = {
+          AMDGPU::OpName::src0,
+          AMDGPU::OpName::src1,
+        };
+
+        // Find a literal offset in one of source operands.
+        for (int i = 0; i < 2; i++) {
+          const MachineOperand *Src =
+            getNamedOperand(*Add, SrcNames[i]);
+
+          if (Src->isReg()) {
+            auto Mov = MRI.getUniqueVRegDef(Src->getReg());
+            if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
+              Src = &Mov->getOperand(1);
+          }
+
+          if (Src) {
+            if (Src->isImm())
+              Offset = Src->getImm();
+            else if (Src->isCImm())
+              Offset = Src->getCImm()->getZExtValue();
+          }
+
+          if (Offset && isLegalMUBUFImmOffset(Offset)) {
+            VAddr = getNamedOperand(*Add, SrcNames[!i]);
+            break;
+          }
 
-        if (Offset && isLegalMUBUFImmOffset(Offset))
-          VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
-        else
           Offset = 0;
+        }
       }
 
       BuildMI(*MBB, Inst, Inst.getDebugLoc(),


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D42078.129876.patch
Type: text/x-patch
Size: 3532 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180115/3a0cb413/attachment.bin>


More information about the llvm-commits mailing list