[PATCH] D68873: [AMDGPU] Amend target loop unroll defaults

Fri Oct 11 09:19:30 PDT 2019

timcorringham created this revision.
Herald added subscribers: llvm-commits, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.
Herald added a project: LLVM.

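Amend the default loop unroll thresholds for AMDPAL shaders to be more
aggressive: Threshold and PartialThreshold are set to 700 (1100 on GFX10
and later), and MaxPercentThresholdBoost is set to 1000. This gives an
overall performance benefit on a representative sample of shaders.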


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D68873

Files:
  llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
  llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h


Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
===================================================================

--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -49,6 +49,8 @@
   const TargetSubtargetInfo *ST;
   const TargetLoweringBase *TLI;
 
+  AMDGPUSubtarget::Generation Gen;
+
   const TargetSubtargetInfo *getST() const { return ST; }
   const TargetLoweringBase *getTLI() const { return TLI; }
 
@@ -57,7 +59,8 @@
       : BaseT(TM, F.getParent()->getDataLayout()),
         TargetTriple(TM->getTargetTriple()),
         ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
-        TLI(ST->getTargetLowering()) {}
+        TLI(ST->getTargetLowering()),
+        Gen(TM->getSubtarget<GCNSubtarget>(F).getGeneration()) {}
 
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -96,6 +96,19 @@
 
   // TODO: Do we want runtime unrolling?
 
+  // Set more aggressive defaults for PAL shaders
+  if (TargetTriple.getOS() == Triple::AMDPAL) {
+    UP.MaxPercentThresholdBoost = 1000;
+    // and even more aggressive for GFX10
+    if (Gen >= AMDGPUSubtarget::GFX10) {
+      UP.Threshold = 1100;
+      UP.PartialThreshold = 1100;
+    } else {
+      UP.Threshold = 700;
+      UP.PartialThreshold = 700;
+    }
+  }
+
   // Maximum alloca size than can fit registers. Reserve 16 registers.
   const unsigned MaxAlloca = (256 - 16) * 4;
   unsigned ThresholdPrivate = UnrollThresholdPrivate;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D68873.224612.patch
Type: text/x-patch
Size: 1805 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191011/c9eb0db7/attachment.bin>