[llvm-branch-commits] [llvm-branch] r240285 - Merging r238147:

Tom Stellard thomas.stellard at amd.com
Mon Jun 22 07:58:20 PDT 2015


Author: tstellar
Date: Mon Jun 22 09:58:20 2015
New Revision: 240285

URL: http://llvm.org/viewvc/llvm-project?rev=240285&view=rev
Log:
Merging r238147:

------------------------------------------------------------------------
r238147 | thomas.stellard | 2015-05-25 12:15:54 -0400 (Mon, 25 May 2015) | 4 lines

R600/SI: Fix bug with v_interp_p1_f32 instructions on 16 bank lds chips

The src and dst register cannot be the same on chips with 16 lds banks.

------------------------------------------------------------------------

Modified:
    llvm/branches/release_36/lib/Target/R600/AMDGPU.td
    llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp
    llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h
    llvm/branches/release_36/lib/Target/R600/Processors.td
    llvm/branches/release_36/lib/Target/R600/SIInstructions.td
    llvm/branches/release_36/test/CodeGen/R600/llvm.SI.fs.interp.ll

Modified: llvm/branches/release_36/lib/Target/R600/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/AMDGPU.td?rev=240285&r1=240284&r2=240285&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/AMDGPU.td (original)
+++ llvm/branches/release_36/lib/Target/R600/AMDGPU.td Mon Jun 22 09:58:20 2015
@@ -121,6 +121,15 @@ def FeatureWavefrontSize16 : SubtargetFe
 def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
 def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
 
+class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
+      "ldsbankcount"#Value,
+      "LDSBankCount",
+      !cast<string>(Value),
+      "The number of LDS banks per compute unit.">;
+
+def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
+def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
+
 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
         "localmemorysize"#Value,
         "LocalMemorySize",
@@ -152,7 +161,7 @@ def FeatureNorthernIslands : SubtargetFe
 
 def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
         [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
-         FeatureWavefrontSize64]>;
+         FeatureWavefrontSize64, FeatureLDSBankCount32]>;
 
 def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
         [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
@@ -160,7 +169,8 @@ def FeatureSeaIslands : SubtargetFeature
 
 def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
         [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
-         FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+         FeatureWavefrontSize64, FeatureFlatAddressSpace,
+         FeatureLDSBankCount32]>;
 
 //===----------------------------------------------------------------------===//
 

Modified: llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp?rev=240285&r1=240284&r2=240285&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp (original)
+++ llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp Mon Jun 22 09:58:20 2015
@@ -81,6 +81,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringR
       EnablePromoteAlloca(false), EnableIfCvt(true),
       EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
       EnableVGPRSpilling(false),SGPRInitBug(false),
+      LDSBankCount(0),
       DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
       FrameLowering(TargetFrameLowering::StackGrowsUp,
                     64 * 16, // Maximum stack alignment (long16)

Modified: llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h?rev=240285&r1=240284&r2=240285&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h (original)
+++ llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h Mon Jun 22 09:58:20 2015
@@ -71,6 +71,7 @@ private:
   int LocalMemorySize;
   bool EnableVGPRSpilling;
   bool SGPRInitBug;
+  int LDSBankCount;
 
   const DataLayout DL;
   AMDGPUFrameLowering FrameLowering;
@@ -212,6 +213,10 @@ public:
     return SGPRInitBug;
   }
 
+  int getLDSBankCount() const {
+    return LDSBankCount;
+  }
+
   unsigned getAmdKernelCodeChipID() const;
 
   bool enableMachineScheduler() const override {

Modified: llvm/branches/release_36/lib/Target/R600/Processors.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/Processors.td?rev=240285&r1=240284&r2=240285&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/Processors.td (original)
+++ llvm/branches/release_36/lib/Target/R600/Processors.td Mon Jun 22 09:58:20 2015
@@ -99,15 +99,24 @@ def : ProcessorModel<"hainan",   SIQuart
 // Sea Islands
 //===----------------------------------------------------------------------===//
 
-def : ProcessorModel<"bonaire",    SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"bonaire",    SIQuarterSpeedModel,
+  [FeatureSeaIslands, FeatureLDSBankCount32]
+>;
+
+def : ProcessorModel<"kabini",     SIQuarterSpeedModel,
+  [FeatureSeaIslands, FeatureLDSBankCount16]
+>;
+
+def : ProcessorModel<"kaveri",     SIQuarterSpeedModel,
+  [FeatureSeaIslands, FeatureLDSBankCount32]
+>;
+
+def : ProcessorModel<"hawaii", SIFullSpeedModel,
+  [FeatureSeaIslands, FeatureLDSBankCount32]
+>;
 
-def : ProcessorModel<"kabini",     SIQuarterSpeedModel, [FeatureSeaIslands]>;
-
-def : ProcessorModel<"kaveri",     SIQuarterSpeedModel, [FeatureSeaIslands]>;
-
-def : ProcessorModel<"hawaii",     SIFullSpeedModel, [FeatureSeaIslands]>;
-
-def : ProcessorModel<"mullins",    SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"mullins",    SIQuarterSpeedModel,
+  [FeatureSeaIslands, FeatureLDSBankCount16]>;
 
 //===----------------------------------------------------------------------===//
 // Volcanic Islands

Modified: llvm/branches/release_36/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIInstructions.td?rev=240285&r1=240284&r2=240285&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIInstructions.td (original)
+++ llvm/branches/release_36/lib/Target/R600/SIInstructions.td Mon Jun 22 09:58:20 2015
@@ -40,6 +40,9 @@ def isVI : Predicate <
 
 def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
 
+def has16BankLDS : Predicate<"Subtarget.getLDSBankCount() == 16">;
+def has32BankLDS : Predicate<"Subtarget.getLDSBankCount() == 32">;
+
 def SWaitMatchClass : AsmOperandClass {
   let Name = "SWaitCnt";
   let RenderMethod = "addImmOperands";
@@ -1376,12 +1379,26 @@ defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<
 //===----------------------------------------------------------------------===//
 
 // FIXME: Specify SchedRW for VINTRP insturctions.
-defm V_INTERP_P1_F32 : VINTRP_m <
-  0x00000000, 
+
+multiclass V_INTERP_P1_F32_m : VINTRP_m <
+  0x00000000,
   (outs VGPR_32:$dst),
   (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
   "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]",
-  "$m0">;
+  "$m0"
+>;
+
+let OtherPredicates = [has32BankLDS] in {
+
+defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;
+
+} // End OtherPredicates = [has32BankLDS]
+
+let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst" in {
+
+defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;
+
+} // End OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst"
 
 defm V_INTERP_P2_F32 : VINTRP_m <
   0x00000001,
@@ -2672,14 +2689,26 @@ def : Pat <
   (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
 >;
 
-def : Pat <
+class FSInterpPat <Instruction P1> : Pat <
   (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
-  (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
+  (V_INTERP_P2_F32 (P1 (EXTRACT_SUBREG v2i32:$ij, sub0),
                                     imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)),
                    (EXTRACT_SUBREG $ij, sub1),
                    imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
 >;
 
+let Predicates = [has32BankLDS] in {
+
+def : FSInterpPat<V_INTERP_P1_F32>;
+
+} // End Predicates = [has32BankLDS]
+
+let Predicates = [has16BankLDS] in {
+
+def : FSInterpPat<V_INTERP_P1_F32_16bank>;
+
+} // End Predicates = [has16BankLDS]
+
 /********** ================== **********/
 /********** Intrinsic Patterns **********/
 /********** ================== **********/

Modified: llvm/branches/release_36/test/CodeGen/R600/llvm.SI.fs.interp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/llvm.SI.fs.interp.ll?rev=240285&r1=240284&r2=240285&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/llvm.SI.fs.interp.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/llvm.SI.fs.interp.ll Mon Jun 22 09:58:20 2015
@@ -1,11 +1,12 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-NOT: s_wqm
-;CHECK: s_mov_b32
-;CHECK: v_interp_p1_f32
-;CHECK: v_interp_p2_f32
-;CHECK: v_interp_mov_f32
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=kabini -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=16BANK %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+
+;GCN-NOT: s_wqm
+;GCN: s_mov_b32
+;GCN: v_interp_p1_f32
+;GCN: v_interp_p2_f32
+;GCN: v_interp_mov_f32
 
 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
 main_body:
@@ -16,7 +17,33 @@ main_body:
   ret void
 }
 
-declare void @llvm.AMDGPU.shader.type(i32)
+; Test that v_interp_p1 uses different source and destination registers
+; on 16 bank LDS chips.
+
+; 16BANK-LABEL: {{^}}v_interp_p1_bank16_bug:
+; 16BANK-NOT: v_interp_p1_f32 [[DST:v[0-9]+]], [[DST]]
+
+define void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
+main_body:
+  %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
+  %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
+  %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
+  %25 = call float @fabs(float %22)
+  %26 = call float @fabs(float %23)
+  %27 = call float @fabs(float %24)
+  %28 = call i32 @llvm.SI.packf16(float %25, float %26)
+  %29 = bitcast i32 %28 to float
+  %30 = call i32 @llvm.SI.packf16(float %27, float 1.000000e+00)
+  %31 = bitcast i32 %30 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31)
+  ret void
+}
+
+; Function Attrs: readnone
+declare float @fabs(float) #2
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.SI.packf16(float, float) #1
 
 ; Function Attrs: nounwind readnone
 declare float @llvm.SI.fs.constant(i32, i32, i32) #1
@@ -28,3 +55,4 @@ declare void @llvm.SI.export(i32, i32, i
 
 attributes #0 = { "ShaderType"="0" }
 attributes #1 = { nounwind readnone }
+attributes #2 = { readnone }





More information about the llvm-branch-commits mailing list