[llvm] eebdd85 - [AMDGPU] allow multi-dword flat scratch access since GFX9

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 17 10:47:18 PST 2020


Author: Stanislav Mekhanoshin
Date: 2020-01-17T10:47:03-08:00
New Revision: eebdd85e7df4d8edfe1d63eeecf5ce96ca2dec45

URL: https://github.com/llvm/llvm-project/commit/eebdd85e7df4d8edfe1d63eeecf5ce96ca2dec45
DIFF: https://github.com/llvm/llvm-project/commit/eebdd85e7df4d8edfe1d63eeecf5ce96ca2dec45.diff

LOG: [AMDGPU] allow multi-dword flat scratch access since GFX9

This is supported starting with GFX9.

Differential Revision: https://reviews.llvm.org/D72865

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/flat-address-space.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 19a240800ba1..61db7ba37e04 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -724,6 +724,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
     return ScalarFlatScratchInsts;
   }
 
+  bool hasMultiDwordFlatScratchAddressing() const {
+    return getGeneration() >= GFX9;
+  }
+
   bool hasFlatSegmentOffsetBug() const {
     return HasFlatSegmentOffsetBug;
   }

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1b15f2a431ea..8b91622ed547 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7383,7 +7383,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   // If there is a possibility that flat instruction access scratch memory
   // then we need to use the same legalization rules we use for private.
-  if (AS == AMDGPUAS::FLAT_ADDRESS)
+  if (AS == AMDGPUAS::FLAT_ADDRESS &&
+      !Subtarget->hasMultiDwordFlatScratchAddressing())
     AS = MFI->hasFlatScratchInit() ?
          AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
 
@@ -7886,7 +7887,8 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   // If there is a possibility that flat instruction access scratch memory
   // then we need to use the same legalization rules we use for private.
-  if (AS == AMDGPUAS::FLAT_ADDRESS)
+  if (AS == AMDGPUAS::FLAT_ADDRESS &&
+      !Subtarget->hasMultiDwordFlatScratchAddressing())
     AS = MFI->hasFlatScratchInit() ?
          AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
index e48e8c96cb5d..b2e74d2819b5 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
@@ -1,7 +1,8 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI,HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI,HSA,CIVI-HSA %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX10 %s
 
 ; CHECK-LABEL: {{^}}store_flat_i32:
 ; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
@@ -145,8 +146,10 @@ define amdgpu_kernel void @flat_scratch_unaligned_store() {
 }
 
 ; CHECK-LABEL: flat_scratch_multidword_load:
-; HSA: flat_load_dword
-; HSA: flat_load_dword
+; CIVI-HSA: flat_load_dword v
+; CIVI-HSA: flat_load_dword v
+; GFX9:  flat_load_dwordx2
+; GFX10: flat_load_dwordx2
 ; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
 define amdgpu_kernel void @flat_scratch_multidword_load() {
   %scratch = alloca <2 x i32>, addrspace(5)
@@ -156,8 +159,10 @@ define amdgpu_kernel void @flat_scratch_multidword_load() {
 }
 
 ; CHECK-LABEL: flat_scratch_multidword_store:
-; HSA: flat_store_dword
-; HSA: flat_store_dword
+; CIVI-HSA: flat_store_dword v
+; CIVI-HSA: flat_store_dword v
+; GFX9:  flat_store_dwordx2
+; GFX10: flat_store_dwordx2
 ; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
 define amdgpu_kernel void @flat_scratch_multidword_store() {
   %scratch = alloca <2 x i32>, addrspace(5)


        


More information about the llvm-commits mailing list