[llvm] eebdd85 - [AMDGPU] allow multi-dword flat scratch access since GFX9
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 17 10:47:18 PST 2020
Author: Stanislav Mekhanoshin
Date: 2020-01-17T10:47:03-08:00
New Revision: eebdd85e7df4d8edfe1d63eeecf5ce96ca2dec45
URL: https://github.com/llvm/llvm-project/commit/eebdd85e7df4d8edfe1d63eeecf5ce96ca2dec45
DIFF: https://github.com/llvm/llvm-project/commit/eebdd85e7df4d8edfe1d63eeecf5ce96ca2dec45.diff
LOG: [AMDGPU] allow multi-dword flat scratch access since GFX9
This is supported starting with GFX9.
Differential Revision: https://reviews.llvm.org/D72865
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/flat-address-space.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 19a240800ba1..61db7ba37e04 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -724,6 +724,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
return ScalarFlatScratchInsts;
}
+ bool hasMultiDwordFlatScratchAddressing() const {
+ return getGeneration() >= GFX9;
+ }
+
bool hasFlatSegmentOffsetBug() const {
return HasFlatSegmentOffsetBug;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1b15f2a431ea..8b91622ed547 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7383,7 +7383,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibilty that flat instruction access scratch memory
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUAS::FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS &&
+ !Subtarget->hasMultiDwordFlatScratchAddressing())
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
@@ -7886,7 +7887,8 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibilty that flat instruction access scratch memory
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUAS::FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS &&
+ !Subtarget->hasMultiDwordFlatScratchAddressing())
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
diff --git a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
index e48e8c96cb5d..b2e74d2819b5 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
@@ -1,7 +1,8 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI,HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI,HSA,CIVI-HSA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX10 %s
; CHECK-LABEL: {{^}}store_flat_i32:
; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
@@ -145,8 +146,10 @@ define amdgpu_kernel void @flat_scratch_unaligned_store() {
}
; CHECK-LABEL: flat_scratch_multidword_load:
-; HSA: flat_load_dword
-; HSA: flat_load_dword
+; CIVI-HSA: flat_load_dword v
+; CIVI-HSA: flat_load_dword v
+; GFX9: flat_load_dwordx2
+; GFX10: flat_load_dwordx2
; FIXME: These tests are broken for os = mesa3d, becasue it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_load() {
%scratch = alloca <2 x i32>, addrspace(5)
@@ -156,8 +159,10 @@ define amdgpu_kernel void @flat_scratch_multidword_load() {
}
; CHECK-LABEL: flat_scratch_multidword_store:
-; HSA: flat_store_dword
-; HSA: flat_store_dword
+; CIVI-HSA: flat_store_dword v
+; CIVI-HSA: flat_store_dword v
+; GFX9: flat_store_dwordx2
+; GFX10: flat_store_dwordx2
; FIXME: These tests are broken for os = mesa3d, becasue it doesn't initialize flat_scr
define amdgpu_kernel void @flat_scratch_multidword_store() {
%scratch = alloca <2 x i32>, addrspace(5)
More information about the llvm-commits
mailing list