[llvm] r285198 - AMDGPU/SI: Don't emit multi-dword flat memory ops when they might access scratch
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 26 07:38:47 PDT 2016
Author: tstellar
Date: Wed Oct 26 09:38:47 2016
New Revision: 285198
URL: http://llvm.org/viewvc/llvm-project?rev=285198&view=rev
Log:
AMDGPU/SI: Don't emit multi-dword flat memory ops when they might access scratch
Summary:
A single flat memory operation that might access the scratch buffer
can only access MaxPrivateElementSize bytes.
Reviewers: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D25788
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=285198&r1=285197&r2=285198&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Oct 26 09:38:47 2016
@@ -2591,6 +2591,14 @@ SDValue SITargetLowering::LowerLOAD(SDVa
return DAG.getMergeValues(Ops, DL);
}
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ // If there is a possibility that a flat instruction accesses scratch memory
+ // then we need to use the same legalization rules we use for private.
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
+ AS = MFI->hasFlatScratchInit() ?
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+
unsigned NumElements = MemVT.getVectorNumElements();
switch (AS) {
case AMDGPUAS::CONSTANT_ADDRESS:
@@ -2890,6 +2898,14 @@ SDValue SITargetLowering::LowerSTORE(SDV
return expandUnalignedStore(Store, DAG);
}
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ // If there is a possibility that a flat instruction accesses scratch memory
+ // then we need to use the same legalization rules we use for private.
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
+ AS = MFI->hasFlatScratchInit() ?
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+
unsigned NumElements = VT.getVectorNumElements();
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS:
Modified: llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll?rev=285198&r1=285197&r2=285198&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat-address-space.ll Wed Oct 26 09:38:47 2016
@@ -1,5 +1,6 @@
-; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=CHECK,HSA %s
; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.
@@ -149,6 +150,28 @@ define void @flat_scratch_unaligned_stor
ret void
}
+; CHECK-LABEL: flat_scratch_multidword_load:
+; HSA: flat_load_dword
+; HSA: flat_load_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
+define void @flat_scratch_multidword_load() {
+ %scratch = alloca <2 x i32>
+ %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
+ %ld = load volatile <2 x i32>, <2 x i32> addrspace(4)* %fptr
+ ret void
+}
+
+; CHECK-LABEL: flat_scratch_multidword_store:
+; HSA: flat_store_dword
+; HSA: flat_store_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
+define void @flat_scratch_multidword_store() {
+ %scratch = alloca <2 x i32>
+ %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
+ store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %fptr
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #3 = { nounwind readnone }
More information about the llvm-commits
mailing list