[llvm] r314566 - CodeGen: Fix pointer info in expandUnalignedLoad/Store
Yaxun Liu via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 29 16:31:15 PDT 2017
Author: yaxunl
Date: Fri Sep 29 16:31:14 2017
New Revision: 314566
URL: http://llvm.org/viewvc/llvm-project?rev=314566&view=rev
Log:
CodeGen: Fix pointer info in expandUnalignedLoad/Store
Currently expandUnalignedLoad/Store uses a placeholder MachinePointerInfo for the
temporary stack memory operand, which does not carry the correct address space. As a
result, an unaligned private double16 load/store is lowered to flat_load instead of
buffer_load on the amdgcn target.
This fixes failures of the OpenCL conformance tests basic/vload_private and
basic/vstore_private on target amdgcn---amdgizcl.
Differential Revision: https://reviews.llvm.org/D35361
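
For readers following along, the essence of the fix is to recover the frame index
from the stack temporary and attach MachinePointerInfo::getFixedStack to every
access of that slot, so the resulting MachineMemOperand carries the stack (private)
address space. A minimal sketch of that pattern follows; the helper name and
signature are illustrative, not part of the patch:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/SelectionDAGNodes.h"

  using namespace llvm;

  // Illustrative helper (not from the patch): store Val to a fresh stack
  // temporary, recording the frame index in the pointer info instead of
  // passing a blank MachinePointerInfo(), which would lose the address space.
  static SDValue storeToStackTemp(SelectionDAG &DAG, const SDLoc &dl,
                                  SDValue Chain, SDValue Val, EVT SlotVT) {
    SDValue Slot = DAG.CreateStackTemporary(SlotVT);
    int FI = cast<FrameIndexSDNode>(Slot.getNode())->getIndex();
    MachineFunction &MF = DAG.getMachineFunction();
    return DAG.getStore(Chain, dl, Val, Slot,
                        MachinePointerInfo::getFixedStack(MF, FI, /*Offset=*/0));
  }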
Added:
llvm/trunk/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=314566&r1=314565&r2=314566&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Fri Sep 29 16:31:14 2017
@@ -3481,6 +3481,7 @@ TargetLowering::expandUnalignedLoad(Load
EVT VT = LD->getValueType(0);
EVT LoadedVT = LD->getMemoryVT();
SDLoc dl(LD);
+ auto &MF = DAG.getMachineFunction();
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
@@ -3511,7 +3512,7 @@ TargetLowering::expandUnalignedLoad(Load
// Make sure the stack slot is also aligned for the register type.
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
-
+ auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
SmallVector<SDValue, 8> Stores;
SDValue StackPtr = StackBase;
unsigned Offset = 0;
@@ -3530,8 +3531,9 @@ TargetLowering::expandUnalignedLoad(Load
MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
- MachinePointerInfo()));
+ Stores.push_back(DAG.getStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
// Increment the pointers.
Offset += RegBytes;
Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
@@ -3550,15 +3552,17 @@ TargetLowering::expandUnalignedLoad(Load
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
- MachinePointerInfo(), MemVT));
+ Stores.push_back(DAG.getTruncStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
// Finally, perform the original load only redirected to the stack slot.
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
- MachinePointerInfo(), LoadedVT);
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
+ LoadedVT);
// Callers expect a MERGE_VALUES node.
return std::make_pair(Load, TF);
@@ -3628,6 +3632,7 @@ SDValue TargetLowering::expandUnalignedS
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
+ auto &MF = DAG.getMachineFunction();
SDLoc dl(ST);
if (ST->getMemoryVT().isFloatingPoint() ||
@@ -3662,10 +3667,12 @@ SDValue TargetLowering::expandUnalignedS
// Make sure the stack slot is also aligned for the register type.
SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Perform the original store, only redirected to the stack slot.
- SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr,
- MachinePointerInfo(), StoredVT);
+ SDValue Store = DAG.getTruncStore(
+ Chain, dl, Val, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT);
EVT StackPtrVT = StackPtr.getValueType();
@@ -3677,8 +3684,9 @@ SDValue TargetLowering::expandUnalignedS
// Do all but one copies using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the stack slot.
- SDValue Load =
- DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo());
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
@@ -3698,8 +3706,9 @@ SDValue TargetLowering::expandUnalignedS
8 * (StoredBytes - Offset));
// Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
- MachinePointerInfo(), MemVT);
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT);
Stores.push_back(
DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
Added: llvm/trunk/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll?rev=314566&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll Fri Sep 29 16:31:14 2017
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
+
+; GCN-LABEL: @test_unaligned_load
+; GCN: buffer_load_dword
+; GCN-NOT: flat_load_dword
+define amdgpu_kernel void @test_unaligned_load(<16 x double> addrspace(1)* %results, i32 %i) {
+entry:
+ %a = inttoptr i32 %i to <16 x double> addrspace(5)*
+ %v = load <16 x double>, <16 x double> addrspace(5)* %a, align 8
+ store <16 x double> %v, <16 x double> addrspace(1)* %results, align 128
+ ret void
+}
+
+; GCN-LABEL: @test_unaligned_store
+; GCN: buffer_store_dword
+; GCN-NOT: flat_store_dword
+define amdgpu_kernel void @test_unaligned_store(<16 x double> %v, i32 %i) {
+entry:
+ %a = inttoptr i32 %i to <16 x double> addrspace(5)*
+ store <16 x double> %v, <16 x double> addrspace(5)* %a, align 8
+ ret void
+}