[PATCH] AMDGPU: Fix chains for memory ops dependent on argument loads
Matt Arsenault
Matthew.Arsenault at amd.com
Thu Jun 25 12:18:29 PDT 2015
Most loads and stores are derived from pointers derived from
a kernel argument load inserted during argument lowering.
This was just using the EntryToken chain for the argument loads,
and any users of these loads were also on the EntryToken chain.
Return the chain of the lowered argument load so that dependent loads
end up on the correct chain.
No test since I'm not aware of any case where this actually
broke.
This does break a handful of tests due to stores no longer being merged, because
GatherAllAliases gives up on analyzing cases where the pointer is loaded. We will probably
need to handle this specific case since we know the loaded pointer is from constant memory.
http://reviews.llvm.org/D10742
Files:
lib/Target/AMDGPU/SIISelLowering.cpp
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -429,6 +429,8 @@
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
+ Chain = BasePtr.getValue(1);
+
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
DAG.getConstant(Offset, SL, PtrVT));
SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
@@ -451,7 +453,12 @@
true, // isNonTemporal
true, // isInvariant
Align); // Alignment
- return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load);
+ SDValue Ops[] = {
+ DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load),
+ Load.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, SL);
}
ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
@@ -569,6 +576,8 @@
AnalyzeFormalArguments(CCInfo, Splits);
+ SmallVector<SDValue, 16> Chains;
+
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
@@ -586,8 +595,9 @@
const unsigned Offset = 36 + VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
- SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
+ SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
Offset, Ins[i].Flags.isSExt());
+ Chains.push_back(Arg.getValue(1));
const PointerType *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
@@ -613,14 +623,17 @@
Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
&AMDGPU::SReg_64RegClass);
Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
- InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ InVals.push_back(Copy);
+ Chains.push_back(Copy.getValue(1));
continue;
}
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
Reg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ Chains.push_back(Val.getValue(1));
if (Arg.VT.isVector()) {
@@ -633,7 +646,10 @@
for (unsigned j = 1; j != NumElements; ++j) {
Reg = ArgLocs[ArgIdx++].getLocReg();
Reg = MF.addLiveIn(Reg, RC);
- Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+
+ SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ Regs.push_back(Copy);
+ Chains.push_back(Copy.getValue(1));
}
// Fill up the missing vector elements
@@ -652,7 +668,11 @@
AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
}
- return Chain;
+
+ if (Chains.empty())
+ return Chain;
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D10742.28491.patch
Type: text/x-patch
Size: 3377 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150625/2ef48bde/attachment.bin>
More information about the llvm-commits
mailing list