[PATCH] R600: Correctly set the src value offset for scalarized kernel args
Matt Arsenault
Matthew.Arsenault at amd.com
Tue May 13 18:28:58 PDT 2014
Fix the use of the wrong type for scalarized vector arguments. This restores v1i64 to being broken, but fixes the other tests.
http://reviews.llvm.org/D3657
Files:
lib/Target/R600/R600ISelLowering.cpp
test/CodeGen/R600/kernel-args.ll
Index: lib/Target/R600/R600ISelLowering.cpp
===================================================================
--- lib/Target/R600/R600ISelLowering.cpp
+++ lib/Target/R600/R600ISelLowering.cpp
@@ -1393,8 +1393,13 @@
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
CCValAssign &VA = ArgLocs[i];
- EVT VT = Ins[i].VT;
- EVT MemVT = LocalIns[i].VT;
+ const ISD::InputArg &In = Ins[i];
+ EVT VT = In.VT;
+ EVT MemVT = VA.getLocVT();
+ if (!VT.isVector() && MemVT.isVector()) {
+ // Get load source type if scalarized.
+ MemVT = MemVT.getVectorElementType();
+ }
if (ShaderType != ShaderType::COMPUTE) {
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
@@ -1404,7 +1409,7 @@
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
+ AMDGPUAS::CONSTANT_BUFFER_0);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
@@ -1414,14 +1419,28 @@
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
- // FIXME: This should really check the extload type, but the handling of
- // extload vecto parameters seems to be broken.
- //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
- ISD::LoadExtType Ext = ISD::SEXTLOAD;
- SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
- DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
- MachinePointerInfo(UndefValue::get(PtrTy)),
- MemVT, false, false, 4);
+ ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
+ if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
+ // FIXME: This should really check the extload type, but the handling of
+ // extload vector parameters seems to be broken.
+
+ // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ Ext = ISD::SEXTLOAD;
+ }
+
+ // Compute the offset from the value.
+ // XXX - I think PartOffset should give you this, but it seems to give the
+ // size of the register which isn't useful.
+
+ unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
+ unsigned PartOffset = VA.getLocMemOffset();
+
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
+ SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
+ DAG.getConstant(36 + PartOffset, MVT::i32),
+ DAG.getUNDEF(MVT::i32),
+ PtrInfo,
+ MemVT, false, true, true, 4);
// 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
Index: test/CodeGen/R600/kernel-args.ll
===================================================================
--- test/CodeGen/R600/kernel-args.ll
+++ test/CodeGen/R600/kernel-args.ll
@@ -453,3 +453,21 @@
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: @kernel_arg_i64
+; SI: S_LOAD_DWORDX2
+; SI: S_LOAD_DWORDX2
+; SI: BUFFER_STORE_DWORDX2
+define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
+ store i64 %a, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; XFUNC-LABEL: @kernel_arg_v1i64
+; XSI: S_LOAD_DWORDX2
+; XSI: S_LOAD_DWORDX2
+; XSI: BUFFER_STORE_DWORDX2
+; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
+; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
+; ret void
+; }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D3657.9369.patch
Type: text/x-patch
Size: 3771 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140514/b24a3f67/attachment.bin>
More information about the llvm-commits mailing list