[llvm] r215564 - R600: Correctly set the src value offset for scalarized kernel args

Matt Arsenault Matthew.Arsenault at amd.com
Wed Aug 13 11:14:11 PDT 2014


Author: arsenm
Date: Wed Aug 13 13:14:11 2014
New Revision: 215564

URL: http://llvm.org/viewvc/llvm-project?rev=215564&view=rev
Log:
R600: Correctly set the src value offset for scalarized kernel args

For some reason, this fixes v1i64 kernel arguments on pre-SI. It
currently breaks some other cases in the kernel-args.ll test for R600,
but I'm not particularly confident in the new output. VTX_READ_* are not
used for some of the scalarized cases, and the code reading from the
constant buffer doesn't make much sense to me.

Modified:
    llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
    llvm/trunk/test/CodeGen/R600/kernel-args.ll

Modified: llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600ISelLowering.cpp?rev=215564&r1=215563&r2=215564&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600ISelLowering.cpp Wed Aug 13 13:14:11 2014
@@ -1705,8 +1705,13 @@ SDValue R600TargetLowering::LowerFormalA
 
   for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    EVT VT = Ins[i].VT;
-    EVT MemVT = LocalIns[i].VT;
+    const ISD::InputArg &In = Ins[i];
+    EVT VT = In.VT;
+    EVT MemVT = VA.getLocVT();
+    if (!VT.isVector() && MemVT.isVector()) {
+      // Get load source type if scalarized.
+      MemVT = MemVT.getVectorElementType();
+    }
 
     if (ShaderType != ShaderType::COMPUTE) {
       unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
@@ -1716,7 +1721,7 @@ SDValue R600TargetLowering::LowerFormalA
     }
 
     PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-                                                   AMDGPUAS::CONSTANT_BUFFER_0);
+                                          AMDGPUAS::CONSTANT_BUFFER_0);
 
     // i64 isn't a legal type, so the register type used ends up as i32, which
     // isn't expected here. It attempts to create this sextload, but it ends up
@@ -1725,15 +1730,28 @@ SDValue R600TargetLowering::LowerFormalA
 
     // The first 36 bytes of the input buffer contains information about
     // thread group and global sizes.
+    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
+    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
+      // FIXME: This should really check the extload type, but the handling of
+      // extload vector parameters seems to be broken.
+
+      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+      Ext = ISD::SEXTLOAD;
+    }
 
-    // FIXME: This should really check the extload type, but the handling of
-    // extload vecto parameters seems to be broken.
-    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
-    ISD::LoadExtType Ext = ISD::SEXTLOAD;
-    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
-                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
-                                 MachinePointerInfo(UndefValue::get(PtrTy)),
-                                 MemVT, false, false, false, 4);
+    // Compute the offset from the value.
+    // XXX - I think PartOffset should give you this, but it seems to give the
+    // size of the register which isn't useful.
+
+    unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
+    unsigned PartOffset = VA.getLocMemOffset();
+
+    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
+    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
+                              DAG.getConstant(36 + PartOffset, MVT::i32),
+                              DAG.getUNDEF(MVT::i32),
+                              PtrInfo,
+                              MemVT, false, true, true, 4);
 
     // 4 is the preferred alignment for the CONSTANT memory space.
     InVals.push_back(Arg);

Modified: llvm/trunk/test/CodeGen/R600/kernel-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/kernel-args.ll?rev=215564&r1=215563&r2=215564&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/kernel-args.ll (original)
+++ llvm/trunk/test/CodeGen/R600/kernel-args.ll Wed Aug 13 13:14:11 2014
@@ -453,3 +453,21 @@ entry:
   store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
   ret void
 }
+
+; FUNC-LABEL: @kernel_arg_i64
+; SI: S_LOAD_DWORDX2
+; SI: S_LOAD_DWORDX2
+; SI: BUFFER_STORE_DWORDX2
+define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
+  store i64 %a, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; XFUNC-LABEL: @kernel_arg_v1i64
+; XSI: S_LOAD_DWORDX2
+; XSI: S_LOAD_DWORDX2
+; XSI: BUFFER_STORE_DWORDX2
+; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
+;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
+;   ret void
+; }





More information about the llvm-commits mailing list