[llvm] r211939 - [NVPTX] Fix handling of ldg/ldu intrinsics.

Jingyue Wu jingyue at google.com
Thu Aug 21 15:59:37 PDT 2014


Hi Justin,

The assertion on NVPTXISelLowering.cpp:3305

assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");

looks a little fragile. !align is extended metadata, and optimization
passes (e.g., SimplifyCFG) are not guaranteed to preserve it.

I attached a reduced test case (hoist.ll) to demonstrate this issue. If you
run "opt -simplifycfg hoist.ll -S", you will see the !align metadata is
removed by HoistThenElseCodeToIf. Further running llc on the simplified
bitcode will hit the assertion error.

One way to fix this issue is, of course, to have HoistThenElseCodeToIf
preserve the align metadata. But before I do that, I am curious whether we
should assume that the align metadata always exists in the first place. Can
you shed some light on this, Justin?

Thanks much,
Jingyue

On Fri Jun 27 2014 at 12:07:55 PM Justin Holewinski <jholewinski at nvidia.com>
wrote:

> Author: jholewinski
> Date: Fri Jun 27 13:35:51 2014
> New Revision: 211939
>
> URL: http://llvm.org/viewvc/llvm-project?rev=211939&view=rev
> Log:
> [NVPTX] Fix handling of ldg/ldu intrinsics.
>
> The address space of the pointer must be global (1) for these intrinsics.
> There must also be alignment metadata attached to the intrinsic calls, e.g.
>
> %val = tail call i32 @llvm.nvvm.ldu.i.global.i32.p1i32(i32 addrspace(1)*
> %ptr), !align !0
>
> !0 = metadata !{i32 4}
>
> Added:
>     llvm/trunk/test/CodeGen/NVPTX/ldu-ldg.ll
> Modified:
>     llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
>     llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
>     llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
>     llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
>     llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
>     llvm/trunk/test/CodeGen/NVPTX/ldu-i8.ll
>     llvm/trunk/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
>
> Modified: llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/
> IR/IntrinsicsNVVM.td?rev=211939&r1=211938&r2=211939&view=diff
> ============================================================
> ==================
> --- llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td (original)
> +++ llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td Fri Jun 27 13:35:51 2014
> @@ -796,26 +796,25 @@ def llvm_anyi64ptr_ty     : LLVMAnyPoint
>
>
>  // Generated within nvvm. Use for ldu on sm_20 or later
> -// @TODO: Revisit this, Changed LLVMAnyPointerType to LLVMPointerType
>  def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
> -  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
> +  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
>    "llvm.nvvm.ldu.global.i">;
>  def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
> -  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
> +  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
>    "llvm.nvvm.ldu.global.f">;
>  def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
> -  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
> +  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
>    "llvm.nvvm.ldu.global.p">;
>
>  // Generated within nvvm. Use for ldg on sm_35 or later
>  def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
> -  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
> +  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
>    "llvm.nvvm.ldg.global.i">;
>  def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
> -  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
> +  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
>    "llvm.nvvm.ldg.global.f">;
>  def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
> -  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
> +  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
>    "llvm.nvvm.ldg.global.p">;
>
>  // Use for generic pointers
>
> Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NV
> PTX/NVPTXISelDAGToDAG.cpp?rev=211939&r1=211938&r2=211939&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (original)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp Fri Jun 27 13:35:51
> 2014
> @@ -141,7 +141,7 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode
>    case NVPTXISD::LDGV4:
>    case NVPTXISD::LDUV2:
>    case NVPTXISD::LDUV4:
> -    ResNode = SelectLDGLDUVector(N);
> +    ResNode = SelectLDGLDU(N);
>      break;
>    case NVPTXISD::StoreV2:
>    case NVPTXISD::StoreV4:
> @@ -167,6 +167,9 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode
>    case ISD::INTRINSIC_WO_CHAIN:
>      ResNode = SelectIntrinsicNoChain(N);
>      break;
> +  case ISD::INTRINSIC_W_CHAIN:
> +    ResNode = SelectIntrinsicChain(N);
> +    break;
>    case NVPTXISD::Tex1DFloatI32:
>    case NVPTXISD::Tex1DFloatFloat:
>    case NVPTXISD::Tex1DFloatFloatLevel:
> @@ -273,6 +276,21 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode
>    return SelectCode(N);
>  }
>
> +SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
> +  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
> +  switch (IID) {
> +  default:
> +    return NULL;
> +  case Intrinsic::nvvm_ldg_global_f:
> +  case Intrinsic::nvvm_ldg_global_i:
> +  case Intrinsic::nvvm_ldg_global_p:
> +  case Intrinsic::nvvm_ldu_global_f:
> +  case Intrinsic::nvvm_ldu_global_i:
> +  case Intrinsic::nvvm_ldu_global_p:
> +    return SelectLDGLDU(N);
> +  }
> +}
> +
>  static unsigned int getCodeAddrSpace(MemSDNode *N,
>                                       const NVPTXSubtarget &Subtarget) {
>    const Value *Src = N->getMemOperand()->getValue();
> @@ -990,22 +1008,101 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVec
>    return LD;
>  }
>
> -SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
> +SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
>
>    SDValue Chain = N->getOperand(0);
> -  SDValue Op1 = N->getOperand(1);
> +  SDValue Op1;
> +  MemSDNode *Mem;
> +  bool IsLDG = true;
> +
> +  // If this is an LDG intrinsic, the address is the third operand. Its
> its an
> +  // LDG/LDU SD node (from custom vector handling), then its the second
> operand
> +  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
> +    Op1 = N->getOperand(2);
> +    Mem = cast<MemIntrinsicSDNode>(N);
> +    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()
> ;
> +    switch (IID) {
> +    default:
> +      return NULL;
> +    case Intrinsic::nvvm_ldg_global_f:
> +    case Intrinsic::nvvm_ldg_global_i:
> +    case Intrinsic::nvvm_ldg_global_p:
> +      IsLDG = true;
> +      break;
> +    case Intrinsic::nvvm_ldu_global_f:
> +    case Intrinsic::nvvm_ldu_global_i:
> +    case Intrinsic::nvvm_ldu_global_p:
> +      IsLDG = false;
> +      break;
> +    }
> +  } else {
> +    Op1 = N->getOperand(1);
> +    Mem = cast<MemSDNode>(N);
> +  }
> +
>    unsigned Opcode;
>    SDLoc DL(N);
>    SDNode *LD;
> -  MemSDNode *Mem = cast<MemSDNode>(N);
>    SDValue Base, Offset, Addr;
>
> -  EVT EltVT = Mem->getMemoryVT().getVectorElementType();
> +  EVT EltVT = Mem->getMemoryVT();
> +  if (EltVT.isVector()) {
> +    EltVT = EltVT.getVectorElementType();
> +  }
>
>    if (SelectDirectAddr(Op1, Addr)) {
>      switch (N->getOpcode()) {
>      default:
>        return nullptr;
> +    case ISD::INTRINSIC_W_CHAIN:
> +      if (IsLDG) {
> +        switch (EltVT.getSimpleVT().SimpleTy) {
> +        default:
> +          return nullptr;
> +        case MVT::i8:
> +          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
> +          break;
> +        case MVT::i16:
> +          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
> +          break;
> +        case MVT::i32:
> +          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
> +          break;
> +        case MVT::i64:
> +          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
> +          break;
> +        case MVT::f32:
> +          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
> +          break;
> +        case MVT::f64:
> +          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
> +          break;
> +        }
> +      } else {
> +        switch (EltVT.getSimpleVT().SimpleTy) {
> +        default:
> +          return nullptr;
> +        case MVT::i8:
> +          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
> +          break;
> +        case MVT::i16:
> +          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
> +          break;
> +        case MVT::i32:
> +          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
> +          break;
> +        case MVT::i64:
> +          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
> +          break;
> +        case MVT::f32:
> +          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
> +          break;
> +        case MVT::f64:
> +          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
> +          break;
> +        }
> +      }
> +      break;
>      case NVPTXISD::LDGV2:
>        switch (EltVT.getSimpleVT().SimpleTy) {
>        default:
> @@ -1101,6 +1198,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUV
>        switch (N->getOpcode()) {
>        default:
>          return nullptr;
> +      case ISD::INTRINSIC_W_CHAIN:
> +        if (IsLDG) {
> +          switch (EltVT.getSimpleVT().SimpleTy) {
> +          default:
> +            return nullptr;
> +          case MVT::i8:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
> +            break;
> +          case MVT::i16:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
> +            break;
> +          case MVT::i32:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
> +            break;
> +          case MVT::i64:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
> +            break;
> +          case MVT::f32:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
> +            break;
> +          case MVT::f64:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
> +            break;
> +          }
> +        } else {
> +          switch (EltVT.getSimpleVT().SimpleTy) {
> +          default:
> +            return nullptr;
> +          case MVT::i8:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
> +            break;
> +          case MVT::i16:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
> +            break;
> +          case MVT::i32:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
> +            break;
> +          case MVT::i64:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
> +            break;
> +          case MVT::f32:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
> +            break;
> +          case MVT::f64:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
> +            break;
> +          }
> +        }
> +        break;
>        case NVPTXISD::LDGV2:
>          switch (EltVT.getSimpleVT().SimpleTy) {
>          default:
> @@ -1190,6 +1336,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUV
>        switch (N->getOpcode()) {
>        default:
>          return nullptr;
> +      case ISD::INTRINSIC_W_CHAIN:
> +        if (IsLDG) {
> +          switch (EltVT.getSimpleVT().SimpleTy) {
> +          default:
> +            return nullptr;
> +          case MVT::i8:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
> +            break;
> +          case MVT::i16:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
> +            break;
> +          case MVT::i32:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
> +            break;
> +          case MVT::i64:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
> +            break;
> +          case MVT::f32:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
> +            break;
> +          case MVT::f64:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
> +            break;
> +          }
> +        } else {
> +          switch (EltVT.getSimpleVT().SimpleTy) {
> +          default:
> +            return nullptr;
> +          case MVT::i8:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
> +            break;
> +          case MVT::i16:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
> +            break;
> +          case MVT::i32:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
> +            break;
> +          case MVT::i64:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
> +            break;
> +          case MVT::f32:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
> +            break;
> +          case MVT::f64:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
> +            break;
> +          }
> +        }
> +        break;
>        case NVPTXISD::LDGV2:
>          switch (EltVT.getSimpleVT().SimpleTy) {
>          default:
> @@ -1285,6 +1480,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUV
>        switch (N->getOpcode()) {
>        default:
>          return nullptr;
> +      case ISD::INTRINSIC_W_CHAIN:
> +        if (IsLDG) {
> +          switch (EltVT.getSimpleVT().SimpleTy) {
> +          default:
> +            return nullptr;
> +          case MVT::i8:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
> +            break;
> +          case MVT::i16:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
> +            break;
> +          case MVT::i32:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
> +            break;
> +          case MVT::i64:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
> +            break;
> +          case MVT::f32:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
> +            break;
> +          case MVT::f64:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
> +            break;
> +          }
> +        } else {
> +          switch (EltVT.getSimpleVT().SimpleTy) {
> +          default:
> +            return nullptr;
> +          case MVT::i8:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
> +            break;
> +          case MVT::i16:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
> +            break;
> +          case MVT::i32:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
> +            break;
> +          case MVT::i64:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
> +            break;
> +          case MVT::f32:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
> +            break;
> +          case MVT::f64:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
> +            break;
> +          }
> +        }
> +        break;
>        case NVPTXISD::LDGV2:
>          switch (EltVT.getSimpleVT().SimpleTy) {
>          default:
> @@ -1374,6 +1618,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUV
>        switch (N->getOpcode()) {
>        default:
>          return nullptr;
> +      case ISD::INTRINSIC_W_CHAIN:
> +        if (IsLDG) {
> +          switch (EltVT.getSimpleVT().SimpleTy) {
> +          default:
> +            return nullptr;
> +          case MVT::i8:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
> +            break;
> +          case MVT::i16:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
> +            break;
> +          case MVT::i32:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
> +            break;
> +          case MVT::i64:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
> +            break;
> +          case MVT::f32:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
> +            break;
> +          case MVT::f64:
> +            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
> +            break;
> +          }
> +        } else {
> +          switch (EltVT.getSimpleVT().SimpleTy) {
> +          default:
> +            return nullptr;
> +          case MVT::i8:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
> +            break;
> +          case MVT::i16:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
> +            break;
> +          case MVT::i32:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
> +            break;
> +          case MVT::i64:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
> +            break;
> +          case MVT::f32:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
> +            break;
> +          case MVT::f64:
> +            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
> +            break;
> +          }
> +        }
> +        break;
>        case NVPTXISD::LDGV2:
>          switch (EltVT.getSimpleVT().SimpleTy) {
>          default:
> @@ -1466,7 +1759,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUV
>    }
>
>    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
> -  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
> +  MemRefs0[0] = Mem->getMemOperand();
>    cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
>
>    return LD;
>
> Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NV
> PTX/NVPTXISelDAGToDAG.h?rev=211939&r1=211938&r2=211939&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (original)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h Fri Jun 27 13:35:51
> 2014
> @@ -59,10 +59,11 @@ private:
>
>    SDNode *Select(SDNode *N) override;
>    SDNode *SelectIntrinsicNoChain(SDNode *N);
> +  SDNode *SelectIntrinsicChain(SDNode *N);
>    SDNode *SelectTexSurfHandle(SDNode *N);
>    SDNode *SelectLoad(SDNode *N);
>    SDNode *SelectLoadVector(SDNode *N);
> -  SDNode *SelectLDGLDUVector(SDNode *N);
> +  SDNode *SelectLDGLDU(SDNode *N);
>    SDNode *SelectStore(SDNode *N);
>    SDNode *SelectStoreVector(SDNode *N);
>    SDNode *SelectLoadParam(SDNode *N);
>
> Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NV
> PTX/NVPTXISelLowering.cpp?rev=211939&r1=211938&r2=211939&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Fri Jun 27 13:35:51
> 2014
> @@ -2363,22 +2363,62 @@ bool NVPTXTargetLowering::getTgtMemIntri
>
>    case Intrinsic::nvvm_ldu_global_i:
>    case Intrinsic::nvvm_ldu_global_f:
> -  case Intrinsic::nvvm_ldu_global_p:
> +  case Intrinsic::nvvm_ldu_global_p: {
>
>      Info.opc = ISD::INTRINSIC_W_CHAIN;
>      if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
>        Info.memVT = getValueType(I.getType());
> -    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
> +    else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
> +      Info.memVT = getPointerTy();
> +    else
>        Info.memVT = getValueType(I.getType());
> +    Info.ptrVal = I.getArgOperand(0);
> +    Info.offset = 0;
> +    Info.vol = 0;
> +    Info.readMem = true;
> +    Info.writeMem = false;
> +
> +    // alignment is available as metadata.
> +    // Grab it and set the alignment.
> +    assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment
> metadata");
> +    MDNode *AlignMD = I.getMetadata("align");
> +    assert(AlignMD && "Must have a non-null MDNode");
> +    assert(AlignMD->getNumOperands() == 1 && "Must have a single
> operand");
> +    Value *Align = AlignMD->getOperand(0);
> +    int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
> +    Info.align = Alignment;
> +
> +    return true;
> +  }
> +  case Intrinsic::nvvm_ldg_global_i:
> +  case Intrinsic::nvvm_ldg_global_f:
> +  case Intrinsic::nvvm_ldg_global_p: {
> +
> +    Info.opc = ISD::INTRINSIC_W_CHAIN;
> +    if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
> +      Info.memVT = getValueType(I.getType());
> +    else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
> +      Info.memVT = getPointerTy();
>      else
> -      Info.memVT = MVT::f32;
> +      Info.memVT = getValueType(I.getType());
>      Info.ptrVal = I.getArgOperand(0);
>      Info.offset = 0;
>      Info.vol = 0;
>      Info.readMem = true;
>      Info.writeMem = false;
> -    Info.align = 0;
> +
> +    // alignment is available as metadata.
> +    // Grab it and set the alignment.
> +    assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment
> metadata");
> +    MDNode *AlignMD = I.getMetadata("align");
> +    assert(AlignMD && "Must have a non-null MDNode");
> +    assert(AlignMD->getNumOperands() == 1 && "Must have a single
> operand");
> +    Value *Align = AlignMD->getOperand(0);
> +    int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
> +    Info.align = Alignment;
> +
>      return true;
> +  }
>
>    case Intrinsic::nvvm_tex_1d_v4f32_i32:
>    case Intrinsic::nvvm_tex_1d_v4f32_f32:
>
> Modified: llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NV
> PTX/NVPTXIntrinsics.td?rev=211939&r1=211938&r2=211939&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td (original)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td Fri Jun 27 13:35:51
> 2014
> @@ -1374,67 +1374,33 @@ def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.
>  // Support for ldu on sm_20 or later
>  //-----------------------------------
>
> -def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr),
> [{
> -  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
> -  return M->getMemoryVT() == MVT::i8;
> -}]>;
> -
>  // Scalar
> -// @TODO: Revisit this, Changed imemAny to imem
> -multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
> -  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
> -               !strconcat("ldu.global.", TyStr),
> -         [(set regclass:$result, (IntOp Int32Regs:$src))]>,
> Requires<[hasLDU]>;
> -  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
> -               !strconcat("ldu.global.", TyStr),
> -         [(set regclass:$result, (IntOp Int64Regs:$src))]>,
> Requires<[hasLDU]>;
> - def avar:  NVPTXInst<(outs regclass:$result), (ins imem:$src),
> -               !strconcat("ldu.global.", TyStr),
> -                [(set regclass:$result, (IntOp (Wrapper
> tglobaladdr:$src)))]>,
> -                Requires<[hasLDU]>;
> - def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
> -               !strconcat("ldu.global.", TyStr),
> -         [(set regclass:$result, (IntOp ADDRri:$src))]>,
> Requires<[hasLDU]>;
> - def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
> -               !strconcat("ldu.global.", TyStr),
> -         [(set regclass:$result, (IntOp ADDRri64:$src))]>,
> Requires<[hasLDU]>;
> -}
> -
> -multiclass LDU_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag
> IntOp> {
> +multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
>    def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
>                 !strconcat("ldu.global.", TyStr),
> -         [(set regclass:$result, (IntOp Int32Regs:$src))]>,
> Requires<[hasLDU]>;
> +                      []>, Requires<[hasLDU]>;
>    def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
>                 !strconcat("ldu.global.", TyStr),
> -         [(set regclass:$result, (IntOp Int64Regs:$src))]>,
> Requires<[hasLDU]>;
> - def avar:  NVPTXInst<(outs regclass:$result), (ins imem:$src),
> +                        []>, Requires<[hasLDU]>;
> + def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
>                 !strconcat("ldu.global.", TyStr),
> -         [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
> -         Requires<[hasLDU]>;
> +                      []>, Requires<[hasLDU]>;
>   def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
>                 !strconcat("ldu.global.", TyStr),
> -         [(set regclass:$result, (IntOp ADDRri:$src))]>,
> Requires<[hasLDU]>;
> +                      []>, Requires<[hasLDU]>;
>   def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
>                 !strconcat("ldu.global.", TyStr),
> -         [(set regclass:$result, (IntOp ADDRri64:$src))]>,
> Requires<[hasLDU]>;
> +                        []>, Requires<[hasLDU]>;
>  }
>
> -defm INT_PTX_LDU_GLOBAL_i8  : LDU_G_NOINTRIN<"u8 \t$result, [$src];",
> Int16Regs,
> -                                             ldu_i8>;
> -defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
> -int_nvvm_ldu_global_i>;
> -defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
> -int_nvvm_ldu_global_i>;
> -defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
> -int_nvvm_ldu_global_i>;
> -defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs,
> -int_nvvm_ldu_global_f>;
> -defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs,
> -int_nvvm_ldu_global_f>;
> -defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
> -int_nvvm_ldu_global_p>;
> -defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
> -int_nvvm_ldu_global_p>;
> +defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
> +defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
> +defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
> +defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
> +defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];",
> Float32Regs>;
> +defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];",
> Float64Regs>;
> +defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
> +defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
>
>  // vector
>
> @@ -1504,65 +1470,40 @@ defm INT_PTX_LDU_G_v4f32_ELE
>  // Support for ldg on sm_35 or later
>  //-----------------------------------
>
> -def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr),
> [{
> -  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
> -  return M->getMemoryVT() == MVT::i8;
> -}]>;
> -
> -multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
> -  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
> -               !strconcat("ld.global.nc.", TyStr),
> -         [(set regclass:$result, (IntOp Int32Regs:$src))]>,
> Requires<[hasLDG]>;
> -  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
> -               !strconcat("ld.global.nc.", TyStr),
> -         [(set regclass:$result, (IntOp Int64Regs:$src))]>,
> Requires<[hasLDG]>;
> - def avar:  NVPTXInst<(outs regclass:$result), (ins imem:$src),
> -               !strconcat("ld.global.nc.", TyStr),
> -         [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
> -         Requires<[hasLDG]>;
> - def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
> -               !strconcat("ld.global.nc.", TyStr),
> -         [(set regclass:$result, (IntOp ADDRri:$src))]>,
> Requires<[hasLDG]>;
> - def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
> -               !strconcat("ld.global.nc.", TyStr),
> -         [(set regclass:$result, (IntOp ADDRri64:$src))]>,
> Requires<[hasLDG]>;
> -}
> -
> -multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag
> IntOp> {
> +multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
>    def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
>                 !strconcat("ld.global.nc.", TyStr),
> -         [(set regclass:$result, (IntOp Int32Regs:$src))]>,
> Requires<[hasLDG]>;
> +                      []>, Requires<[hasLDG]>;
>    def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
>                 !strconcat("ld.global.nc.", TyStr),
> -         [(set regclass:$result, (IntOp Int64Regs:$src))]>,
> Requires<[hasLDG]>;
> - def avar:  NVPTXInst<(outs regclass:$result), (ins imem:$src),
> +                        []>, Requires<[hasLDG]>;
> + def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
>                 !strconcat("ld.global.nc.", TyStr),
> -         [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
> -        Requires<[hasLDG]>;
> +                      []>, Requires<[hasLDG]>;
>   def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
>                 !strconcat("ld.global.nc.", TyStr),
> -         [(set regclass:$result, (IntOp ADDRri:$src))]>,
> Requires<[hasLDG]>;
> +                      []>, Requires<[hasLDG]>;
>   def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
>                 !strconcat("ld.global.nc.", TyStr),
> -         [(set regclass:$result, (IntOp ADDRri64:$src))]>,
> Requires<[hasLDG]>;
> +                        []>, Requires<[hasLDG]>;
>  }
>
>  defm INT_PTX_LDG_GLOBAL_i8
> -  : LDG_G_NOINTRIN<"u8 \t$result, [$src];",  Int16Regs, ldg_i8>;
> +  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
>  defm INT_PTX_LDG_GLOBAL_i16
> -  : LDG_G<"u16 \t$result, [$src];", Int16Regs,   int_nvvm_ldg_global_i>;
> +  : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
>  defm INT_PTX_LDG_GLOBAL_i32
> -  : LDG_G<"u32 \t$result, [$src];", Int32Regs,   int_nvvm_ldg_global_i>;
> +  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
>  defm INT_PTX_LDG_GLOBAL_i64
> -  : LDG_G<"u64 \t$result, [$src];", Int64Regs,   int_nvvm_ldg_global_i>;
> +  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
>  defm INT_PTX_LDG_GLOBAL_f32
> -  : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
> +  : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
>  defm INT_PTX_LDG_GLOBAL_f64
> -  : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
> +  : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
>  defm INT_PTX_LDG_GLOBAL_p32
> -  : LDG_G<"u32 \t$result, [$src];", Int32Regs,   int_nvvm_ldg_global_p>;
> +  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
>  defm INT_PTX_LDG_GLOBAL_p64
> -  : LDG_G<"u64 \t$result, [$src];", Int64Regs,   int_nvvm_ldg_global_p>;
> +  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
>
>  // vector
>
>
> Modified: llvm/trunk/test/CodeGen/NVPTX/ldu-i8.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
> NVPTX/ldu-i8.ll?rev=211939&r1=211938&r2=211939&view=diff
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/NVPTX/ldu-i8.ll (original)
> +++ llvm/trunk/test/CodeGen/NVPTX/ldu-i8.ll Fri Jun 27 13:35:51 2014
> @@ -2,13 +2,15 @@
>
>  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i1
> 6:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v3
> 2:32:32-v64:64:64-v128:128:128-n16:32:64"
>
> -declare i8 @llvm.nvvm.ldu.global.i.i8(i8*)
> +declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*)
>
>  define i8 @foo(i8* %a) {
>  ; Ensure we properly truncate off the high-order 24 bits
>  ; CHECK:        ldu.global.u8
>  ; CHECK:        cvt.u32.u16
>  ; CHECK:        and.b32         %r{{[0-9]+}}, %r{{[0-9]+}}, 255
> -  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8(i8* %a)
> +  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0
>    ret i8 %val
>  }
> +
> +!0 = metadata !{i32 4}
>
> Added: llvm/trunk/test/CodeGen/NVPTX/ldu-ldg.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
> NVPTX/ldu-ldg.ll?rev=211939&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/NVPTX/ldu-ldg.ll (added)
> +++ llvm/trunk/test/CodeGen/NVPTX/ldu-ldg.ll Fri Jun 27 13:35:51 2014
> @@ -0,0 +1,40 @@
> +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
> +
> +
> +declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
> +declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
> +declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
> +declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
> +
> +
> +; CHECK: func0
> +define i8 @func0(i8 addrspace(1)* %ptr) {
> +; ldu.global.u8
> +  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)*
> %ptr), !align !0
> +  ret i8 %val
> +}
> +
> +; CHECK: func1
> +define i32 @func1(i32 addrspace(1)* %ptr) {
> +; ldu.global.u32
> +  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32
> addrspace(1)* %ptr), !align !0
> +  ret i32 %val
> +}
> +
> +; CHECK: func2
> +define i8 @func2(i8 addrspace(1)* %ptr) {
> +; ld.global.nc.u8
> +  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)*
> %ptr), !align !0
> +  ret i8 %val
> +}
> +
> +; CHECK: func3
> +define i32 @func3(i32 addrspace(1)* %ptr) {
> +; ld.global.nc.u32
> +  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32
> addrspace(1)* %ptr), !align !0
> +  ret i32 %val
> +}
> +
> +
> +
> +!0 = metadata !{i32 4}
>
> Modified: llvm/trunk/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
> NVPTX/ldu-reg-plus-offset.ll?rev=211939&r1=211938&r2=211939&view=diff
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll (original)
> +++ llvm/trunk/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll Fri Jun 27
> 13:35:51 2014
> @@ -7,9 +7,9 @@ define void @reg_plus_offset(i32* %a) {
>  ; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
>  ; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
>    %p2 = getelementptr i32* %a, i32 8
> -  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1
> +  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1
>    %p3 = getelementptr i32* %a, i32 9
> -  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1
> +  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1
>    %t3 = mul i32 %t1, %t2
>    store i32 %t3, i32* %a
>    ret void
> @@ -17,5 +17,5 @@ define void @reg_plus_offset(i32* %a) {
>
>  !1 = metadata !{ i32 4 }
>
> -declare i32 @llvm.nvvm.ldu.global.i.i32(i32*)
> +declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*)
>  declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140821/e2c7440b/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: hoist.ll
Type: application/octet-stream
Size: 533 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140821/e2c7440b/attachment.obj>


More information about the llvm-commits mailing list