[llvm] r211939 - [NVPTX] Fix handling of ldg/ldu intrinsics.

Justin Holewinski jholewinski at nvidia.com
Fri Jun 27 11:35:51 PDT 2014


Author: jholewinski
Date: Fri Jun 27 13:35:51 2014
New Revision: 211939

URL: http://llvm.org/viewvc/llvm-project?rev=211939&view=rev
Log:
[NVPTX] Fix handling of ldg/ldu intrinsics.

The address space of the pointer must be global (1) for these intrinsics.  There must also be alignment metadata attached to the intrinsic calls, e.g.

%val = tail call i32 @llvm.nvvm.ldu.i.global.i32.p1i32(i32 addrspace(1)* %ptr), !align !0

!0 = metadata !{i32 4}

Added:
    llvm/trunk/test/CodeGen/NVPTX/ldu-ldg.ll
Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
    llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
    llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
    llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
    llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
    llvm/trunk/test/CodeGen/NVPTX/ldu-i8.ll
    llvm/trunk/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td?rev=211939&r1=211938&r2=211939&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td Fri Jun 27 13:35:51 2014
@@ -796,26 +796,25 @@ def llvm_anyi64ptr_ty     : LLVMAnyPoint
 
 
 // Generated within nvvm. Use for ldu on sm_20 or later
-// @TODO: Revisit this, Changed LLVMAnyPointerType to LLVMPointerType
 def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
-  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldu.global.i">;
 def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
-  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldu.global.f">;
 def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
-  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldu.global.p">;
 
 // Generated within nvvm. Use for ldg on sm_35 or later
 def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
-  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldg.global.i">;
 def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
-  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldg.global.f">;
 def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
-  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldg.global.p">;
 
 // Use for generic pointers

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp?rev=211939&r1=211938&r2=211939&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp Fri Jun 27 13:35:51 2014
@@ -141,7 +141,7 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode
   case NVPTXISD::LDGV4:
   case NVPTXISD::LDUV2:
   case NVPTXISD::LDUV4:
-    ResNode = SelectLDGLDUVector(N);
+    ResNode = SelectLDGLDU(N);
     break;
   case NVPTXISD::StoreV2:
   case NVPTXISD::StoreV4:
@@ -167,6 +167,9 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode
   case ISD::INTRINSIC_WO_CHAIN:
     ResNode = SelectIntrinsicNoChain(N);
     break;
+  case ISD::INTRINSIC_W_CHAIN:
+    ResNode = SelectIntrinsicChain(N);
+    break;
   case NVPTXISD::Tex1DFloatI32:
   case NVPTXISD::Tex1DFloatFloat:
   case NVPTXISD::Tex1DFloatFloatLevel:
@@ -273,6 +276,21 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode
   return SelectCode(N);
 }
 
+SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
+  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+  switch (IID) {
+  default:
+    return NULL;
+  case Intrinsic::nvvm_ldg_global_f:
+  case Intrinsic::nvvm_ldg_global_i:
+  case Intrinsic::nvvm_ldg_global_p:
+  case Intrinsic::nvvm_ldu_global_f:
+  case Intrinsic::nvvm_ldu_global_i:
+  case Intrinsic::nvvm_ldu_global_p:
+    return SelectLDGLDU(N);
+  }
+}
+
 static unsigned int getCodeAddrSpace(MemSDNode *N,
                                      const NVPTXSubtarget &Subtarget) {
   const Value *Src = N->getMemOperand()->getValue();
@@ -990,22 +1008,101 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVec
   return LD;
 }
 
-SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
 
   SDValue Chain = N->getOperand(0);
-  SDValue Op1 = N->getOperand(1);
+  SDValue Op1;
+  MemSDNode *Mem;
+  bool IsLDG = true;
+
+  // If this is an LDG intrinsic, the address is the third operand. Its its an
+  // LDG/LDU SD node (from custom vector handling), then its the second operand
+  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+    Op1 = N->getOperand(2);
+    Mem = cast<MemIntrinsicSDNode>(N);
+    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+    switch (IID) {
+    default:
+      return NULL;
+    case Intrinsic::nvvm_ldg_global_f:
+    case Intrinsic::nvvm_ldg_global_i:
+    case Intrinsic::nvvm_ldg_global_p:
+      IsLDG = true;
+      break;
+    case Intrinsic::nvvm_ldu_global_f:
+    case Intrinsic::nvvm_ldu_global_i:
+    case Intrinsic::nvvm_ldu_global_p:
+      IsLDG = false;
+      break;
+    }
+  } else {
+    Op1 = N->getOperand(1);
+    Mem = cast<MemSDNode>(N);
+  }
+
   unsigned Opcode;
   SDLoc DL(N);
   SDNode *LD;
-  MemSDNode *Mem = cast<MemSDNode>(N);
   SDValue Base, Offset, Addr;
 
-  EVT EltVT = Mem->getMemoryVT().getVectorElementType();
+  EVT EltVT = Mem->getMemoryVT();
+  if (EltVT.isVector()) {
+    EltVT = EltVT.getVectorElementType();
+  }
 
   if (SelectDirectAddr(Op1, Addr)) {
     switch (N->getOpcode()) {
     default:
       return nullptr;
+    case ISD::INTRINSIC_W_CHAIN:
+      if (IsLDG) {
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default:
+          return nullptr;
+        case MVT::i8:
+          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
+          break;
+        }
+      } else {
+        switch (EltVT.getSimpleVT().SimpleTy) {
+        default:
+          return nullptr;
+        case MVT::i8:
+          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
+          break;
+        }
+      }
+      break;
     case NVPTXISD::LDGV2:
       switch (EltVT.getSimpleVT().SimpleTy) {
       default:
@@ -1101,6 +1198,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUV
       switch (N->getOpcode()) {
       default:
         return nullptr;
+      case ISD::INTRINSIC_W_CHAIN:
+        if (IsLDG) {
+          switch (EltVT.getSimpleVT().SimpleTy) {
+          default:
+            return nullptr;
+          case MVT::i8:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
+            break;
+          case MVT::i16:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
+            break;
+          case MVT::i32:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
+            break;
+          case MVT::i64:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
+            break;
+          case MVT::f32:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
+            break;
+          case MVT::f64:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
+            break;
+          }
+        } else {
+          switch (EltVT.getSimpleVT().SimpleTy) {
+          default:
+            return nullptr;
+          case MVT::i8:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
+            break;
+          case MVT::i16:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
+            break;
+          case MVT::i32:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
+            break;
+          case MVT::i64:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
+            break;
+          case MVT::f32:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
+            break;
+          case MVT::f64:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
+            break;
+          }
+        }
+        break;
       case NVPTXISD::LDGV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
         default:
@@ -1190,6 +1336,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUV
       switch (N->getOpcode()) {
       default:
         return nullptr;
+      case ISD::INTRINSIC_W_CHAIN:
+        if (IsLDG) {
+          switch (EltVT.getSimpleVT().SimpleTy) {
+          default:
+            return nullptr;
+          case MVT::i8:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
+            break;
+          case MVT::i16:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
+            break;
+          case MVT::i32:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
+            break;
+          case MVT::i64:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
+            break;
+          case MVT::f32:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
+            break;
+          case MVT::f64:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
+            break;
+          }
+        } else {
+          switch (EltVT.getSimpleVT().SimpleTy) {
+          default:
+            return nullptr;
+          case MVT::i8:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
+            break;
+          case MVT::i16:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
+            break;
+          case MVT::i32:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
+            break;
+          case MVT::i64:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
+            break;
+          case MVT::f32:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
+            break;
+          case MVT::f64:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
+            break;
+          }
+        }
+        break;
       case NVPTXISD::LDGV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
         default:
@@ -1285,6 +1480,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUV
       switch (N->getOpcode()) {
       default:
         return nullptr;
+      case ISD::INTRINSIC_W_CHAIN:
+        if (IsLDG) {
+          switch (EltVT.getSimpleVT().SimpleTy) {
+          default:
+            return nullptr;
+          case MVT::i8:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
+            break;
+          case MVT::i16:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
+            break;
+          case MVT::i32:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
+            break;
+          case MVT::i64:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
+            break;
+          case MVT::f32:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
+            break;
+          case MVT::f64:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
+            break;
+          }
+        } else {
+          switch (EltVT.getSimpleVT().SimpleTy) {
+          default:
+            return nullptr;
+          case MVT::i8:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
+            break;
+          case MVT::i16:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
+            break;
+          case MVT::i32:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
+            break;
+          case MVT::i64:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
+            break;
+          case MVT::f32:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
+            break;
+          case MVT::f64:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
+            break;
+          }
+        }
+        break;
       case NVPTXISD::LDGV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
         default:
@@ -1374,6 +1618,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUV
       switch (N->getOpcode()) {
       default:
         return nullptr;
+      case ISD::INTRINSIC_W_CHAIN:
+        if (IsLDG) {
+          switch (EltVT.getSimpleVT().SimpleTy) {
+          default:
+            return nullptr;
+          case MVT::i8:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
+            break;
+          case MVT::i16:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
+            break;
+          case MVT::i32:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
+            break;
+          case MVT::i64:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
+            break;
+          case MVT::f32:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
+            break;
+          case MVT::f64:
+            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
+            break;
+          }
+        } else {
+          switch (EltVT.getSimpleVT().SimpleTy) {
+          default:
+            return nullptr;
+          case MVT::i8:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
+            break;
+          case MVT::i16:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
+            break;
+          case MVT::i32:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
+            break;
+          case MVT::i64:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
+            break;
+          case MVT::f32:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
+            break;
+          case MVT::f64:
+            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
+            break;
+          }
+        }
+        break;
       case NVPTXISD::LDGV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
         default:
@@ -1466,7 +1759,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUV
   }
 
   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
-  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+  MemRefs0[0] = Mem->getMemOperand();
   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
 
   return LD;

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h?rev=211939&r1=211938&r2=211939&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h Fri Jun 27 13:35:51 2014
@@ -59,10 +59,11 @@ private:
 
   SDNode *Select(SDNode *N) override;
   SDNode *SelectIntrinsicNoChain(SDNode *N);
+  SDNode *SelectIntrinsicChain(SDNode *N);
   SDNode *SelectTexSurfHandle(SDNode *N);
   SDNode *SelectLoad(SDNode *N);
   SDNode *SelectLoadVector(SDNode *N);
-  SDNode *SelectLDGLDUVector(SDNode *N);
+  SDNode *SelectLDGLDU(SDNode *N);
   SDNode *SelectStore(SDNode *N);
   SDNode *SelectStoreVector(SDNode *N);
   SDNode *SelectLoadParam(SDNode *N);

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=211939&r1=211938&r2=211939&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Fri Jun 27 13:35:51 2014
@@ -2363,22 +2363,62 @@ bool NVPTXTargetLowering::getTgtMemIntri
 
   case Intrinsic::nvvm_ldu_global_i:
   case Intrinsic::nvvm_ldu_global_f:
-  case Intrinsic::nvvm_ldu_global_p:
+  case Intrinsic::nvvm_ldu_global_p: {
 
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
       Info.memVT = getValueType(I.getType());
-    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
+    else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
+      Info.memVT = getPointerTy();
+    else
       Info.memVT = getValueType(I.getType());
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.vol = 0;
+    Info.readMem = true;
+    Info.writeMem = false;
+
+    // alignment is available as metadata.
+    // Grab it and set the alignment.
+    assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
+    MDNode *AlignMD = I.getMetadata("align");
+    assert(AlignMD && "Must have a non-null MDNode");
+    assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
+    Value *Align = AlignMD->getOperand(0);
+    int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
+    Info.align = Alignment;
+
+    return true;
+  }
+  case Intrinsic::nvvm_ldg_global_i:
+  case Intrinsic::nvvm_ldg_global_f:
+  case Intrinsic::nvvm_ldg_global_p: {
+
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
+      Info.memVT = getValueType(I.getType());
+    else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
+      Info.memVT = getPointerTy();
     else
-      Info.memVT = MVT::f32;
+      Info.memVT = getValueType(I.getType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
     Info.vol = 0;
     Info.readMem = true;
     Info.writeMem = false;
-    Info.align = 0;
+
+    // alignment is available as metadata.
+    // Grab it and set the alignment.
+    assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
+    MDNode *AlignMD = I.getMetadata("align");
+    assert(AlignMD && "Must have a non-null MDNode");
+    assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
+    Value *Align = AlignMD->getOperand(0);
+    int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
+    Info.align = Alignment;
+
     return true;
+  }
 
   case Intrinsic::nvvm_tex_1d_v4f32_i32:
   case Intrinsic::nvvm_tex_1d_v4f32_f32:

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td?rev=211939&r1=211938&r2=211939&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td Fri Jun 27 13:35:51 2014
@@ -1374,67 +1374,33 @@ def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.
 // Support for ldu on sm_20 or later
 //-----------------------------------
 
-def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr), [{
-  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
-  return M->getMemoryVT() == MVT::i8;
-}]>;
-
 // Scalar
-// @TODO: Revisit this, Changed imemAny to imem
-multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
-  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
-               !strconcat("ldu.global.", TyStr),
-         [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
-  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
-               !strconcat("ldu.global.", TyStr),
-         [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
- def avar:  NVPTXInst<(outs regclass:$result), (ins imem:$src),
-               !strconcat("ldu.global.", TyStr),
-                [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
-                Requires<[hasLDU]>;
- def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
-               !strconcat("ldu.global.", TyStr),
-         [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
- def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
-               !strconcat("ldu.global.", TyStr),
-         [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
-}
-
-multiclass LDU_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
+multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                !strconcat("ldu.global.", TyStr),
-         [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
+                      []>, Requires<[hasLDU]>;
   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                !strconcat("ldu.global.", TyStr),
-         [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
- def avar:  NVPTXInst<(outs regclass:$result), (ins imem:$src),
+                        []>, Requires<[hasLDU]>;
+ def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                !strconcat("ldu.global.", TyStr),
-         [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
-         Requires<[hasLDU]>;
+                      []>, Requires<[hasLDU]>;
  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                !strconcat("ldu.global.", TyStr),
-         [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
+                      []>, Requires<[hasLDU]>;
  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                !strconcat("ldu.global.", TyStr),
-         [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
+                        []>, Requires<[hasLDU]>;
 }
 
-defm INT_PTX_LDU_GLOBAL_i8  : LDU_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs,
-                                             ldu_i8>;
-defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
-int_nvvm_ldu_global_i>;
-defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
-int_nvvm_ldu_global_i>;
-defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
-int_nvvm_ldu_global_i>;
-defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs,
-int_nvvm_ldu_global_f>;
-defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs,
-int_nvvm_ldu_global_f>;
-defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
-int_nvvm_ldu_global_p>;
-defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
-int_nvvm_ldu_global_p>;
+defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
+defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
+defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
+defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
+defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
+defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
+defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
+defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
 
 // vector
 
@@ -1504,65 +1470,40 @@ defm INT_PTX_LDU_G_v4f32_ELE
 // Support for ldg on sm_35 or later 
 //-----------------------------------
 
-def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
-  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
-  return M->getMemoryVT() == MVT::i8;
-}]>;
-
-multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
-  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
-               !strconcat("ld.global.nc.", TyStr),
-         [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
-  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
-               !strconcat("ld.global.nc.", TyStr),
-         [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
- def avar:  NVPTXInst<(outs regclass:$result), (ins imem:$src),
-               !strconcat("ld.global.nc.", TyStr),
-         [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
-         Requires<[hasLDG]>;
- def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
-               !strconcat("ld.global.nc.", TyStr),
-         [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
- def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
-               !strconcat("ld.global.nc.", TyStr),
-         [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
-}
-
-multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
+multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
-         [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+                      []>, Requires<[hasLDG]>;
   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
-         [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
- def avar:  NVPTXInst<(outs regclass:$result), (ins imem:$src),
+                        []>, Requires<[hasLDG]>;
+ def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                !strconcat("ld.global.nc.", TyStr),
-         [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
-        Requires<[hasLDG]>;
+                      []>, Requires<[hasLDG]>;
  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                !strconcat("ld.global.nc.", TyStr),
-         [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+                      []>, Requires<[hasLDG]>;
  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                !strconcat("ld.global.nc.", TyStr),
-         [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+                        []>, Requires<[hasLDG]>;
 }
 
 defm INT_PTX_LDG_GLOBAL_i8
-  : LDG_G_NOINTRIN<"u8 \t$result, [$src];",  Int16Regs, ldg_i8>;
+  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
 defm INT_PTX_LDG_GLOBAL_i16
-  : LDG_G<"u16 \t$result, [$src];", Int16Regs,   int_nvvm_ldg_global_i>;
+  : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
 defm INT_PTX_LDG_GLOBAL_i32
-  : LDG_G<"u32 \t$result, [$src];", Int32Regs,   int_nvvm_ldg_global_i>;
+  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
 defm INT_PTX_LDG_GLOBAL_i64
-  : LDG_G<"u64 \t$result, [$src];", Int64Regs,   int_nvvm_ldg_global_i>;
+  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
 defm INT_PTX_LDG_GLOBAL_f32
-  : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
+  : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
 defm INT_PTX_LDG_GLOBAL_f64
-  : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
+  : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
 defm INT_PTX_LDG_GLOBAL_p32
-  : LDG_G<"u32 \t$result, [$src];", Int32Regs,   int_nvvm_ldg_global_p>;
+  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
 defm INT_PTX_LDG_GLOBAL_p64
-  : LDG_G<"u64 \t$result, [$src];", Int64Regs,   int_nvvm_ldg_global_p>;
+  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
 
 // vector
 

Modified: llvm/trunk/test/CodeGen/NVPTX/ldu-i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/ldu-i8.ll?rev=211939&r1=211938&r2=211939&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/ldu-i8.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/ldu-i8.ll Fri Jun 27 13:35:51 2014
@@ -2,13 +2,15 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 
-declare i8 @llvm.nvvm.ldu.global.i.i8(i8*)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*)
 
 define i8 @foo(i8* %a) {
 ; Ensure we properly truncate off the high-order 24 bits
 ; CHECK:        ldu.global.u8
 ; CHECK:        cvt.u32.u16
 ; CHECK:        and.b32         %r{{[0-9]+}}, %r{{[0-9]+}}, 255
-  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8(i8* %a)
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0
   ret i8 %val
 }
+
+!0 = metadata !{i32 4}

Added: llvm/trunk/test/CodeGen/NVPTX/ldu-ldg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/ldu-ldg.ll?rev=211939&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/ldu-ldg.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/ldu-ldg.ll Fri Jun 27 13:35:51 2014
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
+
+
+; CHECK: func0
+define i8 @func0(i8 addrspace(1)* %ptr) {
+; ldu.global.u8
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
+  ret i8 %val
+}
+
+; CHECK: func1
+define i32 @func1(i32 addrspace(1)* %ptr) {
+; ldu.global.u32
+  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
+  ret i32 %val
+}
+
+; CHECK: func2
+define i8 @func2(i8 addrspace(1)* %ptr) {
+; ld.global.nc.u8
+  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
+  ret i8 %val
+}
+
+; CHECK: func3
+define i32 @func3(i32 addrspace(1)* %ptr) {
+; ld.global.nc.u32
+  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
+  ret i32 %val
+}
+
+
+
+!0 = metadata !{i32 4}

Modified: llvm/trunk/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll?rev=211939&r1=211938&r2=211939&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll Fri Jun 27 13:35:51 2014
@@ -7,9 +7,9 @@ define void @reg_plus_offset(i32* %a) {
 ; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
 ; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
   %p2 = getelementptr i32* %a, i32 8
-  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1
+  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1
   %p3 = getelementptr i32* %a, i32 9
-  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1
+  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1
   %t3 = mul i32 %t1, %t2
   store i32 %t3, i32* %a
   ret void
@@ -17,5 +17,5 @@ define void @reg_plus_offset(i32* %a) {
 
 !1 = metadata !{ i32 4 }
 
-declare i32 @llvm.nvvm.ldu.global.i.i32(i32*)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*)
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()





More information about the llvm-commits mailing list