[llvm] [NVVM] Upgrade nvvm.ptr.* intrinsics to addrspacecast (PR #109710)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 23 13:01:05 PDT 2024


https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/109710

Remove the following intrinsics, which can be trivially replaced with an `addrspacecast`:

  * ``llvm.nvvm.ptr.gen.to.global``
  * ``llvm.nvvm.ptr.gen.to.shared``
  * ``llvm.nvvm.ptr.gen.to.constant``
  * ``llvm.nvvm.ptr.gen.to.local``
  * ``llvm.nvvm.ptr.global.to.gen``
  * ``llvm.nvvm.ptr.shared.to.gen``
  * ``llvm.nvvm.ptr.constant.to.gen``
  * ``llvm.nvvm.ptr.local.to.gen``

Also, clean up the NVPTX lowering of `addrspacecast`, making it more concise.

>From 83a224841b637f262ce010a0a0580bac61f42c78 Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Sun, 8 Sep 2024 21:34:29 +0000
Subject: [PATCH] [NVVM] Upgrade nvvm.ptr.* intrinsics to addrspacecast

---
 llvm/docs/NVPTXUsage.rst                      | 63 -------------
 llvm/docs/ReleaseNotes.rst                    | 12 +++
 llvm/include/llvm/IR/IntrinsicsNVVM.td        | 50 +++-------
 llvm/lib/IR/AutoUpgrade.cpp                   | 21 +++++
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp   | 58 ++++++------
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td       |  4 -
 llvm/lib/Target/NVPTX/NVPTXIntrinsics.td      | 92 ++++++-------------
 .../Assembler/auto_upgrade_nvvm_intrinsics.ll | 35 +++++++
 llvm/test/CodeGen/NVPTX/intrin-nocapture.ll   | 21 -----
 9 files changed, 138 insertions(+), 218 deletions(-)
 delete mode 100644 llvm/test/CodeGen/NVPTX/intrin-nocapture.ll

diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index 3a566bbac36233..8b0b05c0ea424e 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -127,69 +127,6 @@ Example: 64-bit PTX for CUDA Driver API: ``nvptx64-nvidia-cuda``
 NVPTX Intrinsics
 ================
 
-Address Space Conversion
-------------------------
-
-'``llvm.nvvm.ptr.*.to.gen``' Intrinsics
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Syntax:
-"""""""
-
-These are overloaded intrinsics.  You can use these on any pointer types.
-
-.. code-block:: llvm
-
-    declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1))
-    declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3))
-    declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4))
-    declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5))
-
-Overview:
-"""""""""
-
-The '``llvm.nvvm.ptr.*.to.gen``' intrinsics convert a pointer in a non-generic
-address space to a generic address space pointer.
-
-Semantics:
-""""""""""
-
-These intrinsics modify the pointer value to be a valid generic address space
-pointer.
-
-
-'``llvm.nvvm.ptr.gen.to.*``' Intrinsics
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Syntax:
-"""""""
-
-These are overloaded intrinsics.  You can use these on any pointer types.
-
-.. code-block:: llvm
-
-    declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
-    declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr)
-    declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr)
-    declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr)
-
-Overview:
-"""""""""
-
-The '``llvm.nvvm.ptr.gen.to.*``' intrinsics convert a pointer in the generic
-address space to a pointer in the target address space.  Note that these
-intrinsics are only useful if the address space of the target address space of
-the pointer is known.  It is not legal to use address space conversion
-intrinsics to convert a pointer from one non-generic address space to another
-non-generic address space.
-
-Semantics:
-""""""""""
-
-These intrinsics modify the pointer value to be a valid pointer in the target
-non-generic address space.
-
-
 Reading PTX Special Registers
 -----------------------------
 
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 64c1b5df6d582f..fae4dbc4bf04d8 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -63,6 +63,18 @@ Changes to the LLVM IR
   * ``llvm.nvvm.bitcast.d2ll``
   * ``llvm.nvvm.bitcast.ll2d``
 
+* Remove the following intrinsics which can be replaced with an
+  ``addrspacecast``:
+
+  * ``llvm.nvvm.ptr.gen.to.global``
+  * ``llvm.nvvm.ptr.gen.to.shared``
+  * ``llvm.nvvm.ptr.gen.to.constant``
+  * ``llvm.nvvm.ptr.gen.to.local``
+  * ``llvm.nvvm.ptr.global.to.gen``
+  * ``llvm.nvvm.ptr.shared.to.gen``
+  * ``llvm.nvvm.ptr.constant.to.gen``
+  * ``llvm.nvvm.ptr.local.to.gen``
+
 Changes to LLVM infrastructure
 ------------------------------
 
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 737dd6092e2183..fb4c6619088e14 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -30,10 +30,18 @@
 //   * llvm.nvvm.max.ui  --> select(x ule y, x, y)
 //   * llvm.nvvm.max.ull --> ibid.
 //   * llvm.nvvm.h2f     --> llvm.convert.to.fp16.f32
-//   * llvm.nvvm.bitcast.f2i  --> bitcast
-//   * llvm.nvvm.bitcast.i2f  --> ibid.
-//   * llvm.nvvm.bitcast.d2ll --> ibid.
-//   * llvm.nvvm.bitcast.ll2d --> ibid.
+//   * llvm.nvvm.bitcast.f2i         --> bitcast
+//   * llvm.nvvm.bitcast.i2f         --> ibid.
+//   * llvm.nvvm.bitcast.d2ll        --> ibid.
+//   * llvm.nvvm.bitcast.ll2d        --> ibid.
+//   * llvm.nvvm.ptr.gen.to.global   --> addrspacecast
+//   * llvm.nvvm.ptr.gen.to.shared   --> ibid.
+//   * llvm.nvvm.ptr.gen.to.constant --> ibid.
+//   * llvm.nvvm.ptr.gen.to.local    --> ibid.
+//   * llvm.nvvm.ptr.global.to.gen   --> ibid.
+//   * llvm.nvvm.ptr.shared.to.gen   --> ibid.
+//   * llvm.nvvm.ptr.constant.to.gen --> ibid.
+//   * llvm.nvvm.ptr.local.to.gen    --> ibid.
 
 def llvm_global_ptr_ty  : LLVMQualPointerType<1>;  // (global)ptr
 def llvm_shared_ptr_ty  : LLVMQualPointerType<3>;  // (shared)ptr
@@ -1602,40 +1610,6 @@ def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
   [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldg.global.p">;
 
-// Use for generic pointers
-// - These intrinsics are used to convert address spaces.
-// - The input pointer and output pointer must have the same type, except for
-//   the address-space. (This restriction is not enforced here as there is
-//   currently no way to describe it).
-// - This complements the llvm bitcast, which can be used to cast one type
-//   of pointer to another type of pointer, while the address space remains
-//   the same.
-def int_nvvm_ptr_local_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
-                 [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
-                 "llvm.nvvm.ptr.local.to.gen">;
-def int_nvvm_ptr_shared_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
-                 [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
-                 "llvm.nvvm.ptr.shared.to.gen">;
-def int_nvvm_ptr_global_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
-                 [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
-                 "llvm.nvvm.ptr.global.to.gen">;
-def int_nvvm_ptr_constant_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
-                 [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
-                 "llvm.nvvm.ptr.constant.to.gen">;
-
-def int_nvvm_ptr_gen_to_global: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
-                 [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
-                 "llvm.nvvm.ptr.gen.to.global">;
-def int_nvvm_ptr_gen_to_shared: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
-                 [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
-                 "llvm.nvvm.ptr.gen.to.shared">;
-def int_nvvm_ptr_gen_to_local: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
-                 [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
-                 "llvm.nvvm.ptr.gen.to.local">;
-def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
-                 [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
-                 "llvm.nvvm.ptr.gen.to.constant">;
-
 // Used in nvvm internally to help address space opt and ptx code generation
 // This is for params that are passed to kernel functions by pointer by-val.
 def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 02d1d9d9f78984..8f6146648cf0ac 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1272,6 +1272,16 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
         // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
         Expand =
             Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
+      else if (Name.consume_front("ptr.gen.to."))
+        // nvvm.ptr.gen.to.{local,shared,global,constant}
+        Expand = Name.starts_with("local") || Name.starts_with("shared") ||
+                 Name.starts_with("global") || Name.starts_with("constant");
+      else if (Name.consume_front("ptr."))
+        // nvvm.ptr.{local,shared,global,constant}.to.gen
+        Expand =
+            (Name.consume_front("local") || Name.consume_front("shared") ||
+             Name.consume_front("global") || Name.consume_front("constant")) &&
+            Name.starts_with(".to.gen");
       else
         Expand = false;
 
@@ -4266,6 +4276,17 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
                  (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
                   Name == "d2ll")) {
         Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
+      } else if ((Name.consume_front("ptr.gen.to.") &&
+                  (Name.starts_with("local") || Name.starts_with("shared") ||
+                   Name.starts_with("global") ||
+                   Name.starts_with("constant"))) ||
+                 (Name.consume_front("ptr.") &&
+                  (Name.consume_front("local") ||
+                   Name.consume_front("shared") ||
+                   Name.consume_front("global") ||
+                   Name.consume_front("constant")) &&
+                  Name.starts_with(".to.gen"))) {
+        Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
       } else {
         Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
         if (IID != Intrinsic::not_intrinsic &&
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 56c96ea943b89d..7f942de74bdcc9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1109,11 +1109,21 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
   unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
   unsigned DstAddrSpace = CastN->getDestAddressSpace();
+  SDLoc DL(N);
   assert(SrcAddrSpace != DstAddrSpace &&
          "addrspacecast must be between different address spaces");
 
   if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
     // Specific to generic
+
+    if (TM.is64Bit() && TM.getPointerSizeInBits(SrcAddrSpace) == 32) {
+      SDValue CvtNone =
+          CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32);
+      SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u64_u32, DL, MVT::i64,
+                                           Src, CvtNone);
+      Src = SDValue(Cvt, 0);
+    }
+
     unsigned Opc;
     switch (SrcAddrSpace) {
     default: report_fatal_error("Bad address space in addrspacecast");
@@ -1121,26 +1131,16 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
       Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
       break;
     case ADDRESS_SPACE_SHARED:
-      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
-                                ? NVPTX::cvta_shared_6432
-                                : NVPTX::cvta_shared_64)
-                         : NVPTX::cvta_shared;
+      Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;
       break;
     case ADDRESS_SPACE_CONST:
-      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
-                                ? NVPTX::cvta_const_6432
-                                : NVPTX::cvta_const_64)
-                         : NVPTX::cvta_const;
+      Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const;
       break;
     case ADDRESS_SPACE_LOCAL:
-      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
-                                ? NVPTX::cvta_local_6432
-                                : NVPTX::cvta_local_64)
-                         : NVPTX::cvta_local;
+      Opc = TM.is64Bit() ? NVPTX::cvta_local_64 : NVPTX::cvta_local;
       break;
     }
-    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
-                                          Src));
+    ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src));
     return;
   } else {
     // Generic to specific
@@ -1153,30 +1153,28 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
       Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
       break;
     case ADDRESS_SPACE_SHARED:
-      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
-                                ? NVPTX::cvta_to_shared_3264
-                                : NVPTX::cvta_to_shared_64)
-                         : NVPTX::cvta_to_shared;
+      Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared;
       break;
     case ADDRESS_SPACE_CONST:
-      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
-                                ? NVPTX::cvta_to_const_3264
-                                : NVPTX::cvta_to_const_64)
-                         : NVPTX::cvta_to_const;
+      Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const;
       break;
     case ADDRESS_SPACE_LOCAL:
-      Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
-                                ? NVPTX::cvta_to_local_3264
-                                : NVPTX::cvta_to_local_64)
-                         : NVPTX::cvta_to_local;
+      Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local;
       break;
     case ADDRESS_SPACE_PARAM:
-      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
-                         : NVPTX::nvvm_ptr_gen_to_param;
+      Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr;
       break;
     }
-    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
-                                          Src));
+
+    SDNode *CVTA = CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src);
+    if (TM.is64Bit() && TM.getPointerSizeInBits(DstAddrSpace) == 32) {
+      SDValue CvtNone =
+          CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32);
+      CVTA = CurDAG->getMachineNode(NVPTX::CVT_u32_u64, DL, MVT::i32,
+                                    SDValue(CVTA, 0), CvtNone);
+    }
+
+    ReplaceNode(N, CVTA);
     return;
   }
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 510e4b81003119..cb2c62b1a46da8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -174,10 +174,6 @@ def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;
 def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
                           "&& Subtarget->getPTXVersion() >= 64)">;
 
-def useShortPtrLocal : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_LOCAL) == 32">;
-def useShortPtrShared : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32">;
-def useShortPtrConst : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_CONST) == 32">;
-
 def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
 def hasBF16Math: Predicate<"Subtarget->hasBF16Math()">;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 56c551661151d7..35925b497d3973 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2537,59 +2537,45 @@ defm INT_PTX_LDG_G_v4f32_ELE
   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
 
 
-multiclass NG_TO_G<string Str, Intrinsic Intrin, Predicate ShortPtr> {
+multiclass NG_TO_G<string Str> {
    def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
-          !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
-      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
+          "cvta." # Str # ".u32 \t$result, $src;", []>;
    def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
-          !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
-      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
-   def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
-          "{{ .reg .b64 %tmp;\n\t"
-          #"  cvt.u64.u32 \t%tmp, $src;\n\t"
-          #"  cvta." # Str # ".u64 \t$result, %tmp; }}",
-      [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
-      Requires<[ShortPtr]>;
+          "cvta." # Str # ".u64 \t$result, $src;", []>;
 }
 
-multiclass G_TO_NG<string Str, Intrinsic Intrin, Predicate ShortPtr> {
+multiclass G_TO_NG<string Str> {
    def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
-          !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
-      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
+          "cvta.to." # Str # ".u32 \t$result, $src;", []>;
    def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
-          !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
-      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
-   def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
-          "{{ .reg .b64 %tmp;\n\t"
-          #"  cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
-          #"  cvt.u32.u64 \t$result, %tmp; }}",
-      [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
-      Requires<[ShortPtr]>;
-}
-
-defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>;
-defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>;
-defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>;
-defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>;
-defm cvta_param  : NG_TO_G<"param", int_nvvm_ptr_param_to_gen, False>;
-
-defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>;
-defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>;
-defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>;
-defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>;
+          "cvta.to." # Str # ".u64 \t$result, $src;", []>;
+}
+
+defm cvta_local  : NG_TO_G<"local">;
+defm cvta_shared : NG_TO_G<"shared">;
+defm cvta_global : NG_TO_G<"global">;
+defm cvta_const  : NG_TO_G<"const">;
+
+defm cvta_to_local  : G_TO_NG<"local">;
+defm cvta_to_shared : G_TO_NG<"shared">;
+defm cvta_to_global : G_TO_NG<"global">;
+defm cvta_to_const  : G_TO_NG<"const">;
+
+// nvvm.ptr.param.to.gen
+defm cvta_param : NG_TO_G<"param">;
+
+def : Pat<(int_nvvm_ptr_param_to_gen Int32Regs:$src),
+          (cvta_param Int32Regs:$src)>;
+
+def : Pat<(int_nvvm_ptr_param_to_gen Int64Regs:$src),
+          (cvta_param_64 Int64Regs:$src)>;
 
 // nvvm.ptr.gen.to.param
-def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
-  (ins Int32Regs:$src),
-                        "mov.u32 \t$result, $src;",
-                              [(set Int32Regs:$result,
-                                (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
-def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
-  (ins Int64Regs:$src),
-                        "mov.u64 \t$result, $src;",
-                              [(set Int64Regs:$result,
-                                (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
+def : Pat<(int_nvvm_ptr_gen_to_param Int32Regs:$src),
+          (IMOV32rr Int32Regs:$src)>;
 
+def : Pat<(int_nvvm_ptr_gen_to_param Int64Regs:$src),
+          (IMOV64rr Int64Regs:$src)>;
 
 // nvvm.move intrinsicc
 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
@@ -2632,24 +2618,6 @@ def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                              [(set Int64Regs:$r,
                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
 
-
-// MoveParam        %r1, param
-// ptr_local_to_gen %r2, %r1
-// ptr_gen_to_local %r3, %r2
-// ->
-// mov %r1, param
-
-// @TODO: Revisit this.  There is a type
-// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
-// instructions are not currently defined. However, we can use the ptr
-// variants and the asm printer will do the right thing.
-def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
-                (MoveParam texternalsym:$src)))),
-               (nvvm_move_ptr64  texternalsym:$src)>;
-def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
-                (MoveParam texternalsym:$src)))),
-               (nvvm_move_ptr32  texternalsym:$src)>;
-
 def texsurf_handles
   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
               "mov.u64 \t$result, $src;", []>;
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 7e4a4d527fc903..16ecefff02e7bb 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -31,6 +31,15 @@ declare float @llvm.nvvm.bitcast.i2f(i32)
 declare i64 @llvm.nvvm.bitcast.d2ll(double)
 declare double @llvm.nvvm.bitcast.ll2d(i64)
 
+declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
+declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr)
+declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr)
+declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr)
+declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1))
+declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3))
+declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4))
+declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5))
+
 ; CHECK-LABEL: @simple_upgrade
 define void @simple_upgrade(i32 %a, i64 %b, i16 %c) {
 ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a)
@@ -138,5 +147,31 @@ define void @bitcast(i32 %a, i64 %b, float %c, double %d) {
   %r3 = call i64 @llvm.nvvm.bitcast.d2ll(double %d)
   %r4 = call double @llvm.nvvm.bitcast.ll2d(i64 %b)
 
+  ret void
+}
+
+; CHECK-LABEL: @addrspacecast
+define void @addrspacecast(ptr %p0) {
+; CHECK: %1 = addrspacecast ptr %p0 to ptr addrspace(1)
+; CHECK: %2 = addrspacecast ptr addrspace(1) %1 to ptr
+; CHECK: %3 = addrspacecast ptr %2 to ptr addrspace(3)
+; CHECK: %4 = addrspacecast ptr addrspace(3) %3 to ptr
+; CHECK: %5 = addrspacecast ptr %4 to ptr addrspace(4)
+; CHECK: %6 = addrspacecast ptr addrspace(4) %5 to ptr
+; CHECK: %7 = addrspacecast ptr %6 to ptr addrspace(5)
+; CHECK: %8 = addrspacecast ptr addrspace(5) %7 to ptr
+;
+  %p1 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %p0)
+  %p2 = call ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1) %p1)
+
+  %p3 = call ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr %p2)
+  %p4 = call ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3) %p3)
+
+  %p5 = call ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr %p4)
+  %p6 = call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) %p5)
+
+  %p7 = call ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr %p6)
+  %p8 = call ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5) %p7)
+
   ret void
 }
\ No newline at end of file
diff --git a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll
deleted file mode 100644
index 040bbde13800cd..00000000000000
--- a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: opt < %s -O3 -S | FileCheck %s
-
-; Address space intrinsics were erroneously marked NoCapture, leading to bad
-; optimizations (such as the store below being eliminated as dead code). This
-; test makes sure we don't regress.
-
-declare void @foo(ptr addrspace(1))
-
-declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
-
-; CHECK: @bar
-define void @bar() {
-  %t1 = alloca i32
-; CHECK: call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr nonnull %t1)
-; CHECK-NEXT: store i32 10, ptr %t1
-  %t2 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %t1)
-  store i32 10, ptr %t1
-  call void @foo(ptr addrspace(1) %t2)
-  ret void
-}
-



More information about the llvm-commits mailing list