[llvm] r274769 - NVPTX: Remove the legacy ptx intrinsics

Justin Bogner via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 7 09:40:17 PDT 2016


Author: bogner
Date: Thu Jul  7 11:40:17 2016
New Revision: 274769

URL: http://llvm.org/viewvc/llvm-project?rev=274769&view=rev
Log:
NVPTX: Remove the legacy ptx intrinsics

- Rename the ptx.read.* intrinsics to nvvm.read.ptx.sreg.* - some but
  not all of these registers were already accessible via the nvvm
  name.
- Rename ptx.bar.sync to nvvm.bar.sync, to match nvvm.bar0.

There's a fair amount of code motion here, but it's all very
mechanical.

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
    llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
    llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
    llvm/trunk/lib/Target/NVPTX/NVVMIntrRange.cpp
    llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
    llvm/trunk/test/CodeGen/NVPTX/bug22322.ll
    llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td Thu Jul  7 11:40:17 2016
@@ -738,6 +738,10 @@ def llvm_anyi64ptr_ty     : LLVMAnyPoint
   def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
       Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
 
+  def int_nvvm_bar_sync :
+      Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
+      GCCBuiltin<"__nvvm_bar_sync">;
+
   // Membar
   def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
       Intrinsic<[], [], []>;
@@ -746,53 +750,6 @@ def llvm_anyi64ptr_ty     : LLVMAnyPoint
   def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
       Intrinsic<[], [], []>;
 
-
-// Accessing special registers
-  def int_nvvm_read_ptx_sreg_tid_x :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
-  def int_nvvm_read_ptx_sreg_tid_y :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
-  def int_nvvm_read_ptx_sreg_tid_z :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
-
-  def int_nvvm_read_ptx_sreg_ntid_x :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
-  def int_nvvm_read_ptx_sreg_ntid_y :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
-  def int_nvvm_read_ptx_sreg_ntid_z :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
-
-  def int_nvvm_read_ptx_sreg_ctaid_x :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
-  def int_nvvm_read_ptx_sreg_ctaid_y :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
-  def int_nvvm_read_ptx_sreg_ctaid_z :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
-
-  def int_nvvm_read_ptx_sreg_nctaid_x :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
-  def int_nvvm_read_ptx_sreg_nctaid_y :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
-  def int_nvvm_read_ptx_sreg_nctaid_z :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
-
-  def int_nvvm_read_ptx_sreg_warpsize :
-      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-      GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
-
-
 // Generated within nvvm. Use for ldu on sm_20 or later.  Second arg is the
 // pointer's alignment.
 def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
@@ -3666,9 +3623,8 @@ def int_nvvm_swap_lo_hi_b64
               GCCBuiltin<"__nvvm_swap_lo_hi_b64">;
 
 
-// Old PTX back-end intrinsics retained here for backwards-compatibility
-
-multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
+// Accessing special registers.
+multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
 // FIXME: Do we need the 128-bit integer type version?
 //    def _r64   : Intrinsic<[llvm_i128_ty],   [], [IntrNoMem]>;
 
@@ -3676,74 +3632,57 @@ multiclass PTXReadSpecialRegisterIntrins
 //    def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
 
   def _x     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-               GCCBuiltin<!strconcat(prefix, "_x")>;
+               GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
   def _y     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-               GCCBuiltin<!strconcat(prefix, "_y")>;
+               GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
   def _z     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-               GCCBuiltin<!strconcat(prefix, "_z")>;
+               GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
   def _w     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-               GCCBuiltin<!strconcat(prefix, "_w")>;
+               GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
 }
 
-class PTXReadSpecialRegisterIntrinsic_r32<string name>
+class PTXReadSRegIntrinsic_r32<string name>
   : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-    GCCBuiltin<name>;
+    GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
 
-class PTXReadSpecialRegisterIntrinsic_r64<string name>
+class PTXReadSRegIntrinsic_r64<string name>
   : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
-    GCCBuiltin<name>;
+    GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+
+defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
+defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
 
-defm int_ptx_read_tid        : PTXReadSpecialRegisterIntrinsic_v4i32
-                               <"__builtin_ptx_read_tid">;
-defm int_ptx_read_ntid       : PTXReadSpecialRegisterIntrinsic_v4i32
-                               <"__builtin_ptx_read_ntid">;
-
-def int_ptx_read_laneid      : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_laneid">;
-def int_ptx_read_warpid      : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_warpid">;
-def int_ptx_read_nwarpid     : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_nwarpid">;
-
-defm int_ptx_read_ctaid      : PTXReadSpecialRegisterIntrinsic_v4i32
-                               <"__builtin_ptx_read_ctaid">;
-defm int_ptx_read_nctaid     : PTXReadSpecialRegisterIntrinsic_v4i32
-                               <"__builtin_ptx_read_nctaid">;
-
-def int_ptx_read_smid        : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_smid">;
-def int_ptx_read_nsmid       : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_nsmid">;
-def int_ptx_read_gridid      : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_gridid">;
-
-def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_lanemask_eq">;
-def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_lanemask_le">;
-def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_lanemask_lt">;
-def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_lanemask_ge">;
-def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_lanemask_gt">;
-
-def int_ptx_read_clock       : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_clock">;
-def int_ptx_read_clock64     : PTXReadSpecialRegisterIntrinsic_r64
-                               <"__builtin_ptx_read_clock64">;
-
-def int_ptx_read_pm0         : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_pm0">;
-def int_ptx_read_pm1         : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_pm1">;
-def int_ptx_read_pm2         : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_pm2">;
-def int_ptx_read_pm3         : PTXReadSpecialRegisterIntrinsic_r32
-                               <"__builtin_ptx_read_pm3">;
+def int_nvvm_read_ptx_sreg_laneid : PTXReadSRegIntrinsic_r32<"laneid">;
+def int_nvvm_read_ptx_sreg_warpid : PTXReadSRegIntrinsic_r32<"warpid">;
+def int_nvvm_read_ptx_sreg_nwarpid : PTXReadSRegIntrinsic_r32<"nwarpid">;
+
+defm int_nvvm_read_ptx_sreg_ctaid : PTXReadSRegIntrinsic_v4i32<"ctaid">;
+defm int_nvvm_read_ptx_sreg_nctaid : PTXReadSRegIntrinsic_v4i32<"nctaid">;
+
+def int_nvvm_read_ptx_sreg_smid : PTXReadSRegIntrinsic_r32<"smid">;
+def int_nvvm_read_ptx_sreg_nsmid : PTXReadSRegIntrinsic_r32<"nsmid">;
+def int_nvvm_read_ptx_sreg_gridid : PTXReadSRegIntrinsic_r32<"gridid">;
+
+def int_nvvm_read_ptx_sreg_lanemask_eq :
+    PTXReadSRegIntrinsic_r32<"lanemask_eq">;
+def int_nvvm_read_ptx_sreg_lanemask_le :
+    PTXReadSRegIntrinsic_r32<"lanemask_le">;
+def int_nvvm_read_ptx_sreg_lanemask_lt :
+    PTXReadSRegIntrinsic_r32<"lanemask_lt">;
+def int_nvvm_read_ptx_sreg_lanemask_ge :
+    PTXReadSRegIntrinsic_r32<"lanemask_ge">;
+def int_nvvm_read_ptx_sreg_lanemask_gt :
+    PTXReadSRegIntrinsic_r32<"lanemask_gt">;
+
+def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
+def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
+
+def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
+def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
+def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
+def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
 
-def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
-                       GCCBuiltin<"__builtin_ptx_bar_sync">;
+def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
 
 //
 // SHUFFLE

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td Thu Jul  7 11:40:17 2016
@@ -62,6 +62,9 @@ def INT_BARRIER0_OR : NVPTXInst<(outs In
         !strconcat("}}", ""))))))),
       [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
 
+def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
+                             [(int_nvvm_bar_sync imm:$i)]>;
+
 // shfl.{up,down,bfly,idx}.b32
 multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
   // The last two parameters to shfl can be regs or imms.  ptxas is smart
@@ -1375,44 +1378,6 @@ defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_A
   ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
 
 
-//-----------------------------------
-// Read Special Registers
-//-----------------------------------
-class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
-      NVPTXInst<(outs regclassOut:$dst), (ins),
-               OpStr,
-         [(set regclassOut:$dst, (IntOp))]>;
-
-def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
-  int_nvvm_read_ptx_sreg_tid_x>;
-def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
-  int_nvvm_read_ptx_sreg_tid_y>;
-def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
-  int_nvvm_read_ptx_sreg_tid_z>;
-
-def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
-  int_nvvm_read_ptx_sreg_ntid_x>;
-def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
-  int_nvvm_read_ptx_sreg_ntid_y>;
-def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
-  int_nvvm_read_ptx_sreg_ntid_z>;
-
-def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
-  int_nvvm_read_ptx_sreg_ctaid_x>;
-def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
-  int_nvvm_read_ptx_sreg_ctaid_y>;
-def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
-  int_nvvm_read_ptx_sreg_ctaid_z>;
-
-def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
-  int_nvvm_read_ptx_sreg_nctaid_x>;
-def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
-  int_nvvm_read_ptx_sreg_nctaid_y>;
-def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
-  int_nvvm_read_ptx_sreg_nctaid_z>;
-
-def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
-  int_nvvm_read_ptx_sreg_warpsize>;
 
 
 //-----------------------------------
@@ -7005,98 +6970,95 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
+//-----------------------------------
+// Read Special Registers
+//-----------------------------------
 
-
-//===-- Old PTX Back-end Intrinsics ---------------------------------------===//
-
-// These intrinsics are handled to retain compatibility with the old backend.
-
-// PTX Special Purpose Register Accessor Intrinsics
-
-class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+class PTX_READ_SREG_R64<string regname, Intrinsic intop>
   : NVPTXInst<(outs Int64Regs:$d), (ins),
               !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
               [(set Int64Regs:$d, (intop))]>;
 
-class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+class PTX_READ_SREG_R32<string regname, Intrinsic intop>
   : NVPTXInst<(outs Int32Regs:$d), (ins),
               !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
               [(set Int32Regs:$d, (intop))]>;
 
 // TODO Add read vector-version of special registers
 
-def PTX_READ_TID_X   : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
-                                                     int_ptx_read_tid_x>;
-def PTX_READ_TID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
-                                                     int_ptx_read_tid_y>;
-def PTX_READ_TID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
-                                                     int_ptx_read_tid_z>;
-def PTX_READ_TID_W   : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
-                                                     int_ptx_read_tid_w>;
-
-def PTX_READ_NTID_X   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
-                                                      int_ptx_read_ntid_x>;
-def PTX_READ_NTID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
-                                                      int_ptx_read_ntid_y>;
-def PTX_READ_NTID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
-                                                      int_ptx_read_ntid_z>;
-def PTX_READ_NTID_W   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
-                                                      int_ptx_read_ntid_w>;
-
-def PTX_READ_LANEID  : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
-                                                     int_ptx_read_laneid>;
-def PTX_READ_WARPID  : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
-                                                     int_ptx_read_warpid>;
-def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
-                                                     int_ptx_read_nwarpid>;
-
-def PTX_READ_CTAID_X   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
-                                                       int_ptx_read_ctaid_x>;
-def PTX_READ_CTAID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
-                                                       int_ptx_read_ctaid_y>;
-def PTX_READ_CTAID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
-                                                       int_ptx_read_ctaid_z>;
-def PTX_READ_CTAID_W   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
-                                                       int_ptx_read_ctaid_w>;
-
-def PTX_READ_NCTAID_X   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
-                                                        int_ptx_read_nctaid_x>;
-def PTX_READ_NCTAID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
-                                                        int_ptx_read_nctaid_y>;
-def PTX_READ_NCTAID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
-                                                        int_ptx_read_nctaid_z>;
-def PTX_READ_NCTAID_W   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
-                                                        int_ptx_read_nctaid_w>;
-
-def PTX_READ_SMID  : PTX_READ_SPECIAL_REGISTER_R32<"smid",
-                                                   int_ptx_read_smid>;
-def PTX_READ_NSMID  : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
-                                                    int_ptx_read_nsmid>;
-def PTX_READ_GRIDID  : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
-                                                     int_ptx_read_gridid>;
-
-def PTX_READ_LANEMASK_EQ
-  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
-def PTX_READ_LANEMASK_LE
-  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
-def PTX_READ_LANEMASK_LT
-  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
-def PTX_READ_LANEMASK_GE
-  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
-def PTX_READ_LANEMASK_GT
-  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
-
-def PTX_READ_CLOCK
-  : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
-def PTX_READ_CLOCK64
-  : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
-
-def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
-def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
-def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
-def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
-
-// PTX Parallel Synchronization and Communication Intrinsics
-
-def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
-                             [(int_ptx_bar_sync imm:$i)]>;
+def INT_PTX_SREG_TID_X :
+    PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
+def INT_PTX_SREG_TID_Y :
+    PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
+def INT_PTX_SREG_TID_Z :
+    PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
+def INT_PTX_SREG_TID_W :
+    PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
+
+def INT_PTX_SREG_NTID_X :
+    PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
+def INT_PTX_SREG_NTID_Y :
+    PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
+def INT_PTX_SREG_NTID_Z :
+    PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
+def INT_PTX_SREG_NTID_W :
+    PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
+
+def INT_PTX_SREG_LANEID :
+    PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
+def INT_PTX_SREG_WARPID :
+    PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
+def INT_PTX_SREG_NWARPID :
+    PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
+
+def INT_PTX_SREG_CTAID_X :
+    PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
+def INT_PTX_SREG_CTAID_Y :
+    PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
+def INT_PTX_SREG_CTAID_Z :
+    PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
+def INT_PTX_SREG_CTAID_W :
+    PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
+
+def INT_PTX_SREG_NCTAID_X :
+    PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
+def INT_PTX_SREG_NCTAID_Y :
+    PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
+def INT_PTX_SREG_NCTAID_Z :
+    PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
+def INT_PTX_SREG_NCTAID_W :
+    PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
+
+def INT_PTX_SREG_SMID :
+    PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
+def INT_PTX_SREG_NSMID :
+    PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
+def INT_PTX_SREG_GRIDID :
+    PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
+
+def INT_PTX_SREG_LANEMASK_EQ :
+    PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
+def INT_PTX_SREG_LANEMASK_LE :
+    PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
+def INT_PTX_SREG_LANEMASK_LT :
+    PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
+def INT_PTX_SREG_LANEMASK_GE :
+    PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
+def INT_PTX_SREG_LANEMASK_GT :
+    PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
+
+def INT_PTX_SREG_CLOCK :
+    PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
+def INT_PTX_SREG_CLOCK64 :
+    PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
+
+def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
+def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
+def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
+def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
+
+// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
+// handle the constant.
+def INT_PTX_SREG_WARPSIZE :
+    NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
+              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp Thu Jul  7 11:40:17 2016
@@ -32,7 +32,7 @@ static bool readsThreadIndex(const Intri
 }
 
 static bool readsLaneId(const IntrinsicInst *II) {
-  return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
+  return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
 }
 
 // Whether the given intrinsic is an atomic instruction in PTX.

Modified: llvm/trunk/lib/Target/NVPTX/NVVMIntrRange.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVVMIntrRange.cpp?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVVMIntrRange.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVVMIntrRange.cpp Thu Jul  7 11:40:17 2016
@@ -85,57 +85,45 @@ bool NVVMIntrRange::runOnFunction(Functi
     if (Function *Callee = Call->getCalledFunction()) {
       switch (Callee->getIntrinsicID()) {
       // Index within block
-      case Intrinsic::ptx_read_tid_x:
       case Intrinsic::nvvm_read_ptx_sreg_tid_x:
         Changed |= addRangeMetadata(0, MaxBlockSize.x, Call);
         break;
-      case Intrinsic::ptx_read_tid_y:
       case Intrinsic::nvvm_read_ptx_sreg_tid_y:
         Changed |= addRangeMetadata(0, MaxBlockSize.y, Call);
         break;
-      case Intrinsic::ptx_read_tid_z:
       case Intrinsic::nvvm_read_ptx_sreg_tid_z:
         Changed |= addRangeMetadata(0, MaxBlockSize.z, Call);
         break;
 
       // Block size
-      case Intrinsic::ptx_read_ntid_x:
       case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
         Changed |= addRangeMetadata(1, MaxBlockSize.x+1, Call);
         break;
-      case Intrinsic::ptx_read_ntid_y:
       case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
         Changed |= addRangeMetadata(1, MaxBlockSize.y+1, Call);
         break;
-      case Intrinsic::ptx_read_ntid_z:
       case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
         Changed |= addRangeMetadata(1, MaxBlockSize.z+1, Call);
         break;
 
       // Index within grid
-      case Intrinsic::ptx_read_ctaid_x:
       case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
         Changed |= addRangeMetadata(0, MaxGridSize.x, Call);
         break;
-      case Intrinsic::ptx_read_ctaid_y:
       case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
         Changed |= addRangeMetadata(0, MaxGridSize.y, Call);
         break;
-      case Intrinsic::ptx_read_ctaid_z:
       case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
         Changed |= addRangeMetadata(0, MaxGridSize.z, Call);
         break;
 
       // Grid size
-      case Intrinsic::ptx_read_nctaid_x:
       case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
         Changed |= addRangeMetadata(1, MaxGridSize.x+1, Call);
         break;
-      case Intrinsic::ptx_read_nctaid_y:
       case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
         Changed |= addRangeMetadata(1, MaxGridSize.y+1, Call);
         break;
-      case Intrinsic::ptx_read_nctaid_z:
       case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
         Changed |= addRangeMetadata(1, MaxGridSize.z+1, Call);
         break;
@@ -146,7 +134,7 @@ bool NVVMIntrRange::runOnFunction(Functi
         break;
 
       // Lane ID is [0..warpsize)
-      case Intrinsic::ptx_read_laneid:
+      case Intrinsic::nvvm_read_ptx_sreg_laneid:
         Changed |= addRangeMetadata(0, 32, Call);
         break;
 

Modified: llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll (original)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll Thu Jul  7 11:40:17 2016
@@ -100,7 +100,7 @@ merge:
 define i32 @loop() {
 ; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop'
 entry:
-  %laneid = call i32 @llvm.ptx.read.laneid()
+  %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
   br label %loop
 loop:
   %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
@@ -208,7 +208,7 @@ bb3:
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
-declare i32 @llvm.ptx.read.laneid()
+declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
 
 !nvvm.annotations = !{!0, !1, !2, !3, !4, !5}
 !0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}

Modified: llvm/trunk/test/CodeGen/NVPTX/bug22322.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug22322.ll?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/bug22322.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/bug22322.ll Thu Jul  7 11:40:17 2016
@@ -10,10 +10,10 @@ target triple = "nvptx64-nvidia-cuda"
 define void @some_kernel(%class.float3* nocapture %dst) #0 {
 _ZL11compute_vecRK6float3jb.exit:
   %ret_vec.sroa.8.i = alloca float, align 4
-  %0 = tail call i32 @llvm.ptx.read.ctaid.x()
-  %1 = tail call i32 @llvm.ptx.read.ntid.x()
+  %0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+  %1 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
   %2 = mul nsw i32 %1, %0
-  %3 = tail call i32 @llvm.ptx.read.tid.x()
+  %3 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
   %4 = add nsw i32 %2, %3
   %5 = zext i32 %4 to i64
   %6 = bitcast float* %ret_vec.sroa.8.i to i8*
@@ -37,13 +37,13 @@ _ZL11compute_vecRK6float3jb.exit:
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.ptx.read.ctaid.x() #1
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.ptx.read.ntid.x() #1
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.ptx.read.tid.x() #1
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
 
 ; Function Attrs: nounwind
 declare void @llvm.lifetime.start(i64, i8* nocapture) #2

Modified: llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll Thu Jul  7 11:40:17 2016
@@ -8,71 +8,71 @@
 
 define ptx_device i32 @test_tid_x() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
-; RANGE: call i32 @llvm.ptx.read.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.tid.x()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_tid_y() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
-; RANGE: call i32 @llvm.ptx.read.tid.y(), !range ![[BLK_IDX_XY]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.tid.y()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_tid_z() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
-; RANGE: call i32 @llvm.ptx.read.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.tid.z()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_tid_w() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.tid.w()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_x() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
-; RANGE: call i32 @llvm.ptx.read.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.ntid.x()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_y() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
-; RANGE: call i32 @llvm.ptx.read.ntid.y(), !range ![[BLK_SIZE_XY]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.ntid.y()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_z() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
-; RANGE: call i32 @llvm.ptx.read.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.ntid.z()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_w() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.ntid.w()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_laneid() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
-; RANGE: call i32 @llvm.ptx.read.laneid(), !range ![[LANEID:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.laneid()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
 	ret i32 %x
 }
 
@@ -87,71 +87,71 @@ define ptx_device i32 @test_warpsize() {
 define ptx_device i32 @test_warpid() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.warpid()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nwarpid() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.nwarpid()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_y() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
-; RANGE: call i32 @llvm.ptx.read.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.ctaid.y()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_z() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
-; RANGE: call i32 @llvm.ptx.read.ctaid.z(), !range ![[GRID_IDX_YZ]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.ctaid.z()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_x() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
-; RANGE_30: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
-; RANGE_20: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_YZ]]
+; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
+; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.ctaid.x()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_w() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.ctaid.w()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_y() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
-; RANGE: call i32 @llvm.ptx.read.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.nctaid.y()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_z() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
-; RANGE: call i32 @llvm.ptx.read.nctaid.z(), !range ![[GRID_SIZE_YZ]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.nctaid.z()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_x() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
-; RANGE_30: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
-; RANGE_20: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_YZ]]
+; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
+; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.nctaid.x()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
 	ret i32 %x
 }
 
@@ -159,157 +159,157 @@ define ptx_device i32 @test_nctaid_x() {
 define ptx_device i32 @test_nctaid_w() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.nctaid.w()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_smid() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.smid()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nsmid() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.nsmid()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_gridid() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.gridid()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_eq() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.lanemask.eq()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_le() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.lanemask.le()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_lt() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.lanemask.lt()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_ge() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.lanemask.ge()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_gt() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.lanemask.gt()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_clock() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.clock()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
 	ret i32 %x
 }
 
 define ptx_device i64 @test_clock64() {
 ; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
 ; CHECK: ret;
-	%x = call i64 @llvm.ptx.read.clock64()
+	%x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
 	ret i64 %x
 }
 
 define ptx_device i32 @test_pm0() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.pm0()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_pm1() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.pm1()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_pm2() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.pm2()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_pm3() {
 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
 ; CHECK: ret;
-	%x = call i32 @llvm.ptx.read.pm3()
+	%x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
 	ret i32 %x
 }
 
 define ptx_device void @test_bar_sync() {
 ; CHECK: bar.sync 0
 ; CHECK: ret;
-	call void @llvm.ptx.bar.sync(i32 0)
+	call void @llvm.nvvm.bar.sync(i32 0)
 	ret void
 }
 
-declare i32 @llvm.ptx.read.tid.x()
-declare i32 @llvm.ptx.read.tid.y()
-declare i32 @llvm.ptx.read.tid.z()
-declare i32 @llvm.ptx.read.tid.w()
-declare i32 @llvm.ptx.read.ntid.x()
-declare i32 @llvm.ptx.read.ntid.y()
-declare i32 @llvm.ptx.read.ntid.z()
-declare i32 @llvm.ptx.read.ntid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
 
 declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-declare i32 @llvm.ptx.read.laneid()
-declare i32 @llvm.ptx.read.warpid()
-declare i32 @llvm.ptx.read.nwarpid()
-
-declare i32 @llvm.ptx.read.ctaid.x()
-declare i32 @llvm.ptx.read.ctaid.y()
-declare i32 @llvm.ptx.read.ctaid.z()
-declare i32 @llvm.ptx.read.ctaid.w()
-declare i32 @llvm.ptx.read.nctaid.x()
-declare i32 @llvm.ptx.read.nctaid.y()
-declare i32 @llvm.ptx.read.nctaid.z()
-declare i32 @llvm.ptx.read.nctaid.w()
-
-declare i32 @llvm.ptx.read.smid()
-declare i32 @llvm.ptx.read.nsmid()
-declare i32 @llvm.ptx.read.gridid()
-
-declare i32 @llvm.ptx.read.lanemask.eq()
-declare i32 @llvm.ptx.read.lanemask.le()
-declare i32 @llvm.ptx.read.lanemask.lt()
-declare i32 @llvm.ptx.read.lanemask.ge()
-declare i32 @llvm.ptx.read.lanemask.gt()
-
-declare i32 @llvm.ptx.read.clock()
-declare i64 @llvm.ptx.read.clock64()
-
-declare i32 @llvm.ptx.read.pm0()
-declare i32 @llvm.ptx.read.pm1()
-declare i32 @llvm.ptx.read.pm2()
-declare i32 @llvm.ptx.read.pm3()
+declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
+declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
+declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.smid()
+declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
+declare i32 @llvm.nvvm.read.ptx.sreg.gridid()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
+declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
+declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
+declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
+declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.clock()
+declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
+declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
+declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
+declare i32 @llvm.nvvm.read.ptx.sreg.pm3()
 
-declare void @llvm.ptx.bar.sync(i32 %i)
+declare void @llvm.nvvm.bar.sync(i32 %i)
 
 ; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
 ; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}




More information about the llvm-commits mailing list