[llvm] r274769 - NVPTX: Remove the legacy ptx intrinsics
Justin Bogner via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 7 09:40:17 PDT 2016
Author: bogner
Date: Thu Jul 7 11:40:17 2016
New Revision: 274769
URL: http://llvm.org/viewvc/llvm-project?rev=274769&view=rev
Log:
NVPTX: Remove the legacy ptx intrinsics
- Rename the ptx.read.* intrinsics to nvvm.read.ptx.sreg.* - some but
not all of these registers were already accessible via the nvvm
name.
- Rename ptx.bar.sync to nvvm.bar.sync, to match nvvm.bar0.
There's a fair amount of code motion here, but it's all very
mechanical.
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
llvm/trunk/lib/Target/NVPTX/NVVMIntrRange.cpp
llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
llvm/trunk/test/CodeGen/NVPTX/bug22322.ll
llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td Thu Jul 7 11:40:17 2016
@@ -738,6 +738,10 @@ def llvm_anyi64ptr_ty : LLVMAnyPoint
def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
+ def int_nvvm_bar_sync :
+ Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
+ GCCBuiltin<"__nvvm_bar_sync">;
+
// Membar
def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
Intrinsic<[], [], []>;
@@ -746,53 +750,6 @@ def llvm_anyi64ptr_ty : LLVMAnyPoint
def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
Intrinsic<[], [], []>;
-
-// Accessing special registers
- def int_nvvm_read_ptx_sreg_tid_x :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
- def int_nvvm_read_ptx_sreg_tid_y :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
- def int_nvvm_read_ptx_sreg_tid_z :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
-
- def int_nvvm_read_ptx_sreg_ntid_x :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
- def int_nvvm_read_ptx_sreg_ntid_y :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
- def int_nvvm_read_ptx_sreg_ntid_z :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
-
- def int_nvvm_read_ptx_sreg_ctaid_x :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
- def int_nvvm_read_ptx_sreg_ctaid_y :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
- def int_nvvm_read_ptx_sreg_ctaid_z :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
-
- def int_nvvm_read_ptx_sreg_nctaid_x :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
- def int_nvvm_read_ptx_sreg_nctaid_y :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
- def int_nvvm_read_ptx_sreg_nctaid_z :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
-
- def int_nvvm_read_ptx_sreg_warpsize :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
-
-
// Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the
// pointer's alignment.
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
@@ -3666,9 +3623,8 @@ def int_nvvm_swap_lo_hi_b64
GCCBuiltin<"__nvvm_swap_lo_hi_b64">;
-// Old PTX back-end intrinsics retained here for backwards-compatibility
-
-multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
+// Accessing special registers.
+multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
// FIXME: Do we need the 128-bit integer type version?
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
@@ -3676,74 +3632,57 @@ multiclass PTXReadSpecialRegisterIntrins
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<!strconcat(prefix, "_x")>;
+ GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<!strconcat(prefix, "_y")>;
+ GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<!strconcat(prefix, "_z")>;
+ GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<!strconcat(prefix, "_w")>;
+ GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
}
-class PTXReadSpecialRegisterIntrinsic_r32<string name>
+class PTXReadSRegIntrinsic_r32<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<name>;
+ GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
-class PTXReadSpecialRegisterIntrinsic_r64<string name>
+class PTXReadSRegIntrinsic_r64<string name>
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
- GCCBuiltin<name>;
+ GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+
+defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
+defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
-defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
- <"__builtin_ptx_read_tid">;
-defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
- <"__builtin_ptx_read_ntid">;
-
-def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_laneid">;
-def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_warpid">;
-def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_nwarpid">;
-
-defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32
- <"__builtin_ptx_read_ctaid">;
-defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32
- <"__builtin_ptx_read_nctaid">;
-
-def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_smid">;
-def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_nsmid">;
-def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_gridid">;
-
-def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_lanemask_eq">;
-def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_lanemask_le">;
-def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_lanemask_lt">;
-def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_lanemask_ge">;
-def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_lanemask_gt">;
-
-def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_clock">;
-def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64
- <"__builtin_ptx_read_clock64">;
-
-def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_pm0">;
-def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_pm1">;
-def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_pm2">;
-def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_pm3">;
+def int_nvvm_read_ptx_sreg_laneid : PTXReadSRegIntrinsic_r32<"laneid">;
+def int_nvvm_read_ptx_sreg_warpid : PTXReadSRegIntrinsic_r32<"warpid">;
+def int_nvvm_read_ptx_sreg_nwarpid : PTXReadSRegIntrinsic_r32<"nwarpid">;
+
+defm int_nvvm_read_ptx_sreg_ctaid : PTXReadSRegIntrinsic_v4i32<"ctaid">;
+defm int_nvvm_read_ptx_sreg_nctaid : PTXReadSRegIntrinsic_v4i32<"nctaid">;
+
+def int_nvvm_read_ptx_sreg_smid : PTXReadSRegIntrinsic_r32<"smid">;
+def int_nvvm_read_ptx_sreg_nsmid : PTXReadSRegIntrinsic_r32<"nsmid">;
+def int_nvvm_read_ptx_sreg_gridid : PTXReadSRegIntrinsic_r32<"gridid">;
+
+def int_nvvm_read_ptx_sreg_lanemask_eq :
+ PTXReadSRegIntrinsic_r32<"lanemask_eq">;
+def int_nvvm_read_ptx_sreg_lanemask_le :
+ PTXReadSRegIntrinsic_r32<"lanemask_le">;
+def int_nvvm_read_ptx_sreg_lanemask_lt :
+ PTXReadSRegIntrinsic_r32<"lanemask_lt">;
+def int_nvvm_read_ptx_sreg_lanemask_ge :
+ PTXReadSRegIntrinsic_r32<"lanemask_ge">;
+def int_nvvm_read_ptx_sreg_lanemask_gt :
+ PTXReadSRegIntrinsic_r32<"lanemask_gt">;
+
+def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
+def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
+
+def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
+def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
+def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
+def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
-def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
- GCCBuiltin<"__builtin_ptx_bar_sync">;
+def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
//
// SHUFFLE
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td Thu Jul 7 11:40:17 2016
@@ -62,6 +62,9 @@ def INT_BARRIER0_OR : NVPTXInst<(outs In
!strconcat("}}", ""))))))),
[(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
+def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
+ [(int_nvvm_bar_sync imm:$i)]>;
+
// shfl.{up,down,bfly,idx}.b32
multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
// The last two parameters to shfl can be regs or imms. ptxas is smart
@@ -1375,44 +1378,6 @@ defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_A
".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
-//-----------------------------------
-// Read Special Registers
-//-----------------------------------
-class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
- NVPTXInst<(outs regclassOut:$dst), (ins),
- OpStr,
- [(set regclassOut:$dst, (IntOp))]>;
-
-def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
- int_nvvm_read_ptx_sreg_tid_x>;
-def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
- int_nvvm_read_ptx_sreg_tid_y>;
-def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
- int_nvvm_read_ptx_sreg_tid_z>;
-
-def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
- int_nvvm_read_ptx_sreg_ntid_x>;
-def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
- int_nvvm_read_ptx_sreg_ntid_y>;
-def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
- int_nvvm_read_ptx_sreg_ntid_z>;
-
-def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
- int_nvvm_read_ptx_sreg_ctaid_x>;
-def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
- int_nvvm_read_ptx_sreg_ctaid_y>;
-def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
- int_nvvm_read_ptx_sreg_ctaid_z>;
-
-def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
- int_nvvm_read_ptx_sreg_nctaid_x>;
-def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
- int_nvvm_read_ptx_sreg_nctaid_y>;
-def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
- int_nvvm_read_ptx_sreg_nctaid_z>;
-
-def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
- int_nvvm_read_ptx_sreg_warpsize>;
//-----------------------------------
@@ -7005,98 +6970,95 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+//-----------------------------------
+// Read Special Registers
+//-----------------------------------
-
-//===-- Old PTX Back-end Intrinsics ---------------------------------------===//
-
-// These intrinsics are handled to retain compatibility with the old backend.
-
-// PTX Special Purpose Register Accessor Intrinsics
-
-class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+class PTX_READ_SREG_R64<string regname, Intrinsic intop>
: NVPTXInst<(outs Int64Regs:$d), (ins),
!strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
[(set Int64Regs:$d, (intop))]>;
-class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+class PTX_READ_SREG_R32<string regname, Intrinsic intop>
: NVPTXInst<(outs Int32Regs:$d), (ins),
!strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
[(set Int32Regs:$d, (intop))]>;
// TODO Add read vector-version of special registers
-def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
- int_ptx_read_tid_x>;
-def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
- int_ptx_read_tid_y>;
-def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
- int_ptx_read_tid_z>;
-def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
- int_ptx_read_tid_w>;
-
-def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
- int_ptx_read_ntid_x>;
-def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
- int_ptx_read_ntid_y>;
-def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
- int_ptx_read_ntid_z>;
-def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
- int_ptx_read_ntid_w>;
-
-def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
- int_ptx_read_laneid>;
-def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
- int_ptx_read_warpid>;
-def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
- int_ptx_read_nwarpid>;
-
-def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
- int_ptx_read_ctaid_x>;
-def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
- int_ptx_read_ctaid_y>;
-def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
- int_ptx_read_ctaid_z>;
-def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
- int_ptx_read_ctaid_w>;
-
-def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
- int_ptx_read_nctaid_x>;
-def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
- int_ptx_read_nctaid_y>;
-def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
- int_ptx_read_nctaid_z>;
-def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
- int_ptx_read_nctaid_w>;
-
-def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
- int_ptx_read_smid>;
-def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
- int_ptx_read_nsmid>;
-def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
- int_ptx_read_gridid>;
-
-def PTX_READ_LANEMASK_EQ
- : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
-def PTX_READ_LANEMASK_LE
- : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
-def PTX_READ_LANEMASK_LT
- : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
-def PTX_READ_LANEMASK_GE
- : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
-def PTX_READ_LANEMASK_GT
- : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
-
-def PTX_READ_CLOCK
- : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
-def PTX_READ_CLOCK64
- : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
-
-def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
-def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
-def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
-def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
-
-// PTX Parallel Synchronization and Communication Intrinsics
-
-def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
- [(int_ptx_bar_sync imm:$i)]>;
+def INT_PTX_SREG_TID_X :
+ PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
+def INT_PTX_SREG_TID_Y :
+ PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
+def INT_PTX_SREG_TID_Z :
+ PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
+def INT_PTX_SREG_TID_W :
+ PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
+
+def INT_PTX_SREG_NTID_X :
+ PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
+def INT_PTX_SREG_NTID_Y :
+ PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
+def INT_PTX_SREG_NTID_Z :
+ PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
+def INT_PTX_SREG_NTID_W :
+ PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
+
+def INT_PTX_SREG_LANEID :
+ PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
+def INT_PTX_SREG_WARPID :
+ PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
+def INT_PTX_SREG_NWARPID :
+ PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
+
+def INT_PTX_SREG_CTAID_X :
+ PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
+def INT_PTX_SREG_CTAID_Y :
+ PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
+def INT_PTX_SREG_CTAID_Z :
+ PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
+def INT_PTX_SREG_CTAID_W :
+ PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
+
+def INT_PTX_SREG_NCTAID_X :
+ PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
+def INT_PTX_SREG_NCTAID_Y :
+ PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
+def INT_PTX_SREG_NCTAID_Z :
+ PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
+def INT_PTX_SREG_NCTAID_W :
+ PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
+
+def INT_PTX_SREG_SMID :
+ PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
+def INT_PTX_SREG_NSMID :
+ PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
+def INT_PTX_SREG_GRIDID :
+ PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
+
+def INT_PTX_SREG_LANEMASK_EQ :
+ PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
+def INT_PTX_SREG_LANEMASK_LE :
+ PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
+def INT_PTX_SREG_LANEMASK_LT :
+ PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
+def INT_PTX_SREG_LANEMASK_GE :
+ PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
+def INT_PTX_SREG_LANEMASK_GT :
+ PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
+
+def INT_PTX_SREG_CLOCK :
+ PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
+def INT_PTX_SREG_CLOCK64 :
+ PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
+
+def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
+def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
+def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
+def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
+
+// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
+// handle the constant.
+def INT_PTX_SREG_WARPSIZE :
+ NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
+ [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp Thu Jul 7 11:40:17 2016
@@ -32,7 +32,7 @@ static bool readsThreadIndex(const Intri
}
static bool readsLaneId(const IntrinsicInst *II) {
- return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
+ return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
}
// Whether the given intrinsic is an atomic instruction in PTX.
Modified: llvm/trunk/lib/Target/NVPTX/NVVMIntrRange.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVVMIntrRange.cpp?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVVMIntrRange.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVVMIntrRange.cpp Thu Jul 7 11:40:17 2016
@@ -85,57 +85,45 @@ bool NVVMIntrRange::runOnFunction(Functi
if (Function *Callee = Call->getCalledFunction()) {
switch (Callee->getIntrinsicID()) {
// Index within block
- case Intrinsic::ptx_read_tid_x:
case Intrinsic::nvvm_read_ptx_sreg_tid_x:
Changed |= addRangeMetadata(0, MaxBlockSize.x, Call);
break;
- case Intrinsic::ptx_read_tid_y:
case Intrinsic::nvvm_read_ptx_sreg_tid_y:
Changed |= addRangeMetadata(0, MaxBlockSize.y, Call);
break;
- case Intrinsic::ptx_read_tid_z:
case Intrinsic::nvvm_read_ptx_sreg_tid_z:
Changed |= addRangeMetadata(0, MaxBlockSize.z, Call);
break;
// Block size
- case Intrinsic::ptx_read_ntid_x:
case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
Changed |= addRangeMetadata(1, MaxBlockSize.x+1, Call);
break;
- case Intrinsic::ptx_read_ntid_y:
case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
Changed |= addRangeMetadata(1, MaxBlockSize.y+1, Call);
break;
- case Intrinsic::ptx_read_ntid_z:
case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
Changed |= addRangeMetadata(1, MaxBlockSize.z+1, Call);
break;
// Index within grid
- case Intrinsic::ptx_read_ctaid_x:
case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
Changed |= addRangeMetadata(0, MaxGridSize.x, Call);
break;
- case Intrinsic::ptx_read_ctaid_y:
case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
Changed |= addRangeMetadata(0, MaxGridSize.y, Call);
break;
- case Intrinsic::ptx_read_ctaid_z:
case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
Changed |= addRangeMetadata(0, MaxGridSize.z, Call);
break;
// Grid size
- case Intrinsic::ptx_read_nctaid_x:
case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
Changed |= addRangeMetadata(1, MaxGridSize.x+1, Call);
break;
- case Intrinsic::ptx_read_nctaid_y:
case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
Changed |= addRangeMetadata(1, MaxGridSize.y+1, Call);
break;
- case Intrinsic::ptx_read_nctaid_z:
case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
Changed |= addRangeMetadata(1, MaxGridSize.z+1, Call);
break;
@@ -146,7 +134,7 @@ bool NVVMIntrRange::runOnFunction(Functi
break;
// Lane ID is [0..warpsize)
- case Intrinsic::ptx_read_laneid:
+ case Intrinsic::nvvm_read_ptx_sreg_laneid:
Changed |= addRangeMetadata(0, 32, Call);
break;
Modified: llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll (original)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll Thu Jul 7 11:40:17 2016
@@ -100,7 +100,7 @@ merge:
define i32 @loop() {
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop'
entry:
- %laneid = call i32 @llvm.ptx.read.laneid()
+ %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %i1, %loop ]
@@ -208,7 +208,7 @@ bb3:
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
-declare i32 @llvm.ptx.read.laneid()
+declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5}
!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}
Modified: llvm/trunk/test/CodeGen/NVPTX/bug22322.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug22322.ll?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/bug22322.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/bug22322.ll Thu Jul 7 11:40:17 2016
@@ -10,10 +10,10 @@ target triple = "nvptx64-nvidia-cuda"
define void @some_kernel(%class.float3* nocapture %dst) #0 {
_ZL11compute_vecRK6float3jb.exit:
%ret_vec.sroa.8.i = alloca float, align 4
- %0 = tail call i32 @llvm.ptx.read.ctaid.x()
- %1 = tail call i32 @llvm.ptx.read.ntid.x()
+ %0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+ %1 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
%2 = mul nsw i32 %1, %0
- %3 = tail call i32 @llvm.ptx.read.tid.x()
+ %3 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%4 = add nsw i32 %2, %3
%5 = zext i32 %4 to i64
%6 = bitcast float* %ret_vec.sroa.8.i to i8*
@@ -37,13 +37,13 @@ _ZL11compute_vecRK6float3jb.exit:
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.ptx.read.ctaid.x() #1
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #1
; Function Attrs: nounwind readnone
-declare i32 @llvm.ptx.read.ntid.x() #1
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #1
; Function Attrs: nounwind readnone
-declare i32 @llvm.ptx.read.tid.x() #1
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #2
Modified: llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll?rev=274769&r1=274768&r2=274769&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll Thu Jul 7 11:40:17 2016
@@ -8,71 +8,71 @@
define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
-; RANGE: call i32 @llvm.ptx.read.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.tid.x()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
ret i32 %x
}
define ptx_device i32 @test_tid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
-; RANGE: call i32 @llvm.ptx.read.tid.y(), !range ![[BLK_IDX_XY]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.tid.y()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
ret i32 %x
}
define ptx_device i32 @test_tid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
-; RANGE: call i32 @llvm.ptx.read.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.tid.z()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
ret i32 %x
}
define ptx_device i32 @test_tid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.tid.w()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
ret i32 %x
}
define ptx_device i32 @test_ntid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
-; RANGE: call i32 @llvm.ptx.read.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.ntid.x()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
ret i32 %x
}
define ptx_device i32 @test_ntid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
-; RANGE: call i32 @llvm.ptx.read.ntid.y(), !range ![[BLK_SIZE_XY]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.ntid.y()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
ret i32 %x
}
define ptx_device i32 @test_ntid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
-; RANGE: call i32 @llvm.ptx.read.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.ntid.z()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
ret i32 %x
}
define ptx_device i32 @test_ntid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.ntid.w()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
ret i32 %x
}
define ptx_device i32 @test_laneid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
-; RANGE: call i32 @llvm.ptx.read.laneid(), !range ![[LANEID:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.laneid()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
ret i32 %x
}
@@ -87,71 +87,71 @@ define ptx_device i32 @test_warpsize() {
define ptx_device i32 @test_warpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.warpid()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
ret i32 %x
}
define ptx_device i32 @test_nwarpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.nwarpid()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
ret i32 %x
}
define ptx_device i32 @test_ctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
-; RANGE: call i32 @llvm.ptx.read.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.ctaid.y()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
ret i32 %x
}
define ptx_device i32 @test_ctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
-; RANGE: call i32 @llvm.ptx.read.ctaid.z(), !range ![[GRID_IDX_YZ]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.ctaid.z()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
ret i32 %x
}
define ptx_device i32 @test_ctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
-; RANGE_30: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
-; RANGE_20: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_YZ]]
+; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
+; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.ctaid.x()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
ret i32 %x
}
define ptx_device i32 @test_ctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.ctaid.w()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
ret i32 %x
}
define ptx_device i32 @test_nctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
-; RANGE: call i32 @llvm.ptx.read.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.nctaid.y()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
ret i32 %x
}
define ptx_device i32 @test_nctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
-; RANGE: call i32 @llvm.ptx.read.nctaid.z(), !range ![[GRID_SIZE_YZ]]
+; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.nctaid.z()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
ret i32 %x
}
define ptx_device i32 @test_nctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
-; RANGE_30: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
-; RANGE_20: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_YZ]]
+; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
+; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.nctaid.x()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
ret i32 %x
}
@@ -159,157 +159,157 @@ define ptx_device i32 @test_nctaid_x() {
define ptx_device i32 @test_nctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.nctaid.w()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
ret i32 %x
}
define ptx_device i32 @test_smid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.smid()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
ret i32 %x
}
define ptx_device i32 @test_nsmid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.nsmid()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
ret i32 %x
}
define ptx_device i32 @test_gridid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.gridid()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
ret i32 %x
}
define ptx_device i32 @test_lanemask_eq() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.lanemask.eq()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
ret i32 %x
}
define ptx_device i32 @test_lanemask_le() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.lanemask.le()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
ret i32 %x
}
define ptx_device i32 @test_lanemask_lt() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.lanemask.lt()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
ret i32 %x
}
define ptx_device i32 @test_lanemask_ge() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.lanemask.ge()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
ret i32 %x
}
define ptx_device i32 @test_lanemask_gt() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.lanemask.gt()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
ret i32 %x
}
define ptx_device i32 @test_clock() {
; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.clock()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
ret i32 %x
}
define ptx_device i64 @test_clock64() {
; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
; CHECK: ret;
- %x = call i64 @llvm.ptx.read.clock64()
+ %x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
ret i64 %x
}
define ptx_device i32 @test_pm0() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.pm0()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
ret i32 %x
}
define ptx_device i32 @test_pm1() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.pm1()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
ret i32 %x
}
define ptx_device i32 @test_pm2() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.pm2()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
ret i32 %x
}
define ptx_device i32 @test_pm3() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
; CHECK: ret;
- %x = call i32 @llvm.ptx.read.pm3()
+ %x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
ret i32 %x
}
define ptx_device void @test_bar_sync() {
; CHECK: bar.sync 0
; CHECK: ret;
- call void @llvm.ptx.bar.sync(i32 0)
+ call void @llvm.nvvm.bar.sync(i32 0)
ret void
}
-declare i32 @llvm.ptx.read.tid.x()
-declare i32 @llvm.ptx.read.tid.y()
-declare i32 @llvm.ptx.read.tid.z()
-declare i32 @llvm.ptx.read.tid.w()
-declare i32 @llvm.ptx.read.ntid.x()
-declare i32 @llvm.ptx.read.ntid.y()
-declare i32 @llvm.ptx.read.ntid.z()
-declare i32 @llvm.ptx.read.ntid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-declare i32 @llvm.ptx.read.laneid()
-declare i32 @llvm.ptx.read.warpid()
-declare i32 @llvm.ptx.read.nwarpid()
-
-declare i32 @llvm.ptx.read.ctaid.x()
-declare i32 @llvm.ptx.read.ctaid.y()
-declare i32 @llvm.ptx.read.ctaid.z()
-declare i32 @llvm.ptx.read.ctaid.w()
-declare i32 @llvm.ptx.read.nctaid.x()
-declare i32 @llvm.ptx.read.nctaid.y()
-declare i32 @llvm.ptx.read.nctaid.z()
-declare i32 @llvm.ptx.read.nctaid.w()
-
-declare i32 @llvm.ptx.read.smid()
-declare i32 @llvm.ptx.read.nsmid()
-declare i32 @llvm.ptx.read.gridid()
-
-declare i32 @llvm.ptx.read.lanemask.eq()
-declare i32 @llvm.ptx.read.lanemask.le()
-declare i32 @llvm.ptx.read.lanemask.lt()
-declare i32 @llvm.ptx.read.lanemask.ge()
-declare i32 @llvm.ptx.read.lanemask.gt()
-
-declare i32 @llvm.ptx.read.clock()
-declare i64 @llvm.ptx.read.clock64()
-
-declare i32 @llvm.ptx.read.pm0()
-declare i32 @llvm.ptx.read.pm1()
-declare i32 @llvm.ptx.read.pm2()
-declare i32 @llvm.ptx.read.pm3()
+declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
+declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
+declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.smid()
+declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
+declare i32 @llvm.nvvm.read.ptx.sreg.gridid()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
+declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
+declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
+declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
+declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.clock()
+declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
+
+declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
+declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
+declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
+declare i32 @llvm.nvvm.read.ptx.sreg.pm3()
-declare void @llvm.ptx.bar.sync(i32 %i)
+declare void @llvm.nvvm.bar.sync(i32 %i)
; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
More information about the llvm-commits
mailing list