[llvm-commits] [llvm] r156196 [1/2] - in /llvm/trunk: ./ autoconf/ include/llvm/ include/llvm/ADT/ lib/Support/ lib/Target/ lib/Target/NVPTX/ lib/Target/NVPTX/InstPrinter/ lib/Target/NVPTX/MCTargetDesc/ lib/Target/NVPTX/TargetInfo/ projects/sampl

David Blaikie dblaikie at gmail.com
Mon May 14 14:48:16 PDT 2012


On Fri, May 4, 2012 at 1:18 PM, Justin Holewinski
<jholewinski at nvidia.com> wrote:
> Author: jholewinski
> Date: Fri May  4 15:18:50 2012
> New Revision: 156196
>
> URL: http://llvm.org/viewvc/llvm-project?rev=156196&view=rev
> Log:
> This patch adds a new NVPTX back-end to LLVM that supports code generation for NVIDIA PTX 3.0. This back-end will (eventually) replace the current PTX back-end while maintaining compatibility with it.
>
> The new target machines are:
>
> nvptx (old ptx32) => 32-bit PTX
> nvptx64 (old ptx64) => 64-bit PTX
>
> The sources are based on the internal NVIDIA NVPTX back-end, and
> contain more functionality than the current PTX back-end provides.
>
> NV_CONTRIB
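
A quick orientation for readers (not part of the commit message): the added
tests drive the new back-end through llc, roughly as sketched below. The
sm_20 CPU name is inferred from the test file names listed further down, so
treat the exact flags as an assumption:

  ; add.ll -- run with: llc < add.ll -march=nvptx -mcpu=sm_20
  define float @add_f(float %a, float %b) {
    %r = fadd float %a, %b
    ret float %r
  }
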
>
> Added:
>    llvm/trunk/include/llvm/IntrinsicsNVVM.td
>    llvm/trunk/lib/Target/NVPTX/
>    llvm/trunk/lib/Target/NVPTX/CMakeLists.txt
>    llvm/trunk/lib/Target/NVPTX/InstPrinter/
>    llvm/trunk/lib/Target/NVPTX/InstPrinter/CMakeLists.txt
>    llvm/trunk/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt
>    llvm/trunk/lib/Target/NVPTX/InstPrinter/Makefile
>    llvm/trunk/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
>    llvm/trunk/lib/Target/NVPTX/LLVMBuild.txt
>    llvm/trunk/lib/Target/NVPTX/MCTargetDesc/
>    llvm/trunk/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt
>    llvm/trunk/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt
>    llvm/trunk/lib/Target/NVPTX/MCTargetDesc/Makefile
>    llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
>    llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
>    llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
>    llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
>    llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
>    llvm/trunk/lib/Target/NVPTX/Makefile
>    llvm/trunk/lib/Target/NVPTX/ManagedStringPool.h
>    llvm/trunk/lib/Target/NVPTX/NVPTX.h
>    llvm/trunk/lib/Target/NVPTX/NVPTX.td
>    llvm/trunk/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXAllocaHoisting.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXFrameLowering.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXFrameLowering.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXInstrFormats.td
>    llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
>    llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
>    llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXNumRegisters.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.td
>    llvm/trunk/lib/Target/NVPTX/NVPTXSection.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXSplitBBatBar.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXSubtarget.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXSubtarget.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXTargetObjectFile.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXUtilities.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXUtilities.h
>    llvm/trunk/lib/Target/NVPTX/NVPTXVector.td
>    llvm/trunk/lib/Target/NVPTX/NVPTXutil.cpp
>    llvm/trunk/lib/Target/NVPTX/NVPTXutil.h
>    llvm/trunk/lib/Target/NVPTX/TargetInfo/
>    llvm/trunk/lib/Target/NVPTX/TargetInfo/CMakeLists.txt
>    llvm/trunk/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt
>    llvm/trunk/lib/Target/NVPTX/TargetInfo/Makefile
>    llvm/trunk/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
>    llvm/trunk/lib/Target/NVPTX/VectorElementize.cpp
>    llvm/trunk/lib/Target/NVPTX/cl_common_defines.h
>    llvm/trunk/lib/Target/NVPTX/gen-register-defs.py
>    llvm/trunk/test/CodeGen/NVPTX/
>    llvm/trunk/test/CodeGen/NVPTX/annotations.ll
>    llvm/trunk/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
>    llvm/trunk/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
>    llvm/trunk/test/CodeGen/NVPTX/arithmetic-int.ll
>    llvm/trunk/test/CodeGen/NVPTX/calling-conv.ll
>    llvm/trunk/test/CodeGen/NVPTX/compare-int.ll
>    llvm/trunk/test/CodeGen/NVPTX/convert-fp.ll
>    llvm/trunk/test/CodeGen/NVPTX/convert-int-sm10.ll
>    llvm/trunk/test/CodeGen/NVPTX/convert-int-sm20.ll
>    llvm/trunk/test/CodeGen/NVPTX/fma-disable.ll
>    llvm/trunk/test/CodeGen/NVPTX/fma.ll
>    llvm/trunk/test/CodeGen/NVPTX/intrinsic-old.ll
>    llvm/trunk/test/CodeGen/NVPTX/ld-addrspace.ll
>    llvm/trunk/test/CodeGen/NVPTX/ld-generic.ll
>    llvm/trunk/test/CodeGen/NVPTX/lit.local.cfg
>    llvm/trunk/test/CodeGen/NVPTX/st-addrspace.ll
>    llvm/trunk/test/CodeGen/NVPTX/st-generic.ll
> Modified:
>    llvm/trunk/CMakeLists.txt
>    llvm/trunk/autoconf/configure.ac
>    llvm/trunk/configure
>    llvm/trunk/include/llvm/ADT/Triple.h
>    llvm/trunk/include/llvm/Intrinsics.td
>    llvm/trunk/lib/Support/Triple.cpp
>    llvm/trunk/lib/Target/LLVMBuild.txt
>    llvm/trunk/projects/sample/autoconf/configure.ac
>    llvm/trunk/projects/sample/configure
>
> Modified: llvm/trunk/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/CMakeLists.txt?rev=156196&r1=156195&r2=156196&view=diff
> ==============================================================================
> --- llvm/trunk/CMakeLists.txt (original)
> +++ llvm/trunk/CMakeLists.txt Fri May  4 15:18:50 2012
> @@ -78,6 +78,7 @@
>   Mips
>   MBlaze
>   MSP430
> +  NVPTX
>   PowerPC
>   PTX
>   Sparc
>
> Modified: llvm/trunk/autoconf/configure.ac
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/autoconf/configure.ac?rev=156196&r1=156195&r2=156196&view=diff
> ==============================================================================
> --- llvm/trunk/autoconf/configure.ac (original)
> +++ llvm/trunk/autoconf/configure.ac Fri May  4 15:18:50 2012
> @@ -370,6 +370,7 @@
>   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;
>   mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
>   ptx-*)                  llvm_cv_target_arch="PTX" ;;
> +  nvptx-*)                llvm_cv_target_arch="NVPTX" ;;
>   *)                      llvm_cv_target_arch="Unknown" ;;
>  esac])
>
> @@ -517,6 +518,7 @@
>     Hexagon)     AC_SUBST(TARGET_HAS_JIT,0) ;;
>     MBlaze)      AC_SUBST(TARGET_HAS_JIT,0) ;;
>     PTX)         AC_SUBST(TARGET_HAS_JIT,0) ;;
> +    NVPTX)       AC_SUBST(TARGET_HAS_JIT,0) ;;
>     *)           AC_SUBST(TARGET_HAS_JIT,0) ;;
>   esac
>  fi
> @@ -628,13 +630,13 @@
>  AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
>     [Build specific host targets: all or target1,target2,... Valid targets are:
>      host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
> -     xcore, msp430, ptx, and cpp (default=all)]),,
> +     xcore, msp430, ptx, nvptx, and cpp (default=all)]),,
>     enableval=all)
>  if test "$enableval" = host-only ; then
>   enableval=host
>  fi
>  case "$enableval" in
> -  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
> +  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
>   *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
>       case "$a_target" in
>         x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> @@ -651,6 +653,7 @@
>         hexagon)  TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
>         mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
>         ptx)      TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
> +        nvptx)    TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
>         host) case "$llvm_cv_target_arch" in
>             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
>             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> @@ -664,6 +667,7 @@
>             MSP430)      TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
>             Hexagon)     TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
>             PTX)         TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
> +            NVPTX)       TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
>             *)       AC_MSG_ERROR([Can not set target to build]) ;;
>           esac ;;
>         *) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
>
> Modified: llvm/trunk/configure
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/configure?rev=156196&r1=156195&r2=156196&view=diff
> ==============================================================================
> --- llvm/trunk/configure (original)
> +++ llvm/trunk/configure Fri May  4 15:18:50 2012
> @@ -1420,7 +1420,7 @@
>   --enable-targets        Build specific host targets: all or
>                           target1,target2,... Valid targets are: host, x86,
>                           x86_64, sparc, powerpc, arm, mips, spu, hexagon,
> -                          xcore, msp430, ptx, and cpp (default=all)
> +                          xcore, msp430, ptx, nvptx, and cpp (default=all)
>   --enable-bindings       Build specific language bindings:
>                           all,auto,none,{binding-name} (default=auto)
>   --enable-libffi         Check for the presence of libffi (default is NO)
> @@ -3903,6 +3903,7 @@
>   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;
>   mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
>   ptx-*)                  llvm_cv_target_arch="PTX" ;;
> +  nvptx-*)                llvm_cv_target_arch="NVPTX" ;;
>   *)                      llvm_cv_target_arch="Unknown" ;;
>  esac
>  fi
> @@ -5126,6 +5127,8 @@
>  ;;
>     PTX)         TARGET_HAS_JIT=0
>  ;;
> +    NVPTX)       TARGET_HAS_JIT=0
> + ;;
>     *)           TARGET_HAS_JIT=0
>  ;;
>   esac
> @@ -5310,7 +5313,7 @@
>   enableval=host
>  fi
>  case "$enableval" in
> -  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
> +  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
>   *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
>       case "$a_target" in
>         x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> @@ -5327,6 +5330,7 @@
>         hexagon)  TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
>         mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
>         ptx)      TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
> +        nvptx)    TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
>         host) case "$llvm_cv_target_arch" in
>             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
>             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> @@ -5340,6 +5344,7 @@
>             MSP430)      TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
>             Hexagon)     TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
>             PTX)         TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
> +            NVPTX)       TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
>             *)       { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
>  echo "$as_me: error: Can not set target to build" >&2;}
>    { (exit 1); exit 1; }; } ;;
> @@ -10401,7 +10406,7 @@
>   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>   lt_status=$lt_dlunknown
>   cat > conftest.$ac_ext <<EOF
> -#line 10404 "configure"
> +#line 10409 "configure"
>  #include "confdefs.h"
>
>  #if HAVE_DLFCN_H
>
> Modified: llvm/trunk/include/llvm/ADT/Triple.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/Triple.h?rev=156196&r1=156195&r2=156196&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/ADT/Triple.h (original)
> +++ llvm/trunk/include/llvm/ADT/Triple.h Fri May  4 15:18:50 2012
> @@ -64,6 +64,8 @@
>     mblaze,  // MBlaze: mblaze
>     ptx32,   // PTX: ptx (32-bit)
>     ptx64,   // PTX: ptx (64-bit)
> +    nvptx,   // NVPTX: 32-bit
> +    nvptx64, // NVPTX: 64-bit
>     le32,    // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten)
>     amdil   // amdil: amd IL
>   };
>
> Modified: llvm/trunk/include/llvm/Intrinsics.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Intrinsics.td?rev=156196&r1=156195&r2=156196&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Intrinsics.td (original)
> +++ llvm/trunk/include/llvm/Intrinsics.td Fri May  4 15:18:50 2012
> @@ -441,3 +441,4 @@
>  include "llvm/IntrinsicsXCore.td"
>  include "llvm/IntrinsicsPTX.td"
>  include "llvm/IntrinsicsHexagon.td"
> +include "llvm/IntrinsicsNVVM.td"
>
> Added: llvm/trunk/include/llvm/IntrinsicsNVVM.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IntrinsicsNVVM.td?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/include/llvm/IntrinsicsNVVM.td (added)
> +++ llvm/trunk/include/llvm/IntrinsicsNVVM.td Fri May  4 15:18:50 2012
> @@ -0,0 +1,872 @@
> +//===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines all of the NVVM-specific intrinsics for use with NVPTX.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +def llvm_anyi64ptr_ty     : LLVMAnyPointerType<llvm_i64_ty>;     // (space)i64*
> +
> +//
> +// MISC
> +//
> +
> +  def int_nvvm_clz_i : GCCBuiltin<"__nvvm_clz_i">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_clz_ll : GCCBuiltin<"__nvvm_clz_ll">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_popc_i : GCCBuiltin<"__nvvm_popc_i">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_popc_ll : GCCBuiltin<"__nvvm_popc_ll">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
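
A hedged aside: TableGen derives each intrinsic's IR-level name from its
int_ identifier by turning underscores into dots, so the defs above should
surface as llvm.nvvm.clz.i and friends. A minimal sketch (mine, not from
the patch):

  declare i32 @llvm.nvvm.clz.i(i32)
  declare i32 @llvm.nvvm.popc.i(i32)

  define i32 @count_bits(i32 %x) {
    %lz = call i32 @llvm.nvvm.clz.i(i32 %x)   ; count leading zeros
    %pc = call i32 @llvm.nvvm.popc.i(i32 %x)  ; population count
    %r  = add i32 %lz, %pc
    ret i32 %r
  }
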
> +
> +//
> +// Min Max
> +//
> +
> +  def int_nvvm_min_i : GCCBuiltin<"__nvvm_min_i">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_min_ui : GCCBuiltin<"__nvvm_min_ui">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_min_ll : GCCBuiltin<"__nvvm_min_ll">,
> +      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_min_ull : GCCBuiltin<"__nvvm_min_ull">,
> +      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_max_i : GCCBuiltin<"__nvvm_max_i">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_max_ui : GCCBuiltin<"__nvvm_max_ui">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_max_ll : GCCBuiltin<"__nvvm_max_ll">,
> +      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_max_ull : GCCBuiltin<"__nvvm_max_ull">,
> +      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +//
> +// Multiplication
> +//
> +
> +  def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">,
> +      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">,
> +      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +//
> +// Div
> +//
> +
> +  def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +//
> +// Brev
> +//
> +
> +  def int_nvvm_brev32 : GCCBuiltin<"__nvvm_brev32">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_brev64 : GCCBuiltin<"__nvvm_brev64">,
> +      Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
> +
> +//
> +// Sad
> +//
> +
> +  def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +//
> +// Floor  Ceil
> +//
> +
> +  def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +//
> +// Abs
> +//
> +
> +  def int_nvvm_abs_i : GCCBuiltin<"__nvvm_abs_i">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_abs_ll : GCCBuiltin<"__nvvm_abs_ll">,
> +      Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +//
> +// Round
> +//
> +
> +  def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +//
> +// Trunc
> +//
> +
> +  def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +//
> +// Saturate
> +//
> +
> +  def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +//
> +// Exp2  Log2
> +//
> +
> +  def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +//
> +// Sin  Cos
> +//
> +
> +  def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +//
> +// Fma
> +//
> +
> +  def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">,
> +      Intrinsic<[llvm_double_ty],
> +        [llvm_double_ty, llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">,
> +      Intrinsic<[llvm_double_ty],
> +        [llvm_double_ty, llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">,
> +      Intrinsic<[llvm_double_ty],
> +        [llvm_double_ty, llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">,
> +      Intrinsic<[llvm_double_ty],
> +        [llvm_double_ty, llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
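
For readers new to the suffix scheme used throughout this file: rn, rz, rm
and rp select the PTX rounding mode (to nearest even, toward zero, toward
-inf, toward +inf), and the ftz variants flush subnormals to zero. A sketch
of the round-to-nearest single-precision fma (my example, not the patch's):

  declare float @llvm.nvvm.fma.rn.f(float, float, float)

  define float @madd(float %a, float %b, float %c) {
    ; a*b + c with a single rounding, round-to-nearest-even
    %r = call float @llvm.nvvm.fma.rn.f(float %a, float %b, float %c)
    ret float %r
  }
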
> +
> +//
> +// Rcp
> +//
> +
> +  def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +//
> +// Sqrt
> +//
> +
> +  def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +//
> +// Rsqrt
> +//
> +
> +  def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +//
> +// Add
> +//
> +
> +  def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">,
> +      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +  def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">,
> +      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +//
> +// Convert
> +//
> +
> +  def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">,
> +      Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">,
> +      Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">,
> +      Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">,
> +      Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">,
> +      Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">,
> +      Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">,
> +      Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">,
> +      Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">,
> +      Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">,
> +      Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">,
> +      Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">,
> +      Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">,
> +      Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">,
> +      Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">,
> +      Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">,
> +      Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">,
> +      Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">,
> +      Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">,
> +      Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">,
> +      Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">,
> +      Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">,
> +      Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">,
> +      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">,
> +      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">,
> +      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">,
> +      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">,
> +      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">,
> +      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">,
> +      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">,
> +      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">,
> +      Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
> +        [IntrNoMem, Commutative]>;
> +
> +  def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">,
> +      Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">,
> +      Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">,
> +      Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">,
> +      Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">,
> +      Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">,
> +      Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">,
> +      Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">,
> +      Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">,
> +      Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
> +  def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">,
> +      Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">,
> +      Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">,
> +      Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">,
> +      Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">,
> +      Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">,
> +      Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">,
> +      Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">,
> +      Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">,
> +      Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">,
> +      Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">,
> +      Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">,
> +      Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">,
> +      Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">,
> +      Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">,
> +      Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">,
> +      Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">,
> +      Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">,
> +      Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
> +      Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_h2f : GCCBuiltin<"__nvvm_h2f">,
> +      Intrinsic<[llvm_float_ty], [llvm_i16_ty], [IntrNoMem]>;
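
Continuing the same assumed naming, the convert intrinsics encode source
type, destination type and rounding mode in the name. For instance, an
integer floor can be spelled as a float-to-int convert that rounds toward
minus infinity (sketch):

  declare i32 @llvm.nvvm.f2i.rm(float)

  define i32 @ifloor(float %x) {
    %r = call i32 @llvm.nvvm.f2i.rm(float %x)
    ret i32 %r
  }
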
> +
> +//
> +// Bitcast
> +//
> +
> +  def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">,
> +      Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
> +  def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">,
> +      Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> +
> +  def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">,
> +      Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
> +  def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
> +      Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
> +
> +
> +// Atomics not available as generic llvm intrinsics.
> +  def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
> +          [LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
> +                                      [IntrReadWriteArgMem, NoCapture<0>]>;
> +  def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
> +          [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
> +                                      [IntrReadWriteArgMem, NoCapture<0>]>;
> +  def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
> +          [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
> +                                      [IntrReadWriteArgMem, NoCapture<0>]>;
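
Since the pointer operand is an LLVMAnyPointerType, these are overloaded
per address space. The overload suffix below is my guess at the mangling
for a float addrspace(1)* operand, not something the patch spells out:

  declare float @llvm.nvvm.atomic.load.add.f32.p1f32(float addrspace(1)*, float)

  define float @accumulate(float addrspace(1)* %p, float %v) {
    ; returns the value held at %p before the add
    %old = call float @llvm.nvvm.atomic.load.add.f32.p1f32(float addrspace(1)* %p, float %v)
    ret float %old
  }
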
> +
> +// Bar.Sync
> +  def int_cuda_syncthreads : GCCBuiltin<"__syncthreads">,
> +      Intrinsic<[], [], []>;
> +  def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">,
> +      Intrinsic<[], [], []>;
> +  def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
> +  def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
> +  def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
> +      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
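
Both int_cuda_syncthreads and int_nvvm_barrier0 presumably lower to the
same PTX bar.sync 0; at the IR level a barrier is just a void call
(sketch):

  declare void @llvm.nvvm.barrier0()

  define void @sync_point() {
    call void @llvm.nvvm.barrier0()   ; CTA-wide barrier
    ret void
  }
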
> +
> +  // Membar
> +  def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
> +      Intrinsic<[], [], []>;
> +  def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">,
> +      Intrinsic<[], [], []>;
> +  def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
> +      Intrinsic<[], [], []>;
> +
> +
> +// Accessing special registers
> +  def int_nvvm_read_ptx_sreg_tid_x :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
> +  def int_nvvm_read_ptx_sreg_tid_y :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
> +  def int_nvvm_read_ptx_sreg_tid_z :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
> +
> +  def int_nvvm_read_ptx_sreg_ntid_x :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
> +  def int_nvvm_read_ptx_sreg_ntid_y :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
> +  def int_nvvm_read_ptx_sreg_ntid_z :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
> +
> +  def int_nvvm_read_ptx_sreg_ctaid_x :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
> +  def int_nvvm_read_ptx_sreg_ctaid_y :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
> +  def int_nvvm_read_ptx_sreg_ctaid_z :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
> +
> +  def int_nvvm_read_ptx_sreg_nctaid_x :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
> +  def int_nvvm_read_ptx_sreg_nctaid_y :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
> +  def int_nvvm_read_ptx_sreg_nctaid_z :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
> +
> +  def int_nvvm_read_ptx_sreg_warpsize :
> +      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +      GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
> +
> +
> +// Generated within nvvm. Use for ldu on sm_20 or later.
> +// @TODO: Revisit this; LLVMAnyPointerType was changed to LLVMPointerType.
> +def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
> +  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
> +  "llvm.nvvm.ldu.global.i">;
> +def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
> +  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
> +  "llvm.nvvm.ldu.global.f">;
> +def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
> +  [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
> +  "llvm.nvvm.ldu.global.p">;
> +
> +
> +// Use for generic pointers
> +// - These intrinsics are used to convert address spaces.
> +// - The input pointer and output pointer must have the same type, except for
> +//   the address space. (This restriction is not enforced here, as there is
> +//   currently no way to describe it.)
> +// - This complements the llvm bitcast, which casts between pointer types
> +//   while the address space remains the same.
> +def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty],
> +                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
> +                 "llvm.nvvm.ptr.local.to.gen">;
> +def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty],
> +                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
> +                 "llvm.nvvm.ptr.shared.to.gen">;
> +def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty],
> +                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
> +                 "llvm.nvvm.ptr.global.to.gen">;
> +def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty],
> +                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
> +                 "llvm.nvvm.ptr.constant.to.gen">;
> +
> +def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty],
> +                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
> +                 "llvm.nvvm.ptr.gen.to.global">;
> +def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty],
> +                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
> +                 "llvm.nvvm.ptr.gen.to.shared">;
> +def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty],
> +                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
> +                 "llvm.nvvm.ptr.gen.to.local">;
> +def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty],
> +                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
> +                 "llvm.nvvm.ptr.gen.to.constant">;
> +
> +// Used in nvvm internally to help address-space optimization and PTX code
> +// generation. This is for params that are passed by-value to kernel
> +// functions via a pointer.
> +def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
> +                                     [llvm_anyptr_ty],
> +                                   [IntrNoMem, NoCapture<0>],
> +                                   "llvm.nvvm.ptr.gen.to.param">;
> +
> +// Move intrinsics, used in nvvm internally
> +
> +def int_nvvm_move_i8 : Intrinsic<[llvm_i8_ty], [llvm_i8_ty], [IntrNoMem],
> +  "llvm.nvvm.move.i8">;
> +def int_nvvm_move_i16 : Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem],
> +  "llvm.nvvm.move.i16">;
> +def int_nvvm_move_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem],
> +  "llvm.nvvm.move.i32">;
> +def int_nvvm_move_i64 : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem],
> +  "llvm.nvvm.move.i64">;
> +def int_nvvm_move_float : Intrinsic<[llvm_float_ty], [llvm_float_ty],
> +  [IntrNoMem], "llvm.nvvm.move.float">;
> +def int_nvvm_move_double : Intrinsic<[llvm_double_ty], [llvm_double_ty],
> +  [IntrNoMem], "llvm.nvvm.move.double">;
> +def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
> +  [IntrNoMem, NoCapture<0>], "llvm.nvvm.move.ptr">;
> +
> +
> +/// Error / Warn
> +def int_nvvm_compiler_error :
> +    Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
> +def int_nvvm_compiler_warn :
> +    Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
>
> Modified: llvm/trunk/lib/Support/Triple.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=156196&r1=156195&r2=156196&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Support/Triple.cpp (original)
> +++ llvm/trunk/lib/Support/Triple.cpp Fri May  4 15:18:50 2012
> @@ -40,6 +40,8 @@
>   case mblaze:  return "mblaze";
>   case ptx32:   return "ptx32";
>   case ptx64:   return "ptx64";
> +  case nvptx:   return "nvptx";
> +  case nvptx64: return "nvptx64";
>   case le32:    return "le32";
>   case amdil:   return "amdil";
>   }
> @@ -76,6 +78,8 @@
>
>   case ptx32:   return "ptx";
>   case ptx64:   return "ptx";
> +  case nvptx:   return "nvptx";
> +  case nvptx64: return "nvptx";
>   case le32:    return "le32";
>   case amdil:   return "amdil";
>   }
> @@ -162,6 +166,8 @@
>     .Case("xcore", xcore)
>     .Case("ptx32", ptx32)
>     .Case("ptx64", ptx64)
> +    .Case("nvptx", nvptx)
> +    .Case("nvptx64", nvptx64)
>     .Case("le32", le32)
>     .Case("amdil", amdil)
>     .Default(UnknownArch);
> @@ -194,6 +200,8 @@
>     .Case("r600", Triple::r600)
>     .Case("ptx32", Triple::ptx32)
>     .Case("ptx64", Triple::ptx64)
> +    .Case("nvptx", Triple::nvptx)
> +    .Case("nvptx64", Triple::nvptx64)
>     .Case("amdil", Triple::amdil)
>     .Default(Triple::UnknownArch);
>  }
> @@ -217,6 +225,8 @@
>     .Case("r600", "r600")
>     .Case("ptx32", "ptx32")
>     .Case("ptx64", "ptx64")
> +    .Case("nvptx", "nvptx")
> +    .Case("nvptx64", "nvptx64")
>     .Case("le32", "le32")
>     .Case("amdil", "amdil")
>     .Default(NULL);
> @@ -251,6 +261,8 @@
>     .Case("xcore", Triple::xcore)
>     .Case("ptx32", Triple::ptx32)
>     .Case("ptx64", Triple::ptx64)
> +    .Case("nvptx", Triple::nvptx)
> +    .Case("nvptx64", Triple::nvptx64)
>     .Case("le32", Triple::le32)
>     .Case("amdil", Triple::amdil)
>     .Default(Triple::UnknownArch);
> @@ -652,6 +664,7 @@
>   case llvm::Triple::mblaze:
>   case llvm::Triple::mips:
>   case llvm::Triple::mipsel:
> +  case llvm::Triple::nvptx:
>   case llvm::Triple::ppc:
>   case llvm::Triple::ptx32:
>   case llvm::Triple::r600:
> @@ -664,6 +677,7 @@
>
>   case llvm::Triple::mips64:
>   case llvm::Triple::mips64el:
> +  case llvm::Triple::nvptx64:
>   case llvm::Triple::ppc64:
>   case llvm::Triple::ptx64:
>   case llvm::Triple::sparcv9:
> @@ -701,6 +715,7 @@
>   case Triple::mblaze:
>   case Triple::mips:
>   case Triple::mipsel:
> +  case Triple::nvptx:
>   case Triple::ppc:
>   case Triple::ptx32:
>   case Triple::r600:
> @@ -714,6 +729,7 @@
>
>   case Triple::mips64:    T.setArch(Triple::mips);    break;
>   case Triple::mips64el:  T.setArch(Triple::mipsel);  break;
> +  case Triple::nvptx64:   T.setArch(Triple::nvptx);   break;
>   case Triple::ppc64:     T.setArch(Triple::ppc);   break;
>   case Triple::ptx64:     T.setArch(Triple::ptx32);   break;
>   case Triple::sparcv9:   T.setArch(Triple::sparc);   break;
> @@ -742,6 +758,7 @@
>
>   case Triple::mips64:
>   case Triple::mips64el:
> +  case Triple::nvptx64:
>   case Triple::ppc64:
>   case Triple::ptx64:
>   case Triple::sparcv9:
> @@ -751,6 +768,7 @@
>
>   case Triple::mips:    T.setArch(Triple::mips64);    break;
>   case Triple::mipsel:  T.setArch(Triple::mips64el);  break;
> +  case Triple::nvptx:   T.setArch(Triple::nvptx64);   break;
>   case Triple::ppc:     T.setArch(Triple::ppc64);     break;
>   case Triple::ptx32:   T.setArch(Triple::ptx64);     break;
>   case Triple::sparc:   T.setArch(Triple::sparcv9);   break;
>
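Taken together, these hunks make the new names behave like the other paired targets. A hedged sketch of the expected behavior (assumes these hunks sit in getArchPointerBitWidth and the 32/64-bit variant helpers; the vendor/OS fields below are illustrative):

// Sketch: the new arch names round-trip through Triple, and the 32/64-bit
// variant helpers map nvptx <-> nvptx64 like the other paired targets.
#include "llvm/ADT/Triple.h"
#include <cassert>

int main() {
  llvm::Triple T("nvptx64-nvidia-cuda");
  assert(T.getArch() == llvm::Triple::nvptx64);
  assert(T.isArch64Bit());
  assert(T.get32BitArchVariant().getArch() == llvm::Triple::nvptx);
  assert(llvm::Triple("nvptx--").get64BitArchVariant().getArch() ==
         llvm::Triple::nvptx64);
  return 0;
}
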
> Modified: llvm/trunk/lib/Target/LLVMBuild.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/LLVMBuild.txt?rev=156196&r1=156195&r2=156196&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/LLVMBuild.txt (original)
> +++ llvm/trunk/lib/Target/LLVMBuild.txt Fri May  4 15:18:50 2012
> @@ -16,7 +16,7 @@
>  ;===------------------------------------------------------------------------===;
>
>  [common]
> -subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
> +subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips NVPTX PTX PowerPC Sparc X86 XCore
>
>  ; This is a special group whose required libraries are extended (by llvm-build)
>  ; with the best execution engine (the native JIT, if available, or the
>
> Added: llvm/trunk/lib/Target/NVPTX/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/CMakeLists.txt?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/CMakeLists.txt (added)
> +++ llvm/trunk/lib/Target/NVPTX/CMakeLists.txt Fri May  4 15:18:50 2012
> @@ -0,0 +1,33 @@
> +set(LLVM_TARGET_DEFINITIONS NVPTX.td)
> +
> +
> +tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
> +tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
> +tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
> +tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
> +tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
> +add_public_tablegen_target(NVPTXCommonTableGen)
> +
> +set(NVPTXCodeGen_sources
> +  NVPTXFrameLowering.cpp
> +  NVPTXInstrInfo.cpp
> +  NVPTXISelDAGToDAG.cpp
> +  NVPTXISelLowering.cpp
> +  NVPTXRegisterInfo.cpp
> +  NVPTXSubtarget.cpp
> +  NVPTXTargetMachine.cpp
> +  NVPTXSplitBBatBar.cpp
> +  NVPTXLowerAggrCopies.cpp
> +  NVPTXutil.cpp
> +  NVPTXAllocaHoisting.cpp
> +  NVPTXAsmPrinter.cpp
> +  NVPTXUtilities.cpp
> +  VectorElementize.cpp
> +  )
> +
> +add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
> +
> +
> +add_subdirectory(TargetInfo)
> +add_subdirectory(InstPrinter)
> +add_subdirectory(MCTargetDesc)
>
> Added: llvm/trunk/lib/Target/NVPTX/InstPrinter/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/InstPrinter/CMakeLists.txt?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/InstPrinter/CMakeLists.txt (added)
> +++ llvm/trunk/lib/Target/NVPTX/InstPrinter/CMakeLists.txt Fri May  4 15:18:50 2012
> @@ -0,0 +1,7 @@
> +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
> +
> +add_llvm_library(LLVMNVPTXAsmPrinter
> +  NVPTXInstPrinter.cpp
> +  )
> +
> +add_dependencies(LLVMNVPTXAsmPrinter NVPTXCommonTableGen)
>
> Added: llvm/trunk/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt (added)
> +++ llvm/trunk/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt Fri May  4 15:18:50 2012
> @@ -0,0 +1,23 @@
> +;===- ./lib/Target/NVPTX/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
> +;
> +;                     The LLVM Compiler Infrastructure
> +;
> +; This file is distributed under the University of Illinois Open Source
> +; License. See LICENSE.TXT for details.
> +;
> +;===------------------------------------------------------------------------===;
> +;
> +; This is an LLVMBuild description file for the components in this subdirectory.
> +;
> +; For more information on the LLVMBuild system, please see:
> +;
> +;   http://llvm.org/docs/LLVMBuild.html
> +;
> +;===------------------------------------------------------------------------===;
> +
> +[component_0]
> +type = Library
> +name = NVPTXAsmPrinter
> +parent = NVPTX
> +required_libraries = MC Support
> +add_to_library_groups = NVPTX
>
> Added: llvm/trunk/lib/Target/NVPTX/InstPrinter/Makefile
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/InstPrinter/Makefile?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/InstPrinter/Makefile (added)
> +++ llvm/trunk/lib/Target/NVPTX/InstPrinter/Makefile Fri May  4 15:18:50 2012
> @@ -0,0 +1,15 @@
> +##===- lib/Target/NVPTX/InstPrinter/Makefile ---------------*- Makefile -*-===##
> +#
> +#                     The LLVM Compiler Infrastructure
> +#
> +# This file is distributed under the University of Illinois Open Source
> +# License. See LICENSE.TXT for details.
> +#
> +##===----------------------------------------------------------------------===##
> +LEVEL = ../../../..
> +LIBRARYNAME = LLVMNVPTXAsmPrinter
> +
> +# Hack: we need to include 'main' ptx target directory to grab private headers
> +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
> +
> +include $(LEVEL)/Makefile.common
>
> Added: llvm/trunk/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1 @@
> +// Placeholder
>
> Added: llvm/trunk/lib/Target/NVPTX/LLVMBuild.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/LLVMBuild.txt?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/LLVMBuild.txt (added)
> +++ llvm/trunk/lib/Target/NVPTX/LLVMBuild.txt Fri May  4 15:18:50 2012
> @@ -0,0 +1,32 @@
> +;===- ./lib/Target/NVPTX/LLVMBuild.txt -------------------------*- Conf -*--===;
> +;
> +;                     The LLVM Compiler Infrastructure
> +;
> +; This file is distributed under the University of Illinois Open Source
> +; License. See LICENSE.TXT for details.
> +;
> +;===------------------------------------------------------------------------===;
> +;
> +; This is an LLVMBuild description file for the components in this subdirectory.
> +;
> +; For more information on the LLVMBuild system, please see:
> +;
> +;   http://llvm.org/docs/LLVMBuild.html
> +;
> +;===------------------------------------------------------------------------===;
> +
> +[common]
> +subdirectories = InstPrinter MCTargetDesc TargetInfo
> +
> +[component_0]
> +type = TargetGroup
> +name = NVPTX
> +parent = Target
> +has_asmprinter = 1
> +
> +[component_1]
> +type = Library
> +name = NVPTXCodeGen
> +parent = NVPTX
> +required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXDesc NVPTXInfo SelectionDAG Support Target TransformUtils
> +add_to_library_groups = NVPTX
>
> Added: llvm/trunk/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt (added)
> +++ llvm/trunk/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt Fri May  4 15:18:50 2012
> @@ -0,0 +1,9 @@
> +add_llvm_library(LLVMNVPTXDesc
> +  NVPTXMCAsmInfo.cpp
> +  NVPTXMCTargetDesc.cpp
> +  )
> +
> +add_dependencies(LLVMNVPTXDesc NVPTXCommonTableGen)
> +
> +# Hack: we need to include 'main' target directory to grab private headers
> +#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
>
> Added: llvm/trunk/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt (added)
> +++ llvm/trunk/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt Fri May  4 15:18:50 2012
> @@ -0,0 +1,23 @@
> +;===- ./lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
> +;
> +;                     The LLVM Compiler Infrastructure
> +;
> +; This file is distributed under the University of Illinois Open Source
> +; License. See LICENSE.TXT for details.
> +;
> +;===------------------------------------------------------------------------===;
> +;
> +; This is an LLVMBuild description file for the components in this subdirectory.
> +;
> +; For more information on the LLVMBuild system, please see:
> +;
> +;   http://llvm.org/docs/LLVMBuild.html
> +;
> +;===------------------------------------------------------------------------===;
> +
> +[component_0]
> +type = Library
> +name = NVPTXDesc
> +parent = NVPTX
> +required_libraries = MC NVPTXAsmPrinter NVPTXInfo Support
> +add_to_library_groups = NVPTX
>
> Added: llvm/trunk/lib/Target/NVPTX/MCTargetDesc/Makefile
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/MCTargetDesc/Makefile?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/MCTargetDesc/Makefile (added)
> +++ llvm/trunk/lib/Target/NVPTX/MCTargetDesc/Makefile Fri May  4 15:18:50 2012
> @@ -0,0 +1,16 @@
> +##===- lib/Target/NVPTX/MCTargetDesc/Makefile --------------*- Makefile -*-===##
> +#
> +#                     The LLVM Compiler Infrastructure
> +#
> +# This file is distributed under the University of Illinois Open Source
> +# License. See LICENSE.TXT for details.
> +#
> +##===----------------------------------------------------------------------===##
> +
> +LEVEL = ../../../..
> +LIBRARYNAME = LLVMNVPTXDesc
> +
> +# Hack: we need to include 'main' target directory to grab private headers
> +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
> +
> +include $(LEVEL)/Makefile.common
>
> Added: llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,88 @@
> +//===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains small standalone helper functions and enum definitions for
> +// the NVPTX target useful for the compiler back-end and the MC libraries.
> +// As such, it deliberately does not include references to LLVM core
> +// code gen types, passes, etc..
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTXBASEINFO_H
> +#define NVPTXBASEINFO_H
> +
> +namespace llvm {
> +
> +enum AddressSpace {
> +  ADDRESS_SPACE_GENERIC = 0,
> +  ADDRESS_SPACE_GLOBAL = 1,
> +  ADDRESS_SPACE_CONST_NOT_GEN = 2, // Not part of generic space
> +  ADDRESS_SPACE_SHARED = 3,
> +  ADDRESS_SPACE_CONST = 4,
> +  ADDRESS_SPACE_LOCAL = 5,
> +
> +  // NVVM Internal
> +  ADDRESS_SPACE_PARAM = 101
> +};
> +
> +enum PropertyAnnotation {
> +  PROPERTY_MAXNTID_X = 0,
> +  PROPERTY_MAXNTID_Y,
> +  PROPERTY_MAXNTID_Z,
> +  PROPERTY_REQNTID_X,
> +  PROPERTY_REQNTID_Y,
> +  PROPERTY_REQNTID_Z,
> +  PROPERTY_MINNCTAPERSM,
> +  PROPERTY_ISTEXTURE,
> +  PROPERTY_ISSURFACE,
> +  PROPERTY_ISSAMPLER,
> +  PROPERTY_ISREADONLY_IMAGE_PARAM,
> +  PROPERTY_ISWRITEONLY_IMAGE_PARAM,
> +  PROPERTY_ISKERNEL_FUNCTION,
> +  PROPERTY_ALIGN,
> +
> +  // last property
> +  PROPERTY_LAST
> +};
> +
> +const unsigned AnnotationNameLen = 8; // length of each annotation name
> +const char
> +PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
> +  "maxntidx",               // PROPERTY_MAXNTID_X
> +  "maxntidy",               // PROPERTY_MAXNTID_Y
> +  "maxntidz",               // PROPERTY_MAXNTID_Z
> +  "reqntidx",               // PROPERTY_REQNTID_X
> +  "reqntidy",               // PROPERTY_REQNTID_Y
> +  "reqntidz",               // PROPERTY_REQNTID_Z
> +  "minctasm",               // PROPERTY_MINNCTAPERSM
> +  "texture",                // PROPERTY_ISTEXTURE
> +  "surface",                // PROPERTY_ISSURFACE
> +  "sampler",                // PROPERTY_ISSAMPLER
> +  "rdoimage",               // PROPERTY_ISREADONLY_IMAGE_PARAM
> +  "wroimage",               // PROPERTY_ISWRITEONLY_IMAGE_PARAM
> +  "kernel",                 // PROPERTY_ISKERNEL_FUNCTION
> +  "align",                  // PROPERTY_ALIGN
> +
> +  // last property
> +  "proplast",               // PROPERTY_LAST
> +};
> +
> +// Name of the named metadata used for global annotations.
> +#if defined(__GNUC__)
> +// This is declared static, but some of the .cpp files that include this
> +// header do not use it, so gcc warns when compiling them; hence
> +// __attribute__((unused)).
> +__attribute__((unused))
> +#endif
> +static const char* NamedMDForAnnotations = "nvvm.annotations";
> +
> +}
> +
> +
> +#endif
>
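Note that the enum-to-name coupling here is purely positional, so the two tables have to stay in lockstep. A trimmed standalone sketch (only three entries reproduced from the header above):

// Sketch: PropertyAnnotation values index PropertyAnnotationNames directly,
// so the two tables must stay in the same order.
#include <cstdio>

enum PropertyAnnotation { PROPERTY_MAXNTID_X = 0, PROPERTY_MAXNTID_Y,
                          PROPERTY_LAST = 2 };
const unsigned AnnotationNameLen = 8; // longest name, e.g. "maxntidx"
const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
  "maxntidx", // PROPERTY_MAXNTID_X
  "maxntidy", // PROPERTY_MAXNTID_Y
  "proplast", // PROPERTY_LAST
};

int main() {
  std::printf("%s\n", PropertyAnnotationNames[PROPERTY_MAXNTID_Y]); // maxntidy
  return 0;
}
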
> Added: llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,63 @@
> +//===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the declarations of the NVPTXMCAsmInfo properties.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTXMCAsmInfo.h"
> +#include "llvm/ADT/Triple.h"
> +#include "llvm/Support/CommandLine.h"
> +
> +using namespace llvm;
> +
> +bool CompileForDebugging;
> +
> +// -debug-compile - Command line option to inform opt and llc passes to
> +// compile for debugging
> +static cl::opt<bool, true>
> +Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
> +      cl::location(CompileForDebugging),
> +      cl::init(false));
> +
> +void NVPTXMCAsmInfo::anchor() { }
> +
> +NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
> +  Triple TheTriple(TT);
> +  if (TheTriple.getArch() == Triple::nvptx64)
> +    PointerSize = 8;
> +
> +  CommentString = "//";
> +
> +  PrivateGlobalPrefix = "$L__";
> +
> +  AllowPeriodsInName = false;
> +
> +  HasSetDirective = false;
> +
> +  HasSingleParameterDotFile = false;
> +
> +  InlineAsmStart = " inline asm";
> +  InlineAsmEnd = " inline asm";
> +
> +  SupportsDebugInformation = CompileForDebugging;
> +  HasDotTypeDotSizeDirective = false;
> +
> +  Data8bitsDirective = " .b8 ";
> +  Data16bitsDirective = " .b16 ";
> +  Data32bitsDirective = " .b32 ";
> +  Data64bitsDirective = " .b64 ";
> +  PrivateGlobalPrefix = "";
> +  ZeroDirective =  " .b8";
> +  AsciiDirective = " .b8";
> +  AscizDirective = " .b8";
> +
> +  // @TODO: Can we just disable this?
> +  GlobalDirective = "\t// .globl\t";
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,30 @@
> +//===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the declaration of the NVPTXMCAsmInfo class.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTX_MCASM_INFO_H
> +#define NVPTX_MCASM_INFO_H
> +
> +#include "llvm/MC/MCAsmInfo.h"
> +
> +namespace llvm {
> +class Target;
> +class StringRef;
> +
> +class NVPTXMCAsmInfo : public MCAsmInfo {
> +  virtual void anchor();
> +public:
> +  explicit NVPTXMCAsmInfo(const Target &T, const StringRef &TT);
> +};
> +} // namespace llvm
> +
> +#endif // NVPTX_MCASM_INFO_H
>
> Added: llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,91 @@
> +//===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file provides NVPTX specific target descriptions.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTXMCTargetDesc.h"
> +#include "NVPTXMCAsmInfo.h"
> +#include "llvm/MC/MCCodeGenInfo.h"
> +#include "llvm/MC/MCInstrInfo.h"
> +#include "llvm/MC/MCRegisterInfo.h"
> +#include "llvm/MC/MCSubtargetInfo.h"
> +#include "llvm/Support/TargetRegistry.h"
> +
> +#define GET_INSTRINFO_MC_DESC
> +#include "NVPTXGenInstrInfo.inc"
> +
> +#define GET_SUBTARGETINFO_MC_DESC
> +#include "NVPTXGenSubtargetInfo.inc"
> +
> +#define GET_REGINFO_MC_DESC
> +#include "NVPTXGenRegisterInfo.inc"
> +
> +
> +using namespace llvm;
> +
> +static MCInstrInfo *createNVPTXMCInstrInfo() {
> +  MCInstrInfo *X = new MCInstrInfo();
> +  InitNVPTXMCInstrInfo(X);
> +  return X;
> +}
> +
> +static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
> +  MCRegisterInfo *X = new MCRegisterInfo();
> +  // PTX does not have a return address register.
> +  InitNVPTXMCRegisterInfo(X, 0);
> +  return X;
> +}
> +
> +static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
> +                                                   StringRef FS) {
> +  MCSubtargetInfo *X = new MCSubtargetInfo();
> +  InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
> +  return X;
> +}
> +
> +static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
> +                                               CodeModel::Model CM,
> +                                               CodeGenOpt::Level OL) {
> +  MCCodeGenInfo *X = new MCCodeGenInfo();
> +  X->InitMCCodeGenInfo(RM, CM, OL);
> +  return X;
> +}
> +
> +
> +// Force static initialization.
> +extern "C" void LLVMInitializeNVPTXTargetMC() {
> +  // Register the MC asm info.
> +  RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
> +  RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
> +
> +  // Register the MC codegen info.
> +  TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
> +                                        createNVPTXMCCodeGenInfo);
> +  TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
> +                                        createNVPTXMCCodeGenInfo);
> +
> +  // Register the MC instruction info.
> +  TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
> +  TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
> +
> +  // Register the MC register info.
> +  TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
> +                                    createNVPTXMCRegisterInfo);
> +  TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
> +                                    createNVPTXMCRegisterInfo);
> +
> +  // Register the MC subtarget info.
> +  TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
> +                                          createNVPTXMCSubtargetInfo);
> +  TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
> +                                          createNVPTXMCSubtargetInfo);
> +
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,36 @@
> +//===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file provides NVPTX specific target descriptions.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTXMCTARGETDESC_H
> +#define NVPTXMCTARGETDESC_H
> +
> +namespace llvm {
> +class Target;
> +
> +extern Target TheNVPTXTarget32;
> +extern Target TheNVPTXTarget64;
> +
> +} // End llvm namespace
> +
> +// Defines symbolic names for PTX registers.
> +#define GET_REGINFO_ENUM
> +#include "NVPTXGenRegisterInfo.inc"
> +
> +// Defines symbolic names for the PTX instructions.
> +#define GET_INSTRINFO_ENUM
> +#include "NVPTXGenInstrInfo.inc"
> +
> +#define GET_SUBTARGETINFO_ENUM
> +#include "NVPTXGenSubtargetInfo.inc"
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/NVPTX/Makefile
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/Makefile?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/Makefile (added)
> +++ llvm/trunk/lib/Target/NVPTX/Makefile Fri May  4 15:18:50 2012
> @@ -0,0 +1,23 @@
> +##===- lib/Target/NVPTX/Makefile ---------------------------*- Makefile -*-===##
> +#
> +#                     The LLVM Compiler Infrastructure
> +#
> +# This file is distributed under the University of Illinois Open Source
> +# License. See LICENSE.TXT for details.
> +#
> +##===----------------------------------------------------------------------===##
> +
> +LEVEL = ../../..
> +LIBRARYNAME = LLVMNVPTXCodeGen
> +TARGET = NVPTX
> +
> +# Make sure that tblgen is run, first thing.
> +BUILT_SOURCES = NVPTXGenAsmWriter.inc \
> +               NVPTXGenDAGISel.inc \
> +               NVPTXGenInstrInfo.inc \
> +               NVPTXGenRegisterInfo.inc \
> +               NVPTXGenSubtargetInfo.inc
> +
> +DIRS = InstPrinter TargetInfo MCTargetDesc
> +
> +include $(LEVEL)/Makefile.common
>
> Added: llvm/trunk/lib/Target/NVPTX/ManagedStringPool.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/ManagedStringPool.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/ManagedStringPool.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/ManagedStringPool.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,49 @@
> +//===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// The strings allocated from a managed string pool are owned by the string
> +// pool and will be deleted together with the managed string pool.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +
> +#ifndef LLVM_SUPPORT_MANAGED_STRING_H
> +#define LLVM_SUPPORT_MANAGED_STRING_H
> +
> +#include "llvm/ADT/SmallVector.h"
> +#include <string>
> +
> +namespace llvm {
> +
> +/// ManagedStringPool - The strings allocated from a managed string pool are
> +/// owned by the string pool and will be deleted together with the managed
> +/// string pool.
> +class ManagedStringPool {
> +  SmallVector<std::string *, 8> Pool;
> +
> +public:
> +  ManagedStringPool() {}
> +  ~ManagedStringPool() {
> +    SmallVector<std::string *, 8>::iterator Current = Pool.begin();
> +    while (Current != Pool.end()) {
> +      delete *Current;
> +      Current++;
> +    }
> +  }
> +
> +  std::string *getManagedString(const char *S) {
> +    std::string *Str = new std::string(S);
> +    Pool.push_back(Str);
> +    return Str;
> +  }
> +};
> +
> +}
> +
> +#endif
>
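Usage is simply handing out pool-owned std::strings. A minimal sketch, assuming the header above is on the include path:

// Sketch: strings obtained from the pool stay valid until the pool itself
// is destroyed, so callers can hold the raw pointers.
#include "ManagedStringPool.h"
#include <cstdio>

int main() {
  llvm::ManagedStringPool Pool;
  std::string *Name = Pool.getManagedString("func_retval0");
  std::printf("%s\n", Name->c_str());
  return 0; // ~ManagedStringPool deletes Name here
}
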
> Added: llvm/trunk/lib/Target/NVPTX/NVPTX.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTX.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTX.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTX.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,137 @@
> +//===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the entry points for global functions defined in
> +// the LLVM NVPTX back-end.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_TARGET_NVPTX_H
> +#define LLVM_TARGET_NVPTX_H
> +
> +#include <cassert>
> +#include <iosfwd>
> +#include "llvm/Value.h"
> +#include "llvm/Module.h"
> +#include "llvm/Support/ErrorHandling.h"
> +#include "llvm/Target/TargetMachine.h"
> +#include "MCTargetDesc/NVPTXBaseInfo.h"
> +
> +namespace llvm {
> +class NVPTXTargetMachine;
> +class FunctionPass;
> +class formatted_raw_ostream;
> +
> +namespace NVPTXCC {
> +enum CondCodes {
> +  EQ,
> +  NE,
> +  LT,
> +  LE,
> +  GT,
> +  GE
> +};
> +}
> +
> +inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
> +  switch (CC) {
> +  default: assert(0 && "Unknown condition code");
> +  case NVPTXCC::NE:   return "ne";
> +  case NVPTXCC::EQ:   return "eq";
> +  case NVPTXCC::LT:   return "lt";
> +  case NVPTXCC::LE:   return "le";
> +  case NVPTXCC::GT:   return "gt";
> +  case NVPTXCC::GE:   return "ge";
> +  }
> +}
> +
> +FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
> +                                 llvm::CodeGenOpt::Level OptLevel);
> +FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
> +FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
> +FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
> +FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
> +
> +bool isImageOrSamplerVal(const Value *, const Module *);
> +
> +extern Target TheNVPTXTarget32;
> +extern Target TheNVPTXTarget64;
> +
> +namespace NVPTX
> +{
> +enum DrvInterface {
> +  NVCL,
> +  CUDA,
> +  TEST
> +};
> +
> +// A field inside TSFlags needs a shift and a mask. The usage is
> +// always as follows :
> +// ((TSFlags & fieldMask) >> fieldShift)
> +// The enum keeps the mask, the shift, and all valid values of the
> +// field in one place.
> +enum VecInstType {
> +  VecInstTypeShift = 0,
> +  VecInstTypeMask = 0xF,
> +
> +  VecNOP = 0,
> +  VecLoad = 1,
> +  VecStore = 2,
> +  VecBuild = 3,
> +  VecShuffle = 4,
> +  VecExtract = 5,
> +  VecInsert = 6,
> +  VecDest = 7,
> +  VecOther = 15
> +};
> +
> +enum SimpleMove {
> +  SimpleMoveMask = 0x10,
> +  SimpleMoveShift = 4
> +};
> +enum LoadStore {
> +  isLoadMask = 0x20,
> +  isLoadShift = 5,
> +  isStoreMask = 0x40,
> +  isStoreShift = 6
> +};
> +
> +namespace PTXLdStInstCode {
> +enum AddressSpace {
> +  GENERIC = 0,
> +  GLOBAL = 1,
> +  CONSTANT = 2,
> +  SHARED = 3,
> +  PARAM = 4,
> +  LOCAL = 5
> +};
> +enum FromType {
> +  Unsigned = 0,
> +  Signed,
> +  Float
> +};
> +enum VecType {
> +  Scalar = 1,
> +  V2 = 2,
> +  V4 = 4
> +};
> +}
> +}
> +} // end namespace llvm;
> +
> +// Defines symbolic names for NVPTX registers.  This defines a mapping from
> +// register name to register number.
> +#define GET_REGINFO_ENUM
> +#include "NVPTXGenRegisterInfo.inc"
> +
> +// Defines symbolic names for the NVPTX instructions.
> +#define GET_INSTRINFO_ENUM
> +#include "NVPTXGenInstrInfo.inc"
> +
> +#endif
>
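The mask/shift convention described in the TSFlags comment is easiest to see applied. A standalone sketch (mask and shift values copied from the enums above; the flag word itself is illustrative):

// Sketch: decoding the TSFlags fields declared in NVPTX.h.
#include <cassert>
#include <stdint.h>

enum { VecInstTypeMask = 0xF,  VecInstTypeShift = 0,
       SimpleMoveMask  = 0x10, SimpleMoveShift  = 4,
       isLoadMask      = 0x20, isLoadShift      = 5 };

int main() {
  uint64_t TSFlags = 0x21; // VecLoad (1) with the isLoad bit set
  assert(((TSFlags & VecInstTypeMask) >> VecInstTypeShift) == 1 /*VecLoad*/);
  assert(((TSFlags & isLoadMask) >> isLoadShift) == 1);
  assert(((TSFlags & SimpleMoveMask) >> SimpleMoveShift) == 0);
  return 0;
}
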
> Added: llvm/trunk/lib/Target/NVPTX/NVPTX.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTX.td?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTX.td (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTX.td Fri May  4 15:18:50 2012
> @@ -0,0 +1,44 @@
> +//===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +// This is the top level entry point for the NVPTX target.
> +//===----------------------------------------------------------------------===//
> +
> +//===----------------------------------------------------------------------===//
> +// Target-independent interfaces
> +//===----------------------------------------------------------------------===//
> +
> +include "llvm/Target/Target.td"
> +
> +include "NVPTXRegisterInfo.td"
> +include "NVPTXInstrInfo.td"
> +
> +//===----------------------------------------------------------------------===//
> +// Subtarget Features.
> +// - We use the SM version number instead of an explicit feature table.
> +// - We need at least one feature to keep TableGen from generating a
> +//   zero-sized array in NVPTXGenSubtarget.inc.
> +//===----------------------------------------------------------------------===//
> +def FeatureDummy  : SubtargetFeature<"dummy", "dummy", "true", "">;
> +
> +//===----------------------------------------------------------------------===//
> +// NVPTX supported processors.
> +//===----------------------------------------------------------------------===//
> +
> +class Proc<string Name, list<SubtargetFeature> Features>
> + : Processor<Name, NoItineraries, Features>;
> +
> +def : Proc<"sm_10", [FeatureDummy]>;
> +
> +
> +def NVPTXInstrInfo : InstrInfo {
> +}
> +
> +def NVPTX : Target {
> +  let InstructionSet = NVPTXInstrInfo;
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,48 @@
> +//===-- AllocaHoisting.cpp - Hoist allocas to the entry block ---*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Hoist the alloca instructions in the non-entry blocks to the entry block.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/Function.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/Constants.h"
> +#include "NVPTXAllocaHoisting.h"
> +
> +namespace llvm {
> +
> +bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
> +  bool               functionModified    = false;
> +  Function::iterator I                   = function.begin();
> +  TerminatorInst    *firstTerminatorInst = (I++)->getTerminator();
> +
> +  for (Function::iterator E = function.end(); I != E; ++I) {
> +    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
> +      AllocaInst *allocaInst = dyn_cast<AllocaInst>(BI++);
> +      if (allocaInst && isa<ConstantInt>(allocaInst->getArraySize())) {
> +        allocaInst->moveBefore(firstTerminatorInst);
> +        functionModified = true;
> +      }
> +    }
> +  }
> +
> +  return functionModified;
> +}
> +
> +char NVPTXAllocaHoisting::ID = 1;
> +RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
> +                                    "Hoisting alloca instructions in non-entry "
> +                                    "blocks to the entry block");
> +
> +FunctionPass *createAllocaHoisting() {
> +  return new NVPTXAllocaHoisting();
> +}
> +
> +} // end namespace llvm
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXAllocaHoisting.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXAllocaHoisting.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXAllocaHoisting.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXAllocaHoisting.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,49 @@
> +//===-- AllocaHoisting.h - Hoist allocas to the entry block -----*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Hoist the alloca instructions in the non-entry blocks to the entry block.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTX_ALLOCA_HOISTING_H_
> +#define NVPTX_ALLOCA_HOISTING_H_
> +
> +#include "llvm/CodeGen/MachineFunctionAnalysis.h"
> +#include "llvm/Pass.h"
> +#include "llvm/Target/TargetData.h"
> +
> +namespace llvm {
> +
> +class FunctionPass;
> +class Function;
> +
> +// Hoists the alloca instructions in the non-entry blocks to the entry
> +// block.
> +class NVPTXAllocaHoisting : public FunctionPass {
> +public:
> +  static char ID; // Pass ID
> +  NVPTXAllocaHoisting() : FunctionPass(ID) {}
> +
> +  void getAnalysisUsage(AnalysisUsage &AU) const {
> +    AU.addRequired<TargetData>();
> +    AU.addPreserved<MachineFunctionAnalysis>();
> +  }
> +
> +  virtual const char *getPassName() const {
> +    return "NVPTX specific alloca hoisting";
> +  }
> +
> +  virtual bool runOnFunction(Function &function);
> +};
> +
> +extern FunctionPass *createAllocaHoisting();
> +
> +} // end namespace llvm
> +
> +#endif // NVPTX_ALLOCA_HOISTING_H_
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,2068 @@
> +//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains a printer that converts from our internal representation
> +// of machine-dependent LLVM code to NVPTX assembly language.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTX.h"
> +#include "NVPTXInstrInfo.h"
> +#include "NVPTXTargetMachine.h"
> +#include "NVPTXRegisterInfo.h"
> +#include "NVPTXAsmPrinter.h"
> +#include "MCTargetDesc/NVPTXMCAsmInfo.h"
> +#include "NVPTXNumRegisters.h"
> +#include "../lib/CodeGen/AsmPrinter/DwarfDebug.h"
> +#include "llvm/ADT/StringExtras.h"
> +#include "llvm/GlobalVariable.h"
> +#include "llvm/Function.h"
> +#include "llvm/Module.h"
> +#include "llvm/CodeGen/Analysis.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> +#include "llvm/CodeGen/MachineModuleInfo.h"
> +#include "llvm/MC/MCStreamer.h"
> +#include "llvm/MC/MCSymbol.h"
> +#include "llvm/Target/Mangler.h"
> +#include "llvm/Target/TargetLoweringObjectFile.h"
> +#include "llvm/Support/TargetRegistry.h"
> +#include "llvm/Support/ErrorHandling.h"
> +#include "llvm/Support/FormattedStream.h"
> +#include "llvm/DerivedTypes.h"
> +#include "NVPTXUtilities.h"
> +#include "llvm/Support/TimeValue.h"
> +#include <sstream>
> +#include "llvm/Support/CommandLine.h"
> +#include "llvm/Analysis/DebugInfo.h"
> +#include "llvm/Analysis/ConstantFolding.h"
> +#include "llvm/Support/Path.h"
> +#include "llvm/Assembly/Writer.h"
> +#include "cl_common_defines.h"
> +
> +
> +using namespace llvm;
> +
> +
> +#include "NVPTXGenAsmWriter.inc"
> +
> +bool RegAllocNilUsed = true;
> +
> +#define DEPOTNAME "__local_depot"
> +
> +static cl::opt<bool>
> +EmitLineNumbers("nvptx-emit-line-numbers",
> +                cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
> +                cl::init(true));
> +
> +namespace llvm  {
> +bool InterleaveSrcInPtx = false;
> +}
> +
> +static cl::opt<bool, true>
> +InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
> +              cl::desc("NVPTX Specific: Emit source line in ptx file"),
> +              cl::location(llvm::InterleaveSrcInPtx));
> +
> +
> +
> +
> +using namespace nvptx;
> +
> +// @TODO: This is a copy from AsmPrinter.cpp.  The function is static there,
> +// so we cannot just link against the existing version.
> +/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
> +const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
> +  MCContext &Ctx = AP.OutContext;
> +
> +  if (CV->isNullValue() || isa<UndefValue>(CV))
> +    return MCConstantExpr::Create(0, Ctx);
> +
> +  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
> +    return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
> +
> +  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
> +    return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
> +
> +  if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
> +    return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
> +
> +  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
> +  if (CE == 0)
> +    llvm_unreachable("Unknown constant value to lower!");
> +
> +
> +  switch (CE->getOpcode()) {
> +  default:
> +    // If the code isn't optimized, there may be outstanding folding
> +    // opportunities. Attempt to fold the expression using TargetData as a
> +    // last resort before giving up.
> +    if (Constant *C =
> +        ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
> +      if (C != CE)
> +        return LowerConstant(C, AP);
> +
> +    // Otherwise report the problem to the user.
> +    {
> +        std::string S;
> +        raw_string_ostream OS(S);
> +        OS << "Unsupported expression in static initializer: ";
> +        WriteAsOperand(OS, CE, /*PrintType=*/false,
> +                       !AP.MF ? 0 : AP.MF->getFunction()->getParent());
> +        report_fatal_error(OS.str());
> +    }
> +  case Instruction::GetElementPtr: {
> +    const TargetData &TD = *AP.TM.getTargetData();
> +    // Generate a symbolic expression for the byte address
> +    const Constant *PtrVal = CE->getOperand(0);
> +    SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
> +    int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
> +
> +    const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
> +    if (Offset == 0)
> +      return Base;
> +
> +    // Truncate/sext the offset to the pointer size.
> +    if (TD.getPointerSizeInBits() != 64) {
> +      int SExtAmount = 64-TD.getPointerSizeInBits();
> +      Offset = (Offset << SExtAmount) >> SExtAmount;
> +    }
> +
> +    return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
> +                                   Ctx);
> +  }
> +
> +  case Instruction::Trunc:
> +    // We emit the value and depend on the assembler to truncate the generated
> +    // expression properly.  This is important for differences between
> +    // blockaddress labels.  Since the two labels are in the same function, it
> +    // is reasonable to treat their delta as a 32-bit value.
> +    // FALL THROUGH.
> +  case Instruction::BitCast:
> +    return LowerConstant(CE->getOperand(0), AP);
> +
> +  case Instruction::IntToPtr: {
> +    const TargetData &TD = *AP.TM.getTargetData();
> +    // Handle casts to pointers by changing them into casts to the appropriate
> +    // integer type.  This promotes constant folding and simplifies this code.
> +    Constant *Op = CE->getOperand(0);
> +    Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
> +                                      false/*ZExt*/);
> +    return LowerConstant(Op, AP);
> +  }
> +
> +  case Instruction::PtrToInt: {
> +    const TargetData &TD = *AP.TM.getTargetData();
> +    // Support only foldable casts to/from pointers that can be eliminated by
> +    // changing the pointer to the appropriately sized integer type.
> +    Constant *Op = CE->getOperand(0);
> +    Type *Ty = CE->getType();
> +
> +    const MCExpr *OpExpr = LowerConstant(Op, AP);
> +
> +    // We can emit the pointer value into this slot if the slot is an
> +    // integer slot equal to the size of the pointer.
> +    if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
> +      return OpExpr;
> +
> +    // Otherwise the pointer is smaller than the resultant integer, mask off
> +    // the high bits so we are sure to get a proper truncation if the input is
> +    // a constant expr.
> +    unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
> +    const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
> +    return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
> +  }
> +
> +  // The MC library also has a right-shift operator, but it isn't consistently
> +  // signed or unsigned between different targets.
> +  case Instruction::Add:
> +  case Instruction::Sub:
> +  case Instruction::Mul:
> +  case Instruction::SDiv:
> +  case Instruction::SRem:
> +  case Instruction::Shl:
> +  case Instruction::And:
> +  case Instruction::Or:
> +  case Instruction::Xor: {
> +    const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
> +    const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
> +    switch (CE->getOpcode()) {
> +    default: llvm_unreachable("Unknown binary operator constant cast expr");
> +    case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
> +    case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
> +    case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
> +    case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
> +    case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
> +    case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
> +    case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
> +    case Instruction::Or:  return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
> +    case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
> +    }
> +  }
> +  }
> +}
> +
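Two bit tricks in this function are worth a worked example: the shift pair that re-sign-extends a truncated offset, and the mask that truncates a pointer-sized value. A standalone sketch on a hypothetical 32-bit-pointer target (the unsigned cast avoids the signed-overflow UB the raw shift pair would have):

// Sketch: the offset sign-extension and high-bit masking used in
// LowerConstant above.
#include <cassert>
#include <stdint.h>

int main() {
  // Truncate a 64-bit offset to 32 bits, then sign-extend it back.
  int64_t Offset = 0x1FFFFFFFFLL;         // low 32 bits are 0xFFFFFFFF
  int SExtAmount = 64 - 32;               // 64 - getPointerSizeInBits()
  Offset = (int64_t)((uint64_t)Offset << SExtAmount) >> SExtAmount;
  assert(Offset == -1);

  // Mask a value down to InBits bits before emitting it into a wider slot.
  unsigned InBits = 32;
  uint64_t Mask = ~0ULL >> (64 - InBits); // 0x00000000FFFFFFFF
  assert((0x123456789ULL & Mask) == 0x23456789ULL);
  return 0;
}
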
> +
> +void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
> +{
> +  if (!EmitLineNumbers)
> +    return;
> +  if (ignoreLoc(MI))
> +    return;
> +
> +  DebugLoc curLoc = MI.getDebugLoc();
> +
> +  if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
> +    return;
> +
> +  if (prevDebugLoc == curLoc)
> +    return;
> +
> +  prevDebugLoc = curLoc;
> +
> +  if (curLoc.isUnknown())
> +    return;
> +
> +
> +  const MachineFunction *MF = MI.getParent()->getParent();
> +  //const TargetMachine &TM = MF->getTarget();
> +
> +  const LLVMContext &ctx = MF->getFunction()->getContext();
> +  DIScope Scope(curLoc.getScope(ctx));
> +
> +  if (!Scope.Verify())
> +    return;
> +
> +  StringRef fileName(Scope.getFilename());
> +  StringRef dirName(Scope.getDirectory());
> +  SmallString<128> FullPathName = dirName;
> +  if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
> +    sys::path::append(FullPathName, fileName);
> +    fileName = FullPathName.str();
> +  }
> +
> +  if (filenameMap.find(fileName.str()) == filenameMap.end())
> +    return;
> +
> +
> +  // Emit the line from the source file.
> +  if (llvm::InterleaveSrcInPtx)
> +    this->emitSrcInText(fileName.str(), curLoc.getLine());
> +
> +  std::stringstream temp;
> +  temp << "\t.loc " << filenameMap[fileName.str()]
> +       << " " << curLoc.getLine() << " " << curLoc.getCol();
> +  OutStreamer.EmitRawText(Twine(temp.str().c_str()));
> +}
> +
> +void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
> +  SmallString<128> Str;
> +  raw_svector_ostream OS(Str);
> +  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
> +    emitLineNumberAsDotLoc(*MI);
> +  printInstruction(MI, OS);
> +  OutStreamer.EmitRawText(OS.str());
> +}
> +
> +void NVPTXAsmPrinter::printReturnValStr(const Function *F,
> +                                        raw_ostream &O)
> +{
> +  const TargetData *TD = TM.getTargetData();
> +  const TargetLowering *TLI = TM.getTargetLowering();
> +
> +  Type *Ty = F->getReturnType();
> +
> +  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
> +
> +  if (Ty->getTypeID() == Type::VoidTyID)
> +    return;
> +
> +  O << " (";
> +
> +  if (isABI) {
> +    if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
> +      unsigned size = 0;
> +      if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
> +        size = ITy->getBitWidth();
> +        if (size < 32) size = 32;
> +      } else {
> +        assert(Ty->isFloatingPointTy() &&
> +               "Floating point type expected here");
> +        size = Ty->getPrimitiveSizeInBits();
> +      }
> +
> +      O << ".param .b" << size << " func_retval0";
> +    }
> +    else if (isa<PointerType>(Ty)) {
> +      O << ".param .b" << TLI->getPointerTy().getSizeInBits()
> +            << " func_retval0";
> +    } else {
> +      if ((Ty->getTypeID() == Type::StructTyID) ||
> +          isa<VectorType>(Ty)) {
> +        SmallVector<EVT, 16> vtparts;
> +        ComputeValueVTs(*TLI, Ty, vtparts);
> +        unsigned totalsz = 0;
> +        for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
> +          unsigned elems = 1;
> +          EVT elemtype = vtparts[i];
> +          if (vtparts[i].isVector()) {
> +            elems = vtparts[i].getVectorNumElements();
> +            elemtype = vtparts[i].getVectorElementType();
> +          }
> +          for (unsigned j=0, je=elems; j!=je; ++j) {
> +            unsigned sz = elemtype.getSizeInBits();
> +            if (elemtype.isInteger() && (sz < 8)) sz = 8;
> +            totalsz += sz/8;
> +          }
> +        }
> +        unsigned retAlignment = 0;
> +        if (!llvm::getAlign(*F, 0, retAlignment))
> +          retAlignment = TD->getABITypeAlignment(Ty);
> +        O << ".param .align "
> +            << retAlignment
> +            << " .b8 func_retval0["
> +            << totalsz << "]";
> +      } else
> +        assert(false &&
> +               "Unknown return type");
> +    }
> +  } else {
> +    SmallVector<EVT, 16> vtparts;
> +    ComputeValueVTs(*TLI, Ty, vtparts);
> +    unsigned idx = 0;
> +    for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
> +      unsigned elems = 1;
> +      EVT elemtype = vtparts[i];
> +      if (vtparts[i].isVector()) {
> +        elems = vtparts[i].getVectorNumElements();
> +        elemtype = vtparts[i].getVectorElementType();
> +      }
> +
> +      for (unsigned j=0, je=elems; j!=je; ++j) {
> +        unsigned sz = elemtype.getSizeInBits();
> +        if (elemtype.isInteger() && (sz < 32)) sz = 32;
> +        O << ".reg .b" << sz << " func_retval" << idx;
> +        if (j<je-1) O << ", ";
> +        ++idx;
> +      }
> +      if (i < e-1)
> +        O << ", ";
> +    }
> +  }
> +  O << ") ";
> +  return;
> +}
> +
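The scalar ABI branch above rounds small integer widths up. A toy sketch of just that rule (standalone, not the real printReturnValStr):

// Sketch: ABI return declarations round integer widths up to 32 bits;
// floating-point widths pass through unchanged.
#include <cstdio>

static void printScalarRetVal(bool IsInt, unsigned Bits) {
  unsigned Size = Bits;
  if (IsInt && Size < 32)
    Size = 32; // i1/i8/i16 all become .b32
  std::printf(".param .b%u func_retval0\n", Size);
}

int main() {
  printScalarRetVal(true, 16);  // .param .b32 func_retval0
  printScalarRetVal(false, 64); // .param .b64 func_retval0
  return 0;
}
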
> +void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
> +                                        raw_ostream &O) {
> +  const Function *F = MF.getFunction();
> +  printReturnValStr(F, O);
> +}
> +
> +void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
> +  SmallString<128> Str;
> +  raw_svector_ostream O(Str);
> +
> +  // Set up
> +  MRI = &MF->getRegInfo();
> +  F = MF->getFunction();
> +  emitLinkageDirective(F,O);
> +  if (llvm::isKernelFunction(*F))
> +    O << ".entry ";
> +  else {
> +    O << ".func ";
> +    printReturnValStr(*MF, O);
> +  }
> +
> +  O << *CurrentFnSym;
> +
> +  emitFunctionParamList(*MF, O);
> +
> +  if (llvm::isKernelFunction(*F))
> +    emitKernelFunctionDirectives(*F, O);
> +
> +  OutStreamer.EmitRawText(O.str());
> +
> +  prevDebugLoc = DebugLoc();
> +}
> +
> +void NVPTXAsmPrinter::EmitFunctionBodyStart() {
> +  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
> +  unsigned numRegClasses = TRI.getNumRegClasses();
> +  VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1];
> +  OutStreamer.EmitRawText(StringRef("{\n"));
> +  setAndEmitFunctionVirtualRegisters(*MF);
> +
> +  SmallString<128> Str;
> +  raw_svector_ostream O(Str);
> +  emitDemotedVars(MF->getFunction(), O);
> +  OutStreamer.EmitRawText(O.str());
> +}
> +
> +void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
> +  OutStreamer.EmitRawText(StringRef("}\n"));
> +  delete []VRidGlobal2LocalMap;
> +}
> +
> +
> +void
> +NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F,
> +                                              raw_ostream &O) const {
> +  // If any of reqntid* is specified in the NVVM IR, output the .reqntid
> +  // directive and set the unspecified dimensions to 1. If none of them is
> +  // specified, don't output the directive at all.
> +  unsigned reqntidx, reqntidy, reqntidz;
> +  bool specified = false;
> +  if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1;
> +  else specified = true;
> +  if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1;
> +  else specified = true;
> +  if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1;
> +  else specified = true;
> +
> +  if (specified)
> +    O << ".reqntid " << reqntidx << ", "
> +    << reqntidy << ", " << reqntidz << "\n";
> +
> +  // If any of maxntid* is specified in the NVVM IR, output the .maxntid
> +  // directive and set the unspecified dimensions to 1. If none of them is
> +  // specified, don't output the directive at all.
> +  unsigned maxntidx, maxntidy, maxntidz;
> +  specified = false;
> +  if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1;
> +  else specified = true;
> +  if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1;
> +  else specified = true;
> +  if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1;
> +  else specified = true;
> +
> +  if (specified)
> +    O << ".maxntid " << maxntidx << ", "
> +    << maxntidy << ", " << maxntidz << "\n";
> +
> +  unsigned mincta;
> +  if (llvm::getMinCTASm(F, mincta))
> +    O << ".minnctapersm " << mincta << "\n";
> +}
> +
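The specified/defaulted interplay is subtle: one specified dimension forces all three to print. A toy sketch (standalone; the real code goes through the llvm::getMaxNTID* helpers):

// Sketch: any specified maxntid dimension triggers the directive, and the
// unspecified dimensions default to 1.
#include <cstdio>

static void emitMaxNTID(const unsigned *X, const unsigned *Y,
                        const unsigned *Z) {
  bool Specified = X || Y || Z;
  unsigned XV = X ? *X : 1, YV = Y ? *Y : 1, ZV = Z ? *Z : 1;
  if (Specified)
    std::printf(".maxntid %u, %u, %u\n", XV, YV, ZV);
}

int main() {
  unsigned X = 16, Y = 16;
  emitMaxNTID(&X, &Y, 0); // .maxntid 16, 16, 1
  emitMaxNTID(0, 0, 0);   // no directive at all
  return 0;
}
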
> +void
> +NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
> +                                        raw_ostream &O) {
> +  const TargetRegisterClass * RC = MRI->getRegClass(vr);
> +  unsigned id = RC->getID();
> +
> +  std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
> +  unsigned mapped_vr = regmap[vr];
> +
> +  if (!isVec) {
> +    O << getNVPTXRegClassStr(RC) << mapped_vr;
> +    return;
> +  }
> +  // Vector virtual register
> +  if (getNVPTXVectorSize(RC) == 4)
> +    O << "{"
> +    << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
> +    << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
> +    << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
> +    << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
> +    << "}";
> +  else if (getNVPTXVectorSize(RC) == 2)
> +    O << "{"
> +    << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
> +    << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
> +    << "}";
> +  else
> +    assert(0 && "Unsupported vector size");
> +}
> +
> +void
> +NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
> +                                     raw_ostream &O) {
> +  getVirtualRegisterName(vr, isVec, O);
> +}
> +
> +void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO,
> +                                                const char *Modifier,
> +                                                raw_ostream &O) {
> +  char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'};
> +  int Imm = (int)MO.getImm();
> +  if(0 == strcmp(Modifier, "vecelem"))
> +    O << "_" << vecelem[Imm];
> +  else if(0 == strcmp(Modifier, "vecv4comm1")) {
> +    if((Imm < 0) || (Imm > 3))
> +      O << "//";
> +  }
> +  else if(0 == strcmp(Modifier, "vecv4comm2")) {
> +    if((Imm < 4) || (Imm > 7))
> +      O << "//";
> +  }
> +  else if(0 == strcmp(Modifier, "vecv4pos")) {
> +    if(Imm < 0) Imm = 0;
> +    O << "_" << vecelem[Imm%4];
> +  }
> +  else if(0 == strcmp(Modifier, "vecv2comm1")) {
> +    if((Imm < 0) || (Imm > 1))
> +      O << "//";
> +  }
> +  else if(0 == strcmp(Modifier, "vecv2comm2")) {
> +    if((Imm < 2) || (Imm > 3))
> +      O << "//";
> +  }
> +  else if(0 == strcmp(Modifier, "vecv2pos")) {
> +    if(Imm < 0) Imm = 0;
> +    O << "_" << vecelem[Imm%2];
> +  }
> +  else
> +    assert(0 && "Unknown Modifier on immediate operand");
> +}
> +
> +void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
> +                                   raw_ostream &O, const char *Modifier) {
> +  const MachineOperand &MO = MI->getOperand(opNum);
> +  switch (MO.getType()) {
> +  case MachineOperand::MO_Register:
> +    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
> +      if (MO.getReg() == NVPTX::VRDepot)
> +        O << DEPOTNAME << getFunctionNumber();
> +      else
> +        O << getRegisterName(MO.getReg());
> +    } else {
> +      if (!Modifier)
> +        emitVirtualRegister(MO.getReg(), false, O);
> +      else {
> +        if (strcmp(Modifier, "vecfull") == 0)
> +          emitVirtualRegister(MO.getReg(), true, O);
> +        else
> +          assert(0 &&
> +                 "Don't know how to handle the modifier on virtual register.");
> +      }
> +    }
> +    return;
> +
> +  case MachineOperand::MO_Immediate:
> +    if (!Modifier)
> +      O << MO.getImm();
> +    else if (strstr(Modifier, "vec") == Modifier)
> +      printVecModifiedImmediate(MO, Modifier, O);
> +    else
> +      assert(0 && "Don't know how to handle modifier on immediate operand");
> +    return;
> +
> +  case MachineOperand::MO_FPImmediate:
> +    printFPConstant(MO.getFPImm(), O);
> +    break;
> +
> +  case MachineOperand::MO_GlobalAddress:
> +    O << *Mang->getSymbol(MO.getGlobal());
> +    break;
> +
> +  case MachineOperand::MO_ExternalSymbol: {
> +    const char * symbname = MO.getSymbolName();
> +    if (strstr(symbname, ".PARAM") == symbname) {
> +      unsigned index;
> +      sscanf(symbname+6, "%u[];", &index);
> +      printParamName(index, O);
> +    }
> +    else if (strstr(symbname, ".HLPPARAM") == symbname) {
> +      unsigned index;
> +      sscanf(symbname+9, "%u[];", &index);
> +      O << *CurrentFnSym << "_param_" << index << "_offset";
> +    }
> +    else
> +      O << symbname;
> +    break;
> +  }
> +
> +  case MachineOperand::MO_MachineBasicBlock:
> +    O << *MO.getMBB()->getSymbol();
> +    return;
> +
> +  default:
> +    assert(0 && " Operand type not supported.");
> +  }
> +}
> +
> +void NVPTXAsmPrinter::
> +printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
> +#ifndef __OPTIMIZE__
> +  O << "\t// Implicit def :";
> +  //printOperand(MI, 0);
> +  O << "\n";
> +#endif
> +}
> +
> +void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
> +                                      raw_ostream &O, const char *Modifier) {
> +  printOperand(MI, opNum, O);
> +
> +  if (Modifier && !strcmp(Modifier, "add")) {
> +    O << ", ";
> +    printOperand(MI, opNum+1, O);
> +  } else {
> +    if (MI->getOperand(opNum+1).isImm() &&
> +        MI->getOperand(opNum+1).getImm() == 0)
> +      return; // don't print ',0' or '+0'
> +    O << "+";
> +    printOperand(MI, opNum+1, O);
> +  }
> +}
> +
> +void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
> +                                    raw_ostream &O, const char *Modifier)
> +{
> +  if (Modifier) {
> +    const MachineOperand &MO = MI->getOperand(opNum);
> +    int Imm = (int)MO.getImm();
> +    if (!strcmp(Modifier, "volatile")) {
> +      if (Imm)
> +        O << ".volatile";
> +    } else if (!strcmp(Modifier, "addsp")) {
> +      switch (Imm) {
> +      case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break;
> +      case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break;
> +      case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break;
> +      case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break;
> +      case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break;
> +      case NVPTX::PTXLdStInstCode::GENERIC:
> +        if (!nvptxSubtarget.hasGenericLdSt())
> +          O << ".global";
> +        break;
> +      default:
> +        assert("wrong value");
> +      }
> +    }
> +    else if (!strcmp(Modifier, "sign")) {
> +      if (Imm==NVPTX::PTXLdStInstCode::Signed)
> +        O << "s";
> +      else if (Imm==NVPTX::PTXLdStInstCode::Unsigned)
> +        O << "u";
> +      else
> +        O << "f";
> +    }
> +    else if (!strcmp(Modifier, "vec")) {
> +      if (Imm==NVPTX::PTXLdStInstCode::V2)
> +        O << ".v2";
> +      else if (Imm==NVPTX::PTXLdStInstCode::V4)
> +        O << ".v4";
> +    }
> +    else
> +      assert("unknown modifier");
> +  }
> +  else
> +    assert("unknown modifier");
> +}
> +
> +void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
> +
> +  emitLinkageDirective(F,O);
> +  if (llvm::isKernelFunction(*F))
> +    O << ".entry ";
> +  else
> +    O << ".func ";
> +  printReturnValStr(F, O);
> +  O << *CurrentFnSym << "\n";
> +  emitFunctionParamList(F, O);
> +  O << ";\n";
> +}
> +
> +static bool usedInGlobalVarDef(const Constant *C)
> +{
> +  if (!C)
> +    return false;
> +
> +  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
> +    if (GV->getName().str() == "llvm.used")
> +      return false;
> +    return true;
> +  }
> +
> +  for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
> +      ui!=ue; ++ui) {
> +    // Use a distinct name to avoid shadowing the parameter C.
> +    const Constant *CU = dyn_cast<Constant>(*ui);
> +    if (usedInGlobalVarDef(CU))
> +      return true;
> +  }
> +  return false;
> +}
> +
> +static bool usedInOneFunc(const User *U, Function const *&oneFunc)
> +{
> +  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
> +    if (othergv->getName().str() == "llvm.used")
> +      return true;
> +  }
> +
> +  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
> +    if (instr->getParent() && instr->getParent()->getParent()) {
> +      const Function *curFunc = instr->getParent()->getParent();
> +      if (oneFunc && (curFunc != oneFunc))
> +        return false;
> +      oneFunc = curFunc;
> +      return true;
> +    }
> +    else
> +      return false;
> +  }
> +
> +  if (const MDNode *md = dyn_cast<MDNode>(U))
> +    if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
> +        (md->getName().str() == "llvm.dbg.sp")))
> +      return true;
> +
> +
> +  for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
> +      ui!=ue; ++ui) {
> +    if (!usedInOneFunc(*ui, oneFunc))
> +      return false;
> +  }
> +  return true;
> +}
> +
> +/* Find out if a global variable can be demoted to local scope.
> + * Currently, this is valid for CUDA shared variables, which have local
> + * scope and global lifetime. So the conditions to check are:
> + * 1. Is the global variable in the shared address space?
> + * 2. Does it have internal linkage?
> + * 3. Is the global variable referenced only in one function?
> + */
> +static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
> +  if (!gv->hasInternalLinkage())
> +    return false;
> +  const PointerType *Pty = gv->getType();
> +  if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
> +    return false;
> +
> +  const Function *oneFunc = 0;
> +
> +  if (!usedInOneFunc(gv, oneFunc))
> +    return false;
> +  if (!oneFunc)
> +    return false;
> +  f = oneFunc;
> +  return true;
> +}
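
Concretely (my illustration, not taken from the patch), a function-scope
CUDA declaration such as

    __global__ void k(float *out) {
      __shared__ float tile[128];       // local scope, global lifetime
      tile[threadIdx.x % 128] = (float)threadIdx.x;
      __syncthreads();
      out[threadIdx.x] = tile[0];
    }

reaches the backend as an internal module-level global in the shared
address space that is referenced only by k, which is exactly the case
canDemoteGlobalVar() accepts; emitDemotedVars() later prints the
declaration inside k's body rather than at module scope.
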
> +
> +static bool useFuncSeen(const Constant *C,
> +                        llvm::DenseMap<const Function *, bool> &seenMap) {
> +  for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
> +      ui!=ue; ++ui) {
> +    if (const Constant *cu = dyn_cast<Constant>(*ui)) {
> +      if (useFuncSeen(cu, seenMap))
> +        return true;
> +    } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
> +      const BasicBlock *bb = I->getParent();
> +      if (!bb) continue;
> +      const Function *caller = bb->getParent();
> +      if (!caller) continue;
> +      if (seenMap.find(caller) != seenMap.end())
> +        return true;
> +    }
> +  }
> +  return false;
> +}
> +
> +void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
> +  llvm::DenseMap<const Function *, bool> seenMap;
> +  for (Module::const_iterator FI=M.begin(), FE=M.end();
> +      FI!=FE; ++FI) {
> +    const Function *F = FI;
> +
> +    if (F->isDeclaration()) {
> +      if (F->use_empty())
> +        continue;
> +      if (F->getIntrinsicID())
> +        continue;
> +      CurrentFnSym = Mang->getSymbol(F);
> +      emitDeclaration(F, O);
> +      continue;
> +    }
> +    for (Value::const_use_iterator iter=F->use_begin(),
> +        iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
> +      if (const Constant *C = dyn_cast<Constant>(*iter)) {
> +        if (usedInGlobalVarDef(C)) {
> +          // The use is in the initialization of a global variable
> +          // that is a function pointer, so print a declaration
> +          // for the original function
> +          CurrentFnSym = Mang->getSymbol(F);
> +          emitDeclaration(F, O);
> +          break;
> +        }
> +        // Emit a declaration of this function if the function that
> +        // uses this constant expr has already been seen.
> +        if (useFuncSeen(C, seenMap)) {
> +          CurrentFnSym = Mang->getSymbol(F);
> +          emitDeclaration(F, O);
> +          break;
> +        }
> +      }
> +
> +      if (!isa<Instruction>(*iter)) continue;
> +      const Instruction *instr = cast<Instruction>(*iter);
> +      const BasicBlock *bb = instr->getParent();
> +      if (!bb) continue;
> +      const Function *caller = bb->getParent();
> +      if (!caller) continue;
> +
> +      // If a caller has already been seen, then the caller appears in
> +      // the module before the callee, so print out a declaration for
> +      // the callee.
> +      if (seenMap.find(caller) != seenMap.end()) {
> +        CurrentFnSym = Mang->getSymbol(F);
> +        emitDeclaration(F, O);
> +        break;
> +      }
> +    }
> +    seenMap[F] = true;
> +  }
> +}
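
The ordering problem being solved here, in miniature (illustrative IR of my
own, not from the patch): given a module laid out as

    define void @caller() {
      call void @callee()
      ret void
    }
    define void @callee() {
      ret void
    }

PTX, like C, requires a declaration before use, so a ".func" forward
declaration of callee must be printed ahead of caller's body even though
callee's definition only appears later in the module.
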
> +
> +void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
> +  DebugInfoFinder DbgFinder;
> +  DbgFinder.processModule(M);
> +
> +  unsigned i=1;
> +  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
> +      E = DbgFinder.compile_unit_end(); I != E; ++I) {
> +    DICompileUnit DIUnit(*I);
> +    StringRef Filename(DIUnit.getFilename());
> +    StringRef Dirname(DIUnit.getDirectory());
> +    SmallString<128> FullPathName = Dirname;
> +    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
> +      sys::path::append(FullPathName, Filename);
> +      Filename = FullPathName.str();
> +    }
> +    if (filenameMap.find(Filename.str()) != filenameMap.end())
> +      continue;
> +    filenameMap[Filename.str()] = i;
> +    OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
> +    ++i;
> +  }
> +
> +  for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
> +      E = DbgFinder.subprogram_end(); I != E; ++I) {
> +    DISubprogram SP(*I);
> +    StringRef Filename(SP.getFilename());
> +    StringRef Dirname(SP.getDirectory());
> +    SmallString<128> FullPathName = Dirname;
> +    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
> +      sys::path::append(FullPathName, Filename);
> +      Filename = FullPathName.str();
> +    }
> +    if (filenameMap.find(Filename.str()) != filenameMap.end())
> +      continue;
> +    filenameMap[Filename.str()] = i;
> +    ++i;
> +  }
> +}
> +
> +bool NVPTXAsmPrinter::doInitialization(Module &M) {
> +
> +  SmallString<128> Str1;
> +  raw_svector_ostream OS1(Str1);
> +
> +  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
> +  MMI->AnalyzeModule(M);
> +
> +  // We deliberately do not call the parent's doInitialization() here;
> +  // see the note in doFinalization() below.
> +  //bool Result = AsmPrinter::doInitialization(M);
> +
> +  // Initialize TargetLoweringObjectFile.
> +  const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
> +          .Initialize(OutContext, TM);
> +
> +  Mang = new Mangler(OutContext, *TM.getTargetData());
> +
> +  // Emit header before any dwarf directives are emitted below.
> +  emitHeader(M, OS1);
> +  OutStreamer.EmitRawText(OS1.str());
> +
> +  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
> +    recordAndEmitFilenames(M);
> +
> +  SmallString<128> Str2;
> +  raw_svector_ostream OS2(Str2);
> +
> +  emitDeclarations(M, OS2);
> +
> +  // Print out module-level global variables here.
> +  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
> +      I != E; ++I)
> +    printModuleLevelGV(I, OS2);
> +
> +  OS2 << '\n';
> +
> +  OutStreamer.EmitRawText(OS2.str());
> +  return false;  // success
> +}
> +
> +void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
> +  O << "//\n";
> +  O << "// Generated by LLVM NVPTX Back-End\n";
> +  O << "//\n";
> +  O << "\n";
> +
> +  O << ".version 3.0\n";
> +
> +  O << ".target ";
> +  O << nvptxSubtarget.getTargetName();
> +
> +  if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
> +    O << ", texmode_independent";
> +  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
> +    if (!nvptxSubtarget.hasDouble())
> +      O << ", map_f64_to_f32";
> +  }
> +
> +  if (MAI->doesSupportDebugInformation())
> +    O << ", debug";
> +
> +  O << "\n";
> +
> +  O << ".address_size ";
> +  if (nvptxSubtarget.is64Bit())
> +    O << "64";
> +  else
> +    O << "32";
> +  O << "\n";
> +
> +  O << "\n";
> +}
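
For reference, on a 64-bit CUDA subtarget whose target name is sm_20 (a
hypothetical configuration), this function produces a module header along
these lines:

    //
    // Generated by LLVM NVPTX Back-End
    //

    .version 3.0
    .target sm_20
    .address_size 64

with ", map_f64_to_f32" appended to the .target line when the subtarget
lacks double support, and ", debug" when debug info is supported.
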
> +
> +bool NVPTXAsmPrinter::doFinalization(Module &M) {
> +  // XXX Temporarily remove global variables so that doFinalization() will
> +  // not emit them again (global variables are emitted at the beginning).
> +
> +  Module::GlobalListType &global_list = M.getGlobalList();
> +  int i, n = global_list.size();
> +  GlobalVariable **gv_array = new GlobalVariable* [n];
> +
> +  // first, back-up GlobalVariable in gv_array
> +  i = 0;
> +  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
> +      I != E; ++I)
> +    gv_array[i++] = &*I;
> +
> +  // second, empty global_list
> +  while (!global_list.empty())
> +    global_list.remove(global_list.begin());
> +
> +  // call doFinalization
> +  bool ret = AsmPrinter::doFinalization(M);
> +
> +  // now we restore global variables
> +  for (i = 0; i < n; i ++)
> +    global_list.insert(global_list.end(), gv_array[i]);
> +
> +  delete[] gv_array;
> +  return ret;
> +
> +
> +  //bool Result = AsmPrinter::doFinalization(M);
> +  // Instead of calling the parent's doFinalization, we may clone the
> +  // parent's doFinalization and customize it here. Currently we would
> +  // have to #ifdef out the EmitGlobals() call in the parent's
> +  // doFinalization, which is too intrusive.
> +  //
> +  // Same for doInitialization.
> +  //return Result;
> +}
> +
> +// This function emits appropriate linkage directives for
> +// functions and global variables.
> +//
> +// extern function declaration            -> .extern
> +// extern function definition             -> .visible
> +// external global variable with init     -> .visible
> +// external without init                  -> .extern
> +// appending                              -> not allowed, assert.
> +
> +void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
> +{
> +  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
> +    if (V->hasExternalLinkage()) {
> +      if (isa<GlobalVariable>(V)) {
> +        const GlobalVariable *GVar = cast<GlobalVariable>(V);
> +        if (GVar) {
> +          if (GVar->hasInitializer())
> +            O << ".visible ";
> +          else
> +            O << ".extern ";
> +        }
> +      } else if (V->isDeclaration())
> +        O << ".extern ";
> +      else
> +        O << ".visible ";
> +    } else if (V->hasAppendingLinkage()) {
> +      std::string msg;
> +      msg.append("Error: ");
> +      msg.append("Symbol ");
> +      if (V->hasName())
> +        msg.append(V->getName().str());
> +      msg.append("has unsupported appending linkage type");
> +      llvm_unreachable(msg.c_str());
> +    }
> +  }
> +}
> +
> +
> +void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
> +                                         bool processDemoted) {
> +
> +  // Skip meta data
> +  if (GVar->hasSection()) {
> +    if (GVar->getSection() == "llvm.metadata")
> +      return;
> +  }
> +
> +  const TargetData *TD = TM.getTargetData();
> +
> +  // GlobalVariables are always constant pointers themselves.
> +  const PointerType *PTy = GVar->getType();
> +  Type *ETy = PTy->getElementType();
> +
> +  if (GVar->hasExternalLinkage()) {
> +    if (GVar->hasInitializer())
> +      O << ".visible ";
> +    else
> +      O << ".extern ";
> +  }
> +
> +  if (llvm::isTexture(*GVar)) {
> +    O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
> +    return;
> +  }
> +
> +  if (llvm::isSurface(*GVar)) {
> +    O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
> +    return;
> +  }
> +
> +  if (GVar->isDeclaration()) {
> +    // (extern) declarations, no definition or initializer
> +    // Currently the only known declaration is for an automatic __local
> +    // (.shared) promoted to global.
> +    emitPTXGlobalVariable(GVar, O);
> +    O << ";\n";
> +    return;
> +  }
> +
> +  if (llvm::isSampler(*GVar)) {
> +    O << ".global .samplerref " << llvm::getSamplerName(*GVar);
> +
> +    Constant *Initializer = NULL;
> +    if (GVar->hasInitializer())
> +      Initializer = GVar->getInitializer();
> +    ConstantInt *CI = NULL;
> +    if (Initializer)
> +      CI = dyn_cast<ConstantInt>(Initializer);
> +    if (CI) {
> +      unsigned sample = CI->getZExtValue();
> +
> +      O << " = { ";
> +
> +      for (int i = 0, addr = ((sample & __CLK_ADDRESS_MASK) >>
> +          __CLK_ADDRESS_BASE); i < 3; i++) {
> +        O << "addr_mode_" << i << " = ";
> +        switch (addr) {
> +        case 0: O << "wrap"; break;
> +        case 1: O << "clamp_to_border"; break;
> +        case 2: O << "clamp_to_edge"; break;
> +        case 3: O << "wrap"; break;
> +        case 4: O << "mirror"; break;
> +        }
> +        O << ", ";
> +      }
> +      O << "filter_mode = ";
> +      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
> +      case 0: O << "nearest"; break;
> +      case 1: O << "linear";  break;
> +      case 2: assert(0 && "Anisotropic filtering is not supported");
> +        // falls through to "nearest" in release builds
> +      default: O << "nearest"; break;
> +      }
> +      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
> +        O << ", force_unnormalized_coords = 1";
> +      }
> +      O << " }";
> +    }
> +
> +    O << ";\n";
> +    return;
> +  }
> +
> +  if (GVar->hasPrivateLinkage()) {
> +
> +    if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
> +      return;
> +
> +    // FIXME - need better way (e.g. Metadata) to avoid generating this global
> +    if (!strncmp(GVar->getName().data(), "filename", 8))
> +      return;
> +    if (GVar->use_empty())
> +      return;
> +  }
> +
> +  const Function *demotedFunc = 0;
> +  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
> +    O << "// " << GVar->getName().str() << " has been demoted\n";
> +    if (localDecls.find(demotedFunc) != localDecls.end())
> +      localDecls[demotedFunc].push_back(GVar);
> +    else {
> +      std::vector<GlobalVariable *> temp;
> +      temp.push_back(GVar);
> +      localDecls[demotedFunc] = temp;
> +    }
> +    return;
> +  }
> +
> +  O << ".";
> +  emitPTXAddressSpace(PTy->getAddressSpace(), O);
> +  if (GVar->getAlignment() == 0)
> +    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
> +  else
> +    O << " .align " << GVar->getAlignment();
> +
> +
> +  if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
> +    O << " .";
> +    O << getPTXFundamentalTypeStr(ETy, false);
> +    O << " ";
> +    O << *Mang->getSymbol(GVar);
> +
> +    // PTX allows variable initialization only for the constant and
> +    // global state spaces.
> +    if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
> +        (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
> +        (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
> +        && GVar->hasInitializer()) {
> +      Constant *Initializer = GVar->getInitializer();
> +      if (!Initializer->isNullValue()) {
> +        O << " = " ;
> +        printScalarConstant(Initializer, O);
> +      }
> +    }
> +  } else {
> +    unsigned int ElementSize = 0;
> +
> +    // Although PTX has direct support for struct and array types, LLVM
> +    // CodeGen cannot assume that a target supports such high-level field
> +    // accesses, so structs, arrays and vectors are lowered into arrays
> +    // of bytes.
> +    switch (ETy->getTypeID()) {
> +    case Type::StructTyID:
> +    case Type::ArrayTyID:
> +    case Type::VectorTyID:
> +      ElementSize = TD->getTypeStoreSize(ETy);
> +      // PTX allows variable initialization only for the constant and
> +      // global state spaces.
> +      if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
> +          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
> +          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
> +          && GVar->hasInitializer()) {
> +        Constant *Initializer = GVar->getInitializer();
> +        if (!isa<UndefValue>(Initializer) &&
> +            !Initializer->isNullValue()) {
> +          AggBuffer aggBuffer(ElementSize, O, *this);
> +          bufferAggregateConstant(Initializer, &aggBuffer);
> +          if (aggBuffer.numSymbols) {
> +            if (nvptxSubtarget.is64Bit()) {
> +              O << " .u64 " << *Mang->getSymbol(GVar) << "[";
> +              O << ElementSize/8;
> +            } else {
> +              O << " .u32 " << *Mang->getSymbol(GVar) << "[";
> +              O << ElementSize/4;
> +            }
> +            O << "]";
> +          } else {
> +            O << " .b8 " << *Mang->getSymbol(GVar) << "[";
> +            O << ElementSize;
> +            O << "]";
> +          }
> +          O << " = {";
> +          aggBuffer.print();
> +          O << "}";
> +        } else {
> +          O << " .b8 " << *Mang->getSymbol(GVar);
> +          if (ElementSize) {
> +            O << "[";
> +            O << ElementSize;
> +            O << "]";
> +          }
> +        }
> +      } else {
> +        O << " .b8 " << *Mang->getSymbol(GVar);
> +        if (ElementSize) {
> +          O << "[";
> +          O << ElementSize;
> +          O << "]";
> +        }
> +      }
> +      break;
> +    default:
> +      assert( 0 && "type not supported yet");
> +    }
> +
> +  }
> +  O << ";\n";
> +}
> +
> +void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
> +  if (localDecls.find(f) == localDecls.end())
> +    return;
> +
> +  std::vector<GlobalVariable *> &gvars = localDecls[f];
> +
> +  for (unsigned i=0, e=gvars.size(); i!=e; ++i) {
> +    O << "\t// demoted variable\n\t";
> +    printModuleLevelGV(gvars[i], O, true);
> +  }
> +}
> +
> +void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
> +                                          raw_ostream &O) const {
> +  switch (AddressSpace) {
> +  case llvm::ADDRESS_SPACE_LOCAL:
> +    O << "local";
> +    break;
> +  case llvm::ADDRESS_SPACE_GLOBAL:
> +    O << "global";
> +    break;
> +  case llvm::ADDRESS_SPACE_CONST:
> +    // This logic should be consistent with that in
> +    // getCodeAddrSpace() (NVPTXISelDAGToDAG.cpp)
> +    if (nvptxSubtarget.hasGenericLdSt())
> +      O << "global";
> +    else
> +      O << "const";
> +    break;
> +  case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
> +    O << "const";
> +    break;
> +  case llvm::ADDRESS_SPACE_SHARED:
> +    O << "shared";
> +    break;
> +  default:
> +    assert(0 && "unexpected address space");
> +  }
> +}
> +
> +std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
> +                                                      bool useB4PTR) const {
> +  switch (Ty->getTypeID()) {
> +  default:
> +    llvm_unreachable("unexpected type");
> +    break;
> +  case Type::IntegerTyID: {
> +    unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
> +    if (NumBits == 1)
> +      return "pred";
> +    else if (NumBits <= 64) {
> +      std::string name = "u";
> +      return name + utostr(NumBits);
> +    } else {
> +      llvm_unreachable("Integer too large");
> +      break;
> +    }
> +    break;
> +  }
> +  case Type::FloatTyID:
> +    return "f32";
> +  case Type::DoubleTyID:
> +    return "f64";
> +  case Type::PointerTyID:
> +    if (nvptxSubtarget.is64Bit())
> +      if (useB4PTR) return "b64";
> +      else return "u64";
> +    else
> +      if (useB4PTR) return "b32";
> +      else return "u32";
> +  }
> +  llvm_unreachable("unexpected type");
> +  return "";
> +}
> +
> +void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
> +                                            raw_ostream &O) {
> +
> +  const TargetData *TD = TM.getTargetData();
> +
> +  // GlobalVariables are always constant pointers themselves.
> +  const PointerType *PTy = GVar->getType();
> +  Type *ETy = PTy->getElementType();
> +
> +  O << ".";
> +  emitPTXAddressSpace(PTy->getAddressSpace(), O);
> +  if (GVar->getAlignment() == 0)
> +    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
> +  else
> +    O << " .align " << GVar->getAlignment();
> +
> +  if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
> +    O << " .";
> +    O << getPTXFundamentalTypeStr(ETy);
> +    O << " ";
> +    O << *Mang->getSymbol(GVar);
> +    return;
> +  }
> +
> +  int64_t ElementSize = 0;
> +
> +  // Although PTX has direct support for struct and array types, LLVM
> +  // CodeGen cannot assume that a target supports such high-level field
> +  // accesses, so structs and arrays are lowered into arrays of bytes.
> +  switch (ETy->getTypeID()) {
> +  case Type::StructTyID:
> +  case Type::ArrayTyID:
> +  case Type::VectorTyID:
> +    ElementSize = TD->getTypeStoreSize(ETy);
> +    O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
> +    if (ElementSize) {
> +      O << itostr(ElementSize) ;
> +    }
> +    O << "]";
> +    break;
> +  default:
> +    assert( 0 && "type not supported yet");
> +  }
> +  return;
> +}
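
A worked example of the byte-array lowering described above (mine, assuming
a shared-space global of type [256 x float] named tile): the declaration
comes out as

    .shared .align 4 .b8 tile[1024]

i.e. 256 * 4 bytes with the element type erased; the .align value is
whatever TargetData reports as the preferred alignment for the type.
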
> +
> +
> +static unsigned int
> +getOpenCLAlignment(const TargetData *TD,
> +                   Type *Ty) {
> +  if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
> +    return TD->getPrefTypeAlignment(Ty);
> +
> +  const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
> +  if (ATy)
> +    return getOpenCLAlignment(TD, ATy->getElementType());
> +
> +  const VectorType *VTy = dyn_cast<VectorType>(Ty);
> +  if (VTy) {
> +    Type *ETy = VTy->getElementType();
> +    unsigned int numE = VTy->getNumElements();
> +    unsigned int alignE = TD->getPrefTypeAlignment(ETy);
> +    if (numE == 3)
> +      return 4*alignE;
> +    else
> +      return numE*alignE;
> +  }
> +
> +  const StructType *STy = dyn_cast<StructType>(Ty);
> +  if (STy) {
> +    unsigned int alignStruct = 1;
> +    // Go through each element of the struct and find the
> +    // largest alignment.
> +    for (unsigned i=0, e=STy->getNumElements(); i != e; i++) {
> +      Type *ETy = STy->getElementType(i);
> +      unsigned int align = getOpenCLAlignment(TD, ETy);
> +      if (align > alignStruct)
> +        alignStruct = align;
> +    }
> +    return alignStruct;
> +  }
> +
> +  const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
> +  if (FTy)
> +    return TD->getPointerPrefAlignment();
> +  return TD->getPrefTypeAlignment(Ty);
> +}
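
The only non-obvious branch is the 3-element vector case, which follows the
OpenCL rule that a vec3 occupies a vec4's slot. A minimal sketch of that
rule in isolation (my code, not the patch's):

    #include <cassert>

    // A 3-element vector is aligned as if it had 4 elements.
    static unsigned vecAlign(unsigned NumElems, unsigned ElemAlign) {
      return (NumElems == 3 ? 4 : NumElems) * ElemAlign;
    }

    int main() {
      assert(vecAlign(3, 4) == 16);  // float3 aligns like float4
      assert(vecAlign(2, 4) == 8);   // float2 is just 2 * 4
      return 0;
    }
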
> +
> +void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
> +                                     int paramIndex, raw_ostream &O) {
> +  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
> +      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
> +    O << *CurrentFnSym << "_param_" << paramIndex;
> +  else {
> +    std::string argName = I->getName();
> +    const char *p = argName.c_str();
> +    while (*p) {
> +      if (*p == '.')
> +        O << "_";
> +      else
> +        O << *p;
> +      p++;
> +    }
> +  }
> +}
> +
> +void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
> +  Function::const_arg_iterator I, E;
> +  int i = 0;
> +
> +  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
> +      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
> +    O << *CurrentFnSym << "_param_" << paramIndex;
> +    return;
> +  }
> +
> +  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
> +    if (i==paramIndex) {
> +      printParamName(I, paramIndex, O);
> +      return;
> +    }
> +  }
> +  llvm_unreachable("paramIndex out of bound");
> +}
> +
> +void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
> +                                            raw_ostream &O) {
> +  const TargetData *TD = TM.getTargetData();
> +  const AttrListPtr &PAL = F->getAttributes();
> +  const TargetLowering *TLI = TM.getTargetLowering();
> +  Function::const_arg_iterator I, E;
> +  unsigned paramIndex = 0;
> +  bool first = true;
> +  bool isKernelFunc = llvm::isKernelFunction(*F);
> +  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
> +  MVT thePointerTy = TLI->getPointerTy();
> +
> +  O << "(\n";
> +
> +  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
> +    const Type *Ty = I->getType();
> +
> +    if (!first)
> +      O << ",\n";
> +
> +    first = false;
> +
> +    // Handle image/sampler parameters
> +    if (llvm::isSampler(*I) || llvm::isImage(*I)) {
> +      if (llvm::isImage(*I)) {
> +        std::string sname = I->getName();
> +        if (llvm::isImageWriteOnly(*I))
> +          O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
> +        else // Default image is read_only
> +          O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
> +      }
> +      else // Should be llvm::isSampler(*I)
> +        O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
> +        << paramIndex;
> +      continue;
> +    }
> +
> +    if (!PAL.paramHasAttr(paramIndex+1, Attribute::ByVal)) {
> +      // Just a scalar
> +      const PointerType *PTy = dyn_cast<PointerType>(Ty);
> +      if (isKernelFunc) {
> +        if (PTy) {
> +          // Special handling for pointer arguments to kernel
> +          O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
> +
> +          if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
> +            Type *ETy = PTy->getElementType();
> +            int addrSpace = PTy->getAddressSpace();
> +            switch(addrSpace) {
> +            default:
> +              O << ".ptr ";
> +              break;
> +            case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
> +              O << ".ptr .const ";
> +              break;
> +            case llvm::ADDRESS_SPACE_SHARED:
> +              O << ".ptr .shared ";
> +              break;
> +            case llvm::ADDRESS_SPACE_GLOBAL:
> +            case llvm::ADDRESS_SPACE_CONST:
> +              O << ".ptr .global ";
> +              break;
> +            }
> +            O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " ";
> +          }
> +          printParamName(I, paramIndex, O);
> +          continue;
> +        }
> +
> +        // non-pointer scalar to kernel func
> +        O << "\t.param ."
> +            << getPTXFundamentalTypeStr(Ty) << " ";
> +        printParamName(I, paramIndex, O);
> +        continue;
> +      }
> +      // Non-kernel function, just print .param .b<size> for ABI
> +      // and .reg .b<size> for non-ABI
> +      unsigned sz = 0;
> +      if (isa<IntegerType>(Ty)) {
> +        sz = cast<IntegerType>(Ty)->getBitWidth();
> +        if (sz < 32) sz = 32;
> +      }
> +      else if (isa<PointerType>(Ty))
> +        sz = thePointerTy.getSizeInBits();
> +      else
> +        sz = Ty->getPrimitiveSizeInBits();
> +      if (isABI)
> +        O << "\t.param .b" << sz << " ";
> +      else
> +        O << "\t.reg .b" << sz << " ";
> +      printParamName(I, paramIndex, O);
> +      continue;
> +    }
> +
> +    // param has byVal attribute. So should be a pointer
> +    const PointerType *PTy = dyn_cast<PointerType>(Ty);
> +    assert(PTy &&
> +           "Param with byval attribute should be a pointer type");
> +    Type *ETy = PTy->getElementType();
> +
> +    if (isABI || isKernelFunc) {
> +      // Just print .param .b8 .align <a> .param[size];
> +      // <a> = PAL.getparamalignment
> +      // size = typeallocsize of element type
> +      unsigned align = PAL.getParamAlignment(paramIndex+1);
> +      unsigned sz = TD->getTypeAllocSize(ETy);
> +      O << "\t.param .align " << align
> +          << " .b8 ";
> +      printParamName(I, paramIndex, O);
> +      O << "[" << sz << "]";
> +      continue;
> +    } else {
> +      // Split the ETy into constituent parts and
> +      // print .param .b<size> <name> for each part.
> +      // Further, if a part is vector, print the above for
> +      // each vector element.
> +      SmallVector<EVT, 16> vtparts;
> +      ComputeValueVTs(*TLI, ETy, vtparts);
> +      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
> +        unsigned elems = 1;
> +        EVT elemtype = vtparts[i];
> +        if (vtparts[i].isVector()) {
> +          elems = vtparts[i].getVectorNumElements();
> +          elemtype = vtparts[i].getVectorElementType();
> +        }
> +
> +        for (unsigned j=0,je=elems; j!=je; ++j) {
> +          unsigned sz = elemtype.getSizeInBits();
> +          if (elemtype.isInteger() && (sz < 32)) sz = 32;
> +          O << "\t.reg .b" << sz << " ";
> +          printParamName(I, paramIndex, O);
> +          if (j<je-1) O << ",\n";
> +          ++paramIndex;
> +        }
> +        if (i<e-1)
> +          O << ",\n";
> +      }
> +      --paramIndex;
> +      continue;
> +    }
> +  }
> +
> +  O << "\n)\n";
> +}
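
Putting the kernel path together (my worked example, assuming a kernel
symbol k taking (float *p, int n) on a 64-bit CUDA subtarget), the emitted
parameter list would look like:

    (
        .param .u64 k_param_0,
        .param .u32 k_param_1
    )

kernel pointer arguments become .param .u<address size>, and other scalars
use the fundamental type string (.u32 for i32); the sz < 32 widening only
applies on the non-kernel path above.
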
> +
> +void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
> +                                            raw_ostream &O) {
> +  const Function *F = MF.getFunction();
> +  emitFunctionParamList(F, O);
> +}
> +
> +
> +void NVPTXAsmPrinter::
> +setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
> +  SmallString<128> Str;
> +  raw_svector_ostream O(Str);
> +
> +  // Map the global virtual register number to a register class specific
> +  // virtual register number starting from 1 with that class.
> +  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
> +  //unsigned numRegClasses = TRI->getNumRegClasses();
> +
> +  // Emit the Fake Stack Object
> +  const MachineFrameInfo *MFI = MF.getFrameInfo();
> +  int NumBytes = (int) MFI->getStackSize();
> +  if (NumBytes) {
> +    O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t"
> +        << DEPOTNAME
> +        << getFunctionNumber() << "[" << NumBytes << "];\n";
> +    if (nvptxSubtarget.is64Bit()) {
> +      O << "\t.reg .b64 \t%SP;\n";
> +      O << "\t.reg .b64 \t%SPL;\n";
> +    }
> +    else {
> +      O << "\t.reg .b32 \t%SP;\n";
> +      O << "\t.reg .b32 \t%SPL;\n";
> +    }
> +  }
> +
> +  // Go through all virtual registers to establish the mapping between
> +  // the global virtual register number and the per-class virtual
> +  // register number. We use the per-class virtual register number in
> +  // the PTX output.
> +  unsigned int numVRs = MRI->getNumVirtRegs();
> +  for (unsigned i=0; i< numVRs; i++) {
> +    unsigned int vr = TRI->index2VirtReg(i);
> +    const TargetRegisterClass *RC = MRI->getRegClass(vr);
> +    std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
> +    int n = regmap.size();
> +    regmap.insert(std::make_pair(vr, n+1));
> +  }
> +
> +  // Emit register declarations
> +  // @TODO: Extract out the real register usage
> +  O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
> +  O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
> +  O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
> +  O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
> +  O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
> +  O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
> +  O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
> +
> +  // Emit declaration of the virtual registers or 'physical' registers for
> +  // each register class
> +  //for (unsigned i=0; i< numRegClasses; i++) {
> +  //    std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[i];
> +  //    const TargetRegisterClass *RC = TRI->getRegClass(i);
> +  //    std::string rcname = getNVPTXRegClassName(RC);
> +  //    std::string rcStr = getNVPTXRegClassStr(RC);
> +  //    //int n = regmap.size();
> +  //    if (!isNVPTXVectorRegClass(RC)) {
> +  //      O << "\t.reg " << rcname << " \t" << rcStr << "<"
> +  //        << NVPTXNumRegisters << ">;\n";
> +  //    }
> +
> +  // Only declare those registers that may be used; do not emit vector
> +  // registers, as they are all elementized to scalar registers.
> +  //if (n && !isNVPTXVectorRegClass(RC)) {
> +  //    if (RegAllocNilUsed) {
> +  //        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
> +  //          << ">;\n";
> +  //    }
> +  //    else {
> +  //        O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr)
> +  //          << "<" << 32 << ">;\n";
> +  //    }
> +  //}
> +  //}
> +
> +  OutStreamer.EmitRawText(O.str());
> +}
> +
> +
> +void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
> +  APFloat APF = APFloat(Fp->getValueAPF());  // make a copy
> +  bool ignored;
> +  unsigned int numHex;
> +  const char *lead;
> +
> +  if (Fp->getType()->getTypeID()==Type::FloatTyID) {
> +    numHex = 8;
> +    lead = "0f";
> +    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
> +                &ignored);
> +  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
> +    numHex = 16;
> +    lead = "0d";
> +    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
> +                &ignored);
> +  } else
> +    llvm_unreachable("unsupported fp type");
> +
> +  APInt API = APF.bitcastToAPInt();
> +  std::string hexstr(utohexstr(API.getZExtValue()));
> +  O << lead;
> +  if (hexstr.length() < numHex)
> +    O << std::string(numHex - hexstr.length(), '0');
> +  O << hexstr;
> +}
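
A self-contained illustration of the encoding (my code; the real path goes
through APFloat/APInt as above):

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // PTX spells a float immediate as "0f" plus 8 uppercase hex digits of
    // its IEEE-754 bits, left-padded with zeros (doubles: "0d" + 16).
    static void printPTXFloat(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof Bits);  // bitcast, like bitcastToAPInt()
      std::printf("0f%08" PRIX32 "\n", Bits);
    }

    int main() {
      printPTXFloat(1.0f);   // prints 0f3F800000
      printPTXFloat(-2.5f);  // prints 0fC0200000
    }
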
> +
> +void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
> +  if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
> +    O << CI->getValue();
> +    return;
> +  }
> +  if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
> +    printFPConstant(CFP, O);
> +    return;
> +  }
> +  if (isa<ConstantPointerNull>(CPV)) {
> +    O << "0";
> +    return;
> +  }
> +  if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
> +    O << *Mang->getSymbol(GVar);
> +    return;
> +  }
> +  if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
> +    Value *v = Cexpr->stripPointerCasts();
> +    if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
> +      O << *Mang->getSymbol(GVar);
> +      return;
> +    } else {
> +      O << *LowerConstant(CPV, *this);
> +      return;
> +    }
> +  }
> +  llvm_unreachable("Not scalar type found in printScalarConstant()");
> +}
> +
> +
> +void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
> +                                   AggBuffer *aggBuffer) {
> +
> +  const TargetData *TD = TM.getTargetData();
> +
> +  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
> +    int s = TD->getTypeAllocSize(CPV->getType());
> +    if (s<Bytes)
> +      s = Bytes;
> +    aggBuffer->addZeros(s);
> +    return;
> +  }
> +
> +  unsigned char *ptr;
> +  switch (CPV->getType()->getTypeID()) {
> +
> +  case Type::IntegerTyID: {
> +    const Type *ETy = CPV->getType();
> +    if (ETy == Type::getInt8Ty(CPV->getContext())) {
> +      // Use cast<> rather than dyn_cast<>: the result is dereferenced
> +      // unconditionally.
> +      unsigned char c =
> +          (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
> +      ptr = &c;
> +      aggBuffer->addBytes(ptr, 1, Bytes);
> +    } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
> +      short int16 =
> +          (short)cast<ConstantInt>(CPV)->getZExtValue();
> +      ptr = (unsigned char*)&int16;
> +      aggBuffer->addBytes(ptr, 2, Bytes);
> +    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
> +      if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
> +        int int32 =(int)(constInt->getZExtValue());
> +        ptr = (unsigned char*)&int32;
> +        aggBuffer->addBytes(ptr, 4, Bytes);
> +        break;
> +      }
> +      else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
> +        if (ConstantInt *constInt =
> +            dyn_cast<ConstantInt>(ConstantFoldConstantExpression(
> +                Cexpr, TD))) {
> +          int int32 =(int)(constInt->getZExtValue());
> +          ptr = (unsigned char*)&int32;
> +          aggBuffer->addBytes(ptr, 4, Bytes);
> +          break;
> +        }
> +        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
> +          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
> +          aggBuffer->addSymbol(v);
> +          aggBuffer->addZeros(4);
> +          break;
> +        }
> +      }
> +      assert(0 && "unsupported integer const type");
> +    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
> +      if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
> +        long long int64 =(long long)(constInt->getZExtValue());
> +        ptr = (unsigned char*)&int64;
> +        aggBuffer->addBytes(ptr, 8, Bytes);
> +        break;
> +      }
> +      else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
> +        if (ConstantInt *constInt = dyn_cast<ConstantInt>(
> +            ConstantFoldConstantExpression(Cexpr, TD))) {
> +          long long int64 =(long long)(constInt->getZExtValue());
> +          ptr = (unsigned char*)&int64;
> +          aggBuffer->addBytes(ptr, 8, Bytes);
> +          break;
> +        }
> +        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
> +          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
> +          aggBuffer->addSymbol(v);
> +          aggBuffer->addZeros(8);
> +          break;
> +        }
> +      }
> +      llvm_unreachable("unsupported integer const type");
> +    }
> +    else
> +      llvm_unreachable("unsupported integer const type");
> +    break;
> +  }
> +  case Type::FloatTyID:
> +  case Type::DoubleTyID: {
> +    // cast<>, not dyn_cast<>: the result is dereferenced unconditionally.
> +    ConstantFP *CFP = cast<ConstantFP>(CPV);
> +    const Type* Ty = CFP->getType();
> +    if (Ty == Type::getFloatTy(CPV->getContext())) {
> +      float float32 = (float)CFP->getValueAPF().convertToFloat();
> +      ptr = (unsigned char*)&float32;
> +      aggBuffer->addBytes(ptr, 4, Bytes);
> +    } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
> +      double float64 = CFP->getValueAPF().convertToDouble();
> +      ptr = (unsigned char*)&float64;
> +      aggBuffer->addBytes(ptr, 8, Bytes);
> +    }
> +    else {
> +      llvm_unreachable("unsupported fp const type");
> +    }
> +    break;
> +  }
> +  case Type::PointerTyID: {
> +    if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
> +      aggBuffer->addSymbol(GVar);
> +    }
> +    else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
> +      Value *v = Cexpr->stripPointerCasts();
> +      aggBuffer->addSymbol(v);
> +    }
> +    unsigned int s = TD->getTypeAllocSize(CPV->getType());
> +    aggBuffer->addZeros(s);
> +    break;
> +  }
> +
> +  case Type::ArrayTyID:
> +  case Type::VectorTyID:
> +  case Type::StructTyID: {
> +    if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
> +        isa<ConstantStruct>(CPV)) {
> +      int ElementSize = TD->getTypeAllocSize(CPV->getType());
> +      bufferAggregateConstant(CPV, aggBuffer);
> +      if ( Bytes > ElementSize )
> +        aggBuffer->addZeros(Bytes-ElementSize);
> +    }
> +    else if (isa<ConstantAggregateZero>(CPV))
> +      aggBuffer->addZeros(Bytes);
> +    else
> +      llvm_unreachable("Unexpected Constant type");
> +    break;
> +  }
> +
> +  default:
> +    llvm_unreachable("unsupported type");
> +  }
> +}
> +
> +void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
> +                                              AggBuffer *aggBuffer) {
> +  const TargetData *TD = TM.getTargetData();
> +  int Bytes;
> +
> +  // Old constants
> +  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
> +    if (CPV->getNumOperands())
> +      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
> +        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
> +    return;
> +  }
> +
> +  if (const ConstantDataSequential *CDS =
> +      dyn_cast<ConstantDataSequential>(CPV)) {
> +    if (CDS->getNumElements())
> +      for (unsigned i = 0; i < CDS->getNumElements(); ++i)
> +        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
> +                     aggBuffer);
> +    return;
> +  }
> +
> +
> +  if (isa<ConstantStruct>(CPV)) {
> +    if (CPV->getNumOperands()) {
> +      StructType *ST = cast<StructType>(CPV->getType());
> +      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
> +        if (i == (e - 1))
> +          Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
> +                  TD->getTypeAllocSize(ST) -
> +                  TD->getStructLayout(ST)->getElementOffset(i);
> +        else
> +          Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) -
> +                  TD->getStructLayout(ST)->getElementOffset(i);
> +        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes,
> +                     aggBuffer);
> +      }
> +    }
> +    return;
> +  }
> +  assert(0 && "unsupported constant type in printAggregateConstant()");
> +}
> +
> +// buildTypeNameMap - Run through symbol table looking for type names.
> +//
> +
> +
> +bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
> +
> +  std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
> +
> +  if (PI != TypeNameMap.end() &&
> +      (!PI->second.compare("struct._image1d_t") ||
> +          !PI->second.compare("struct._image2d_t") ||
> +          !PI->second.compare("struct._image3d_t")))
> +    return true;
> +
> +  return false;
> +}
> +
> +/// PrintAsmOperand - Print out an operand for an inline asm expression.
> +///
> +bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
> +                                      unsigned AsmVariant,
> +                                      const char *ExtraCode,
> +                                      raw_ostream &O) {
> +  if (ExtraCode && ExtraCode[0]) {
> +    if (ExtraCode[1] != 0) return true; // Unknown modifier.
> +
> +    switch (ExtraCode[0]) {
> +    default: return true;  // Unknown modifier.
> +    case 'r':
> +      break;
> +    }
> +  }
> +
> +  printOperand(MI, OpNo, O);
> +
> +  return false;
> +}
> +
> +bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
> +                                            unsigned OpNo,
> +                                            unsigned AsmVariant,
> +                                            const char *ExtraCode,
> +                                            raw_ostream &O) {
> +  if (ExtraCode && ExtraCode[0])
> +    return true;  // Unknown modifier
> +
> +  O << '[';
> +  printMemOperand(MI, OpNo, O);
> +  O << ']';
> +
> +  return false;
> +}
> +
> +bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
> +{
> +  switch(MI.getOpcode()) {
> +  default:
> +    return false;
> +  case NVPTX::CallArgBeginInst:  case NVPTX::CallArgEndInst0:
> +  case NVPTX::CallArgEndInst1:  case NVPTX::CallArgF32:
> +  case NVPTX::CallArgF64:  case NVPTX::CallArgI16:
> +  case NVPTX::CallArgI32:  case NVPTX::CallArgI32imm:
> +  case NVPTX::CallArgI64:  case NVPTX::CallArgI8:
> +  case NVPTX::CallArgParam:  case NVPTX::CallVoidInst:
> +  case NVPTX::CallVoidInstReg:  case NVPTX::Callseq_End:
> +  case NVPTX::CallVoidInstReg64:
> +  case NVPTX::DeclareParamInst:  case NVPTX::DeclareRetMemInst:
> +  case NVPTX::DeclareRetRegInst:  case NVPTX::DeclareRetScalarInst:
> +  case NVPTX::DeclareScalarParamInst:  case NVPTX::DeclareScalarRegInst:
> +  case NVPTX::StoreParamF32:  case NVPTX::StoreParamF64:
> +  case NVPTX::StoreParamI16:  case NVPTX::StoreParamI32:
> +  case NVPTX::StoreParamI64:  case NVPTX::StoreParamI8:
> +  case NVPTX::StoreParamS32I8:  case NVPTX::StoreParamU32I8:
> +  case NVPTX::StoreParamS32I16:  case NVPTX::StoreParamU32I16:
> +  case NVPTX::StoreParamScalar2F32:  case NVPTX::StoreParamScalar2F64:
> +  case NVPTX::StoreParamScalar2I16:  case NVPTX::StoreParamScalar2I32:
> +  case NVPTX::StoreParamScalar2I64:  case NVPTX::StoreParamScalar2I8:
> +  case NVPTX::StoreParamScalar4F32:  case NVPTX::StoreParamScalar4I16:
> +  case NVPTX::StoreParamScalar4I32:  case NVPTX::StoreParamScalar4I8:
> +  case NVPTX::StoreParamV2F32:  case NVPTX::StoreParamV2F64:
> +  case NVPTX::StoreParamV2I16:  case NVPTX::StoreParamV2I32:
> +  case NVPTX::StoreParamV2I64:  case NVPTX::StoreParamV2I8:
> +  case NVPTX::StoreParamV4F32:  case NVPTX::StoreParamV4I16:
> +  case NVPTX::StoreParamV4I32:  case NVPTX::StoreParamV4I8:
> +  case NVPTX::StoreRetvalF32:  case NVPTX::StoreRetvalF64:
> +  case NVPTX::StoreRetvalI16:  case NVPTX::StoreRetvalI32:
> +  case NVPTX::StoreRetvalI64:  case NVPTX::StoreRetvalI8:
> +  case NVPTX::StoreRetvalScalar2F32:  case NVPTX::StoreRetvalScalar2F64:
> +  case NVPTX::StoreRetvalScalar2I16:  case NVPTX::StoreRetvalScalar2I32:
> +  case NVPTX::StoreRetvalScalar2I64:  case NVPTX::StoreRetvalScalar2I8:
> +  case NVPTX::StoreRetvalScalar4F32:  case NVPTX::StoreRetvalScalar4I16:
> +  case NVPTX::StoreRetvalScalar4I32:  case NVPTX::StoreRetvalScalar4I8:
> +  case NVPTX::StoreRetvalV2F32:  case NVPTX::StoreRetvalV2F64:
> +  case NVPTX::StoreRetvalV2I16:  case NVPTX::StoreRetvalV2I32:
> +  case NVPTX::StoreRetvalV2I64:  case NVPTX::StoreRetvalV2I8:
> +  case NVPTX::StoreRetvalV4F32:  case NVPTX::StoreRetvalV4I16:
> +  case NVPTX::StoreRetvalV4I32:  case NVPTX::StoreRetvalV4I8:
> +  case NVPTX::LastCallArgF32:  case NVPTX::LastCallArgF64:
> +  case NVPTX::LastCallArgI16:  case NVPTX::LastCallArgI32:
> +  case NVPTX::LastCallArgI32imm:  case NVPTX::LastCallArgI64:
> +  case NVPTX::LastCallArgI8:  case NVPTX::LastCallArgParam:
> +  case NVPTX::LoadParamMemF32:  case NVPTX::LoadParamMemF64:
> +  case NVPTX::LoadParamMemI16:  case NVPTX::LoadParamMemI32:
> +  case NVPTX::LoadParamMemI64:  case NVPTX::LoadParamMemI8:
> +  case NVPTX::LoadParamRegF32:  case NVPTX::LoadParamRegF64:
> +  case NVPTX::LoadParamRegI16:  case NVPTX::LoadParamRegI32:
> +  case NVPTX::LoadParamRegI64:  case NVPTX::LoadParamRegI8:
> +  case NVPTX::LoadParamScalar2F32:  case NVPTX::LoadParamScalar2F64:
> +  case NVPTX::LoadParamScalar2I16:  case NVPTX::LoadParamScalar2I32:
> +  case NVPTX::LoadParamScalar2I64:  case NVPTX::LoadParamScalar2I8:
> +  case NVPTX::LoadParamScalar4F32:  case NVPTX::LoadParamScalar4I16:
> +  case NVPTX::LoadParamScalar4I32:  case NVPTX::LoadParamScalar4I8:
> +  case NVPTX::LoadParamV2F32:  case NVPTX::LoadParamV2F64:
> +  case NVPTX::LoadParamV2I16:  case NVPTX::LoadParamV2I32:
> +  case NVPTX::LoadParamV2I64:  case NVPTX::LoadParamV2I8:
> +  case NVPTX::LoadParamV4F32:  case NVPTX::LoadParamV4I16:
> +  case NVPTX::LoadParamV4I32:  case NVPTX::LoadParamV4I8:
> +  case NVPTX::PrototypeInst:   case NVPTX::DBG_VALUE:
> +    return true;
> +  }
> +  return false;
> +}
> +
> +void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
> +  std::stringstream temp;
> +  LineReader * reader = this->getReader(filename.str());
> +  temp << "\n//";
> +  temp << filename.str();
> +  temp << ":";
> +  temp << line;
> +  temp << " ";
> +  temp << reader->readLine(line);
> +  temp << "\n";
> +  this->OutStreamer.EmitRawText(Twine(temp.str()));
> +}
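
The effect is to interleave the original source into the generated PTX as
comments, one line per emitted location (hypothetical file and line):

    //kernel.cu:42 out[i] = in[i] * 2.0f;

fetched lazily through the LineReader cache below.
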
> +
> +
> +LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
> +  if (reader == NULL) {
> +    reader = new LineReader(filename);
> +  }
> +
> +  if (reader->fileName() != filename) {
> +    delete reader;
> +    reader = new LineReader(filename);
> +  }
> +
> +  return reader;
> +}
> +
> +
> +std::string
> +LineReader::readLine(unsigned lineNum) {
> +  if (lineNum < theCurLine) {
> +    theCurLine = 0;
> +    fstr.seekg(0, std::ios::beg);
> +  }
> +  while (theCurLine < lineNum) {
> +    fstr.getline(buff, 500);
> +    theCurLine++;
> +  }
> +  return buff;
> +}
> +
> +// Force static initialization.
> +extern "C" void LLVMInitializeNVPTXAsmPrinter() {
> +  RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
> +  RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,318 @@
> +//===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer --------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains a printer that converts from our internal representation
> +// of machine-dependent LLVM code to NVPTX assembly language.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTXASMPRINTER_H
> +#define NVPTXASMPRINTER_H
> +
> +#include "NVPTX.h"
> +#include "NVPTXTargetMachine.h"
> +#include "NVPTXSubtarget.h"
> +#include "llvm/Function.h"
> +#include "llvm/CodeGen/AsmPrinter.h"
> +#include "llvm/Support/CommandLine.h"
> +#include "llvm/MC/MCAsmInfo.h"
> +#include "llvm/MC/MCExpr.h"
> +#include "llvm/Target/TargetMachine.h"
> +#include "llvm/MC/MCSymbol.h"
> +#include "llvm/Support/FormattedStream.h"
> +#include "llvm/Target/Mangler.h"
> +#include "llvm/ADT/SmallString.h"
> +#include "llvm/ADT/StringExtras.h"
> +#include <fstream>
> +
> +// The PTX syntax and format is very different from that usually seen in
> +// a .s file, therefore we are not able to use the MCAsmStreamer
> +// interface here.
> +//
> +// We are handcrafting the output method here.
> +//
> +// A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
> +// (subclass of MCStreamer).
> +
> +// This is defined in AsmPrinter.cpp.
> +// Used to process the constant expressions in initializers.
> +namespace nvptx {
> +const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
> +                                  llvm::AsmPrinter &AP) ;
> +}
> +
> +namespace llvm {
> +
> +class LineReader {
> +private:
> +  unsigned theCurLine;
> +  std::ifstream fstr;
> +  char buff[512];
> +  std::string theFileName;
> +  SmallVector<unsigned, 32> lineOffset;
> +public:
> +  LineReader(std::string filename) {
> +    theCurLine = 0;
> +    fstr.open(filename.c_str());
> +    theFileName = filename;
> +  }
> +  std::string fileName() { return theFileName; }
> +  ~LineReader() {
> +    fstr.close();
> +  }
> +  std::string readLine(unsigned line);
> +};
> +
> +
> +
> +class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
> +
> +
> +  class AggBuffer {
> +    // Used to buffer the emitted string for initializing global
> +    // aggregates.
> +    //
> +    // Normally an aggregate (array, vector or structure) is emitted
> +    // as a u8[]. However, if one element/field of the aggregate
> +    // is a non-NULL address, then the aggregate is emitted as u32[]
> +    // or u64[].
> +    //
> +    // We first lay out the aggregate in 'buffer' in bytes, except for
> +    // those symbol addresses. For the i-th symbol address in the
> +    // aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
> +    // are filled with 0s. symbolPosInBuffer[i-1] records its position
> +    // in 'buffer', and Symbols[i-1] records the Value*.
> +    //
> +    // Once we have this AggBuffer setup, we can choose how to print
> +    // it out.
> +  public:
> +    unsigned size;   // size of the buffer in bytes
> +    unsigned char *buffer; // the buffer
> +    unsigned numSymbols;   // number of symbol addresses
> +    SmallVector<unsigned, 4> symbolPosInBuffer;
> +    SmallVector<Value *, 4> Symbols;
> +
> +  private:
> +    unsigned curpos;
> +    raw_ostream &O;
> +    NVPTXAsmPrinter &AP;
> +
> +  public:
> +    AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
> +    :O(_O),AP(_AP) {
> +      buffer = new unsigned char[_size];
> +      size = _size;
> +      curpos = 0;
> +      numSymbols = 0;
> +    }
> +    ~AggBuffer() {
> +      delete [] buffer;
> +    }
> +    unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
> +      assert((curpos+Num) <= size);
> +      assert((curpos+Bytes) <= size);
> +      for ( int i= 0; i < Num; ++i) {
> +        buffer[curpos] = Ptr[i];
> +        curpos ++;
> +      }
> +      for ( int i=Num; i < Bytes ; ++i) {
> +        buffer[curpos] = 0;
> +        curpos ++;
> +      }
> +      return curpos;
> +    }
> +    unsigned addZeros(int Num) {
> +      assert((curpos+Num) <= size);
> +      for ( int i= 0; i < Num; ++i) {
> +        buffer[curpos] = 0;
> +        curpos ++;
> +      }
> +      return curpos;
> +    }
> +    void addSymbol(Value *GVar) {
> +      symbolPosInBuffer.push_back(curpos);
> +      Symbols.push_back(GVar);
> +      numSymbols++;
> +    }
> +    void print() {
> +      if (numSymbols == 0) {
> +        // print out in bytes
> +        for (unsigned i=0; i<size; i++) {
> +          if (i)
> +            O << ", ";
> +          O << (unsigned int)buffer[i];
> +        }
> +      }
> +      else {
> +        // print out in 4-bytes or 8-bytes
> +        unsigned int pos = 0;
> +        unsigned int nSym = 0;
> +        unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
> +        unsigned int nBytes = 4;
> +        if (AP.nvptxSubtarget.is64Bit())
> +          nBytes = 8;
> +        for (pos=0; pos<size; pos+=nBytes) {
> +          if (pos)
> +            O << ", ";
> +          if (pos == nextSymbolPos) {
> +            Value *v = Symbols[nSym];
> +            if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
> +              MCSymbol *Name = AP.Mang->getSymbol(GVar);
> +              O << *Name;
> +            }
> +            else if (ConstantExpr *Cexpr =
> +                dyn_cast<ConstantExpr>(v)) {
> +              O << *nvptx::LowerConstant(Cexpr, AP);
> +            }
> +            else
> +              assert(0 && "symbol type unknown");
> +            nSym++;
> +            if (nSym >= numSymbols)
> +              nextSymbolPos = size+1;
> +            else
> +              nextSymbolPos = symbolPosInBuffer[nSym];
> +          }
> +          else
> +            if (nBytes == 4)
> +              O << *(unsigned int*)(buffer+pos);
> +            else
> +              O << *(unsigned long long*)(buffer+pos);
> +        }
> +      }
> +    }
> +  };
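
To make the buffering scheme concrete, here is a hedged driver sketch for
the class above; it buffers an initializer equivalent to { i32 1, i8* @g }
on a 64-bit target. O, AP, and GVarForG stand in for the real output
stream, printer, and global variable (all assumed, not from the patch):

    unsigned char One[4] = {1, 0, 0, 0};
    AggBuffer AB(16, O, AP);  // two 8-byte slots
    AB.addBytes(One, 4, 8);   // i32 1, zero-padded to its 8-byte slot
    AB.addSymbol(GVarForG);   // record where @g's address belongs...
    AB.addZeros(8);           // ...and keep that slot zeroed in 'buffer'
    AB.print();               // emits "1, g" as u64 elements
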
> +
> +  friend class AggBuffer;
> +
> +  virtual void emitSrcInText(StringRef filename, unsigned line);
> +
> +private:
> +  virtual const char *getPassName() const {
> +    return "NVPTX Assembly Printer";
> +  }
> +
> +  const Function *F;
> +  std::string CurrentFnName;
> +
> +  void EmitFunctionEntryLabel();
> +  void EmitFunctionBodyStart();
> +  void EmitFunctionBodyEnd();
> +
> +  void EmitInstruction(const MachineInstr *);
> +
> +  void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}
> +
> +  void printGlobalVariable(const GlobalVariable *GVar);
> +  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
> +                    const char *Modifier=0);
> +  void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
> +                     const char *Modifier=0);
> +  void printVecModifiedImmediate(const MachineOperand &MO,
> +                                 const char *Modifier, raw_ostream &O);
> +  void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
> +                       const char *Modifier=0);
> +  void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
> +  // definition autogenerated.
> +  void printInstruction(const MachineInstr *MI, raw_ostream &O);
> +  void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
> +                          bool=false);
> +  void printParamName(int paramIndex, raw_ostream &O);
> +  void printParamName(Function::const_arg_iterator I, int paramIndex,
> +                      raw_ostream &O);
> +  void emitHeader(Module &M, raw_ostream &O);
> +  void emitKernelFunctionDirectives(const Function& F,
> +                                    raw_ostream &O) const;
> +  void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
> +  void emitFunctionExternParamList(const MachineFunction &MF);
> +  void emitFunctionParamList(const Function *, raw_ostream &O);
> +  void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
> +  void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
> +  void emitFunctionTempData(const MachineFunction &MF,
> +                            unsigned &FrameSize);
> +  bool isImageType(const Type *Ty);
> +  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
> +                       unsigned AsmVariant, const char *ExtraCode,
> +                       raw_ostream &);
> +  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
> +                             unsigned AsmVariant, const char *ExtraCode,
> +                             raw_ostream &);
> +  void printReturnValStr(const Function *, raw_ostream &O);
> +  void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
> +
> +protected:
> +  bool doInitialization(Module &M);
> +  bool doFinalization(Module &M);
> +
> +private:
> +  std::string CurrentBankselLabelInBasicBlock;
> +
> +  // This is specific per MachineFunction.
> +  const MachineRegisterInfo *MRI;
> +  // The contents are specific for each
> +  // MachineFunction. But the size of the
> +  // array is not.
> +  std::map<unsigned, unsigned> *VRidGlobal2LocalMap;
> +  // cache the subtarget here.
> +  const NVPTXSubtarget &nvptxSubtarget;
> +  // Build the map between type name and ID based on module's type
> +  // symbol table.
> +  std::map<const Type *, std::string> TypeNameMap;
> +
> +  // List of variables demoted to a function scope.
> +  std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
> +
> +  // To record filename to ID mapping
> +  std::map<std::string, unsigned> filenameMap;
> +  void recordAndEmitFilenames(Module &);
> +
> +  void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
> +  void emitPTXAddressSpace(unsigned int AddressSpace,
> +                           raw_ostream &O) const;
> +  std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ;
> +  void printScalarConstant(Constant *CPV, raw_ostream &O) ;
> +  void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ;
> +  void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ;
> +  void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ;
> +
> +  void printOperandProper(const MachineOperand &MO);
> +
> +  void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
> +  void emitDeclarations(Module &, raw_ostream &O);
> +  void emitDeclaration(const Function *, raw_ostream &O);
> +
> +  static const char *getRegisterName(unsigned RegNo);
> +  void emitDemotedVars(const Function *, raw_ostream &);
> +
> +  LineReader *reader;
> +  LineReader *getReader(std::string);
> +public:
> +  NVPTXAsmPrinter(TargetMachine &TM,
> +                  MCStreamer &Streamer)
> +  : AsmPrinter(TM, Streamer),
> +    nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
> +    CurrentBankselLabelInBasicBlock = "";
> +    VRidGlobal2LocalMap = NULL;
> +    reader = NULL;
> +  }
> +
> +  ~NVPTXAsmPrinter() {
> +    if (reader)
> +      delete reader;
> +  }
> +
> +  bool ignoreLoc(const MachineInstr &);
> +
> +  virtual void getVirtualRegisterName(unsigned, bool, raw_ostream &);
> +
> +  DebugLoc prevDebugLoc;
> +  void emitLineNumberAsDotLoc(const MachineInstr &);
> +};
> +} // end of namespace
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXFrameLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXFrameLowering.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXFrameLowering.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXFrameLowering.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,76 @@
> +//=======- NVPTXFrameLowering.cpp - NVPTX Frame Information ---*- C++ -*-=====//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the NVPTX implementation of TargetFrameLowering class.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTXFrameLowering.h"
> +#include "NVPTX.h"
> +#include "NVPTXRegisterInfo.h"
> +#include "NVPTXSubtarget.h"
> +#include "NVPTXTargetMachine.h"
> +#include "llvm/ADT/BitVector.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> +#include "llvm/MC/MachineLocation.h"
> +#include "llvm/Target/TargetInstrInfo.h"
> +
> +using namespace llvm;
> +
> +bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
> +  return true;
> +}
> +
> +void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
> +  if (MF.getFrameInfo()->hasStackObjects()) {
> +    MachineBasicBlock &MBB = MF.front();
> +    // Insert "mov.u32 %SP, %Depot"
> +    MachineBasicBlock::iterator MBBI = MBB.begin();
> +    // This instruction really occurs before the first instruction
> +    // in the BB, so give it no debug location.
> +    DebugLoc dl = DebugLoc();
> +
> +    if (tm.getSubtargetImpl()->hasGenericLdSt()) {
> +      // mov %SPL, %depot;
> +      // cvta.local %SP, %SPL;
> +      if (is64bit) {
> +        MachineInstr *MI = BuildMI(MBB, MBBI, dl,
> +                               tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
> +                                   NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
> +        BuildMI(MBB, MI, dl,
> +                tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal)
> +        .addReg(NVPTX::VRDepot);
> +      } else {
> +        MachineInstr *MI = BuildMI(MBB, MBBI, dl,
> +                                  tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
> +                                   NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
> +        BuildMI(MBB, MI, dl,
> +                tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal)
> +        .addReg(NVPTX::VRDepot);
> +      }
> +    }
> +    else {
> +      // mov %SP, %depot;
> +      if (is64bit)
> +        BuildMI(MBB, MBBI, dl,
> +                tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame)
> +                .addReg(NVPTX::VRDepot);
> +      else
> +        BuildMI(MBB, MBBI, dl,
> +                tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame)
> +                .addReg(NVPTX::VRDepot);
> +    }
> +  }
> +}
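
In PTX terms, the prologue above should produce something like the
following on a 64-bit target with generic ld/st support (a sketch; the
exact type suffixes are assumed from the PTX 3.0 ISA):

    mov.u64        %SPL, %depot;   // IMOV64rr: VRFrameLocal <- VRDepot
    cvta.local.u64 %SP,  %SPL;     // cvta_local_yes_64: generic frame pointer
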
> +
> +void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
> +                                      MachineBasicBlock &MBB) const {
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXFrameLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXFrameLowering.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXFrameLowering.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXFrameLowering.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,40 @@
> +//===--- NVPTXFrameLowering.h - Define frame lowering for NVPTX -*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +//
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTX_FRAMELOWERING_H
> +#define NVPTX_FRAMELOWERING_H
> +
> +#include "llvm/Target/TargetFrameLowering.h"
> +
> +
> +namespace llvm {
> +class NVPTXTargetMachine;
> +
> +class NVPTXFrameLowering : public TargetFrameLowering {
> +  NVPTXTargetMachine &tm;
> +  bool is64bit;
> +
> +public:
> +  explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
> +  : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
> +    tm(_tm), is64bit(_is64bit) {}
> +
> +  virtual bool hasFP(const MachineFunction &MF) const;
> +  virtual void emitPrologue(MachineFunction &MF) const;
> +  virtual void emitEpilogue(MachineFunction &MF,
> +                            MachineBasicBlock &MBB) const;
> +};
> +
> +} // End llvm namespace
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,681 @@
> +//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines an instruction selector for the NVPTX target.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +
> +#include "llvm/Instructions.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "NVPTXISelDAGToDAG.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/ErrorHandling.h"
> +#include "llvm/Support/CommandLine.h"
> +#include "llvm/Target/TargetIntrinsicInfo.h"
> +#include "llvm/GlobalValue.h"
> +
> +#undef DEBUG_TYPE
> +#define DEBUG_TYPE "nvptx-isel"
> +
> +using namespace llvm;
> +
> +
> +static cl::opt<bool>
> +UseFMADInstruction("nvptx-mad-enable",
> +                   cl::ZeroOrMore,
> +                cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
> +                   cl::init(false));
> +
> +static cl::opt<int>
> +FMAContractLevel("nvptx-fma-level",
> +                 cl::ZeroOrMore,
> +                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
> +                     " 1: do it  2: do it aggressively"),
> +                     cl::init(2));
> +
> +
> +static cl::opt<int>
> +UsePrecDivF32("nvptx-prec-divf32",
> +              cl::ZeroOrMore,
> +             cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
> +                  " IEEE Compliant F32 div.rnd if avaiable."),
> +                  cl::init(2));
> +
> +/// createNVPTXISelDag - This pass converts a legalized DAG into a
> +/// NVPTX-specific DAG, ready for instruction scheduling.
> +FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
> +                                       llvm::CodeGenOpt::Level OptLevel) {
> +  return new NVPTXDAGToDAGISel(TM, OptLevel);
> +}
> +
> +
> +NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
> +                                     CodeGenOpt::Level OptLevel)
> +: SelectionDAGISel(tm, OptLevel),
> +  Subtarget(tm.getSubtarget<NVPTXSubtarget>())
> +{
> +  // Always do fma.f32 fpcontract if the target supports the instruction.
> +  // Always do fma.f64 fpcontract if the target supports the instruction.
> +  // Do mad.f32 if nvptx-mad-enable is specified and the target does not
> +  // support fma.f32.
> +
> +  doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
> +  doFMAF32 =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
> +      (FMAContractLevel>=1);
> +  doFMAF64 =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
> +      (FMAContractLevel>=1);
> +  doFMAF32AGG =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
> +      (FMAContractLevel==2);
> +  doFMAF64AGG =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
> +      (FMAContractLevel==2);
> +
> +  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
> +
> +  doMulWide = (OptLevel > 0);
> +
> +  // Decide how to translate f32 div
> +  do_DIVF32_PREC = UsePrecDivF32;
> +  // SM versions earlier than sm_20 do not support div.rnd; use div.full.
> +  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
> +    do_DIVF32_PREC = 1;
> +
> +}
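
To illustrate what these flags control (a sketch, not taken from the
patch): with -nvptx-fma-level >= 1 on a target that has fma.f32, a
separate multiply and add such as

    %m = fmul float %a, %b
    %r = fadd float %m, %c

may be contracted into a single fma.rn.f32 during selection;
-nvptx-mad-enable requests mad.f32 only on targets without fma.f32, and
-nvptx-prec-divf32 independently selects div.approx.f32, div.full.f32, or
div.rn.f32 for f32 division.
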
> +
> +/// Select - Select instructions not customized! Used for
> +/// expanded, promoted and normal instructions.
> +SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
> +
> +  if (N->isMachineOpcode())
> +    return NULL;   // Already selected.
> +
> +  SDNode *ResNode = NULL;
> +  switch (N->getOpcode()) {
> +  case ISD::LOAD:
> +    ResNode = SelectLoad(N);
> +    break;
> +  case ISD::STORE:
> +    ResNode = SelectStore(N);
> +    break;
> +  }
> +  if (ResNode)
> +    return ResNode;
> +  return SelectCode(N);
> +}
> +
> +
> +static unsigned int
> +getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
> +{
> +  const Value *Src = N->getSrcValue();
> +  if (!Src)
> +    return NVPTX::PTXLdStInstCode::LOCAL;
> +
> +  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
> +    switch (PT->getAddressSpace()) {
> +    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
> +    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
> +    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
> +    case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
> +      return NVPTX::PTXLdStInstCode::CONSTANT;
> +    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
> +    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
> +    case llvm::ADDRESS_SPACE_CONST:
> +      // If the arch supports generic address space, translate it to GLOBAL
> +      // for correctness.
> +      // If the arch does not support generic address space, then the arch
> +      // does not really support ADDRESS_SPACE_CONST, so translate it
> +      // to CONSTANT for better performance.
> +      if (Subtarget.hasGenericLdSt())
> +        return NVPTX::PTXLdStInstCode::GLOBAL;
> +      else
> +        return NVPTX::PTXLdStInstCode::CONSTANT;
> +    default: break;
> +    }
> +  }
> +  return NVPTX::PTXLdStInstCode::LOCAL;
> +}
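
As a concrete illustration (the numeric address-space bindings are assumed
to follow the llvm::ADDRESS_SPACE_* enum used above, e.g. global = 1,
shared = 3), the returned code ends up as the PTX state-space qualifier on
the load or store:

    %g = load i32 addrspace(1)* %p   ; GLOBAL  -> ld.global.u32
    %s = load i32 addrspace(3)* %q   ; SHARED  -> ld.shared.u32
    %v = load i32* %r                ; GENERIC -> plain (generic) ld.u32
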
> +
> +
> +SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
> +  DebugLoc dl = N->getDebugLoc();
> +  LoadSDNode *LD = cast<LoadSDNode>(N);
> +  EVT LoadedVT = LD->getMemoryVT();
> +  SDNode *NVPTXLD= NULL;
> +
> +  // do not support pre/post inc/dec
> +  if (LD->isIndexed())
> +    return NULL;
> +
> +  if (!LoadedVT.isSimple())
> +    return NULL;
> +
> +  // Address Space Setting
> +  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
> +
> +  // Volatile Setting
> +  // - .volatile is only available for .global and .shared
> +  bool isVolatile = LD->isVolatile();
> +  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
> +      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
> +      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
> +    isVolatile = false;
> +
> +  // Vector Setting
> +  MVT SimpleVT = LoadedVT.getSimpleVT();
> +  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
> +  if (SimpleVT.isVector()) {
> +    unsigned num = SimpleVT.getVectorNumElements();
> +    if (num == 2)
> +      vecType = NVPTX::PTXLdStInstCode::V2;
> +    else if (num == 4)
> +      vecType = NVPTX::PTXLdStInstCode::V4;
> +    else
> +      return NULL;
> +  }
> +
> +  // Type Setting: fromType + fromTypeWidth
> +  //
> +  // Signed   : ISD::SEXTLOAD
> +  // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
> +  //            type is integer
> +  // Float    : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
> +  MVT ScalarVT = SimpleVT.getScalarType();
> +  unsigned fromTypeWidth =  ScalarVT.getSizeInBits();
> +  unsigned int fromType;
> +  if ((LD->getExtensionType() == ISD::SEXTLOAD))
> +    fromType = NVPTX::PTXLdStInstCode::Signed;
> +  else if (ScalarVT.isFloatingPoint())
> +    fromType = NVPTX::PTXLdStInstCode::Float;
> +  else
> +    fromType = NVPTX::PTXLdStInstCode::Unsigned;
> +
> +  // Create the machine instruction DAG
> +  SDValue Chain = N->getOperand(0);
> +  SDValue N1 = N->getOperand(1);
> +  SDValue Addr;
> +  SDValue Offset, Base;
> +  unsigned Opcode;
> +  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
> +
> +  if (SelectDirectAddr(N1, Addr)) {
> +    switch (TargetVT) {
> +    case MVT::i8:    Opcode = NVPTX::LD_i8_avar; break;
> +    case MVT::i16:   Opcode = NVPTX::LD_i16_avar; break;
> +    case MVT::i32:   Opcode = NVPTX::LD_i32_avar; break;
> +    case MVT::i64:   Opcode = NVPTX::LD_i64_avar; break;
> +    case MVT::f32:   Opcode = NVPTX::LD_f32_avar; break;
> +    case MVT::f64:   Opcode = NVPTX::LD_f64_avar; break;
> +    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_avar; break;
> +    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
> +    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
> +    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
> +    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
> +    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
> +    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_avar; break;
> +    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
> +    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
> +    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
> +    default: return NULL;
> +    }
> +    SDValue Ops[] = { getI32Imm(isVolatile),
> +                      getI32Imm(codeAddrSpace),
> +                      getI32Imm(vecType),
> +                      getI32Imm(fromType),
> +                      getI32Imm(fromTypeWidth),
> +                      Addr, Chain };
> +    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
> +                                     MVT::Other, Ops, 7);
> +  } else if (Subtarget.is64Bit()?
> +      SelectADDRsi64(N1.getNode(), N1, Base, Offset):
> +      SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
> +    switch (TargetVT) {
> +    case MVT::i8:    Opcode = NVPTX::LD_i8_asi; break;
> +    case MVT::i16:   Opcode = NVPTX::LD_i16_asi; break;
> +    case MVT::i32:   Opcode = NVPTX::LD_i32_asi; break;
> +    case MVT::i64:   Opcode = NVPTX::LD_i64_asi; break;
> +    case MVT::f32:   Opcode = NVPTX::LD_f32_asi; break;
> +    case MVT::f64:   Opcode = NVPTX::LD_f64_asi; break;
> +    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_asi; break;
> +    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
> +    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
> +    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
> +    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
> +    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
> +    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_asi; break;
> +    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
> +    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
> +    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
> +    default: return NULL;
> +    }
> +    SDValue Ops[] = { getI32Imm(isVolatile),
> +                      getI32Imm(codeAddrSpace),
> +                      getI32Imm(vecType),
> +                      getI32Imm(fromType),
> +                      getI32Imm(fromTypeWidth),
> +                      Base, Offset, Chain };
> +    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
> +                                     MVT::Other, Ops, 8);
> +  } else if (Subtarget.is64Bit()?
> +      SelectADDRri64(N1.getNode(), N1, Base, Offset):
> +      SelectADDRri(N1.getNode(), N1, Base, Offset)) {
> +    switch (TargetVT) {
> +    case MVT::i8:    Opcode = NVPTX::LD_i8_ari; break;
> +    case MVT::i16:   Opcode = NVPTX::LD_i16_ari; break;
> +    case MVT::i32:   Opcode = NVPTX::LD_i32_ari; break;
> +    case MVT::i64:   Opcode = NVPTX::LD_i64_ari; break;
> +    case MVT::f32:   Opcode = NVPTX::LD_f32_ari; break;
> +    case MVT::f64:   Opcode = NVPTX::LD_f64_ari; break;
> +    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_ari; break;
> +    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
> +    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
> +    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
> +    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
> +    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
> +    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_ari; break;
> +    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
> +    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
> +    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
> +    default: return NULL;
> +    }
> +    SDValue Ops[] = { getI32Imm(isVolatile),
> +                      getI32Imm(codeAddrSpace),
> +                      getI32Imm(vecType),
> +                      getI32Imm(fromType),
> +                      getI32Imm(fromTypeWidth),
> +                      Base, Offset, Chain };
> +    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
> +                                     MVT::Other, Ops, 8);
> +  }
> +  else {
> +    switch (TargetVT) {
> +    case MVT::i8:    Opcode = NVPTX::LD_i8_areg; break;
> +    case MVT::i16:   Opcode = NVPTX::LD_i16_areg; break;
> +    case MVT::i32:   Opcode = NVPTX::LD_i32_areg; break;
> +    case MVT::i64:   Opcode = NVPTX::LD_i64_areg; break;
> +    case MVT::f32:   Opcode = NVPTX::LD_f32_areg; break;
> +    case MVT::f64:   Opcode = NVPTX::LD_f64_areg; break;
> +    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_areg; break;
> +    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
> +    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
> +    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
> +    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
> +    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
> +    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_areg; break;
> +    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
> +    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
> +    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
> +    default: return NULL;
> +    }
> +    SDValue Ops[] = { getI32Imm(isVolatile),
> +                      getI32Imm(codeAddrSpace),
> +                      getI32Imm(vecType),
> +                      getI32Imm(fromType),
> +                      getI32Imm(fromTypeWidth),
> +                      N1, Chain };
> +    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
> +                                     MVT::Other, Ops, 7);
> +  }
> +
> +  if (NVPTXLD != NULL) {
> +    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
> +    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
> +    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
> +  }
> +
> +  return NVPTXLD;
> +}
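
The _avar/_asi/_ari/_areg opcode families chosen above correspond to the
four PTX address operand forms, roughly (a sketch, using a global-space
i32 load as the example):

    ld.global.u32 %r1, [gbuf];      // LD_i32_avar: direct symbol
    ld.global.u32 %r2, [gbuf+4];    // LD_i32_asi:  symbol + immediate
    ld.global.u32 %r3, [%rd1+4];    // LD_i32_ari:  register + immediate
    ld.global.u32 %r4, [%rd1];      // LD_i32_areg: register
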
> +
> +SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
> +  DebugLoc dl = N->getDebugLoc();
> +  StoreSDNode *ST = cast<StoreSDNode>(N);
> +  EVT StoreVT = ST->getMemoryVT();
> +  SDNode *NVPTXST = NULL;
> +
> +  // do not support pre/post inc/dec
> +  if (ST->isIndexed())
> +    return NULL;
> +
> +  if (!StoreVT.isSimple())
> +    return NULL;
> +
> +  // Address Space Setting
> +  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
> +
> +  // Volatile Setting
> +  // - .volatile is only available for .global and .shared
> +  bool isVolatile = ST->isVolatile();
> +  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
> +      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
> +      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
> +    isVolatile = false;
> +
> +  // Vector Setting
> +  MVT SimpleVT = StoreVT.getSimpleVT();
> +  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
> +  if (SimpleVT.isVector()) {
> +    unsigned num = SimpleVT.getVectorNumElements();
> +    if (num == 2)
> +      vecType = NVPTX::PTXLdStInstCode::V2;
> +    else if (num == 4)
> +      vecType = NVPTX::PTXLdStInstCode::V4;
> +    else
> +      return NULL;
> +  }
> +
> +  // Type Setting: toType + toTypeWidth
> +  // - for integer type, always use 'u'
> +  //
> +  MVT ScalarVT = SimpleVT.getScalarType();
> +  unsigned toTypeWidth =  ScalarVT.getSizeInBits();
> +  unsigned int toType;
> +  if (ScalarVT.isFloatingPoint())
> +    toType = NVPTX::PTXLdStInstCode::Float;
> +  else
> +    toType = NVPTX::PTXLdStInstCode::Unsigned;
> +
> +  // Create the machine instruction DAG
> +  SDValue Chain = N->getOperand(0);
> +  SDValue N1 = N->getOperand(1);
> +  SDValue N2 = N->getOperand(2);
> +  SDValue Addr;
> +  SDValue Offset, Base;
> +  unsigned Opcode;
> +  MVT::SimpleValueType SourceVT =
> +      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
> +
> +  if (SelectDirectAddr(N2, Addr)) {
> +    switch (SourceVT) {
> +    case MVT::i8:    Opcode = NVPTX::ST_i8_avar; break;
> +    case MVT::i16:   Opcode = NVPTX::ST_i16_avar; break;
> +    case MVT::i32:   Opcode = NVPTX::ST_i32_avar; break;
> +    case MVT::i64:   Opcode = NVPTX::ST_i64_avar; break;
> +    case MVT::f32:   Opcode = NVPTX::ST_f32_avar; break;
> +    case MVT::f64:   Opcode = NVPTX::ST_f64_avar; break;
> +    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_avar; break;
> +    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
> +    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
> +    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
> +    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
> +    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
> +    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_avar; break;
> +    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
> +    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
> +    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
> +    default: return NULL;
> +    }
> +    SDValue Ops[] = { N1,
> +                      getI32Imm(isVolatile),
> +                      getI32Imm(codeAddrSpace),
> +                      getI32Imm(vecType),
> +                      getI32Imm(toType),
> +                      getI32Imm(toTypeWidth),
> +                      Addr, Chain };
> +    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
> +                                     MVT::Other, Ops, 8);
> +  } else if (Subtarget.is64Bit()?
> +      SelectADDRsi64(N2.getNode(), N2, Base, Offset):
> +      SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
> +    switch (SourceVT) {
> +    case MVT::i8:    Opcode = NVPTX::ST_i8_asi; break;
> +    case MVT::i16:   Opcode = NVPTX::ST_i16_asi; break;
> +    case MVT::i32:   Opcode = NVPTX::ST_i32_asi; break;
> +    case MVT::i64:   Opcode = NVPTX::ST_i64_asi; break;
> +    case MVT::f32:   Opcode = NVPTX::ST_f32_asi; break;
> +    case MVT::f64:   Opcode = NVPTX::ST_f64_asi; break;
> +    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_asi; break;
> +    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
> +    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
> +    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
> +    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
> +    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
> +    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_asi; break;
> +    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
> +    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
> +    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
> +    default: return NULL;
> +    }
> +    SDValue Ops[] = { N1,
> +                      getI32Imm(isVolatile),
> +                      getI32Imm(codeAddrSpace),
> +                      getI32Imm(vecType),
> +                      getI32Imm(toType),
> +                      getI32Imm(toTypeWidth),
> +                      Base, Offset, Chain };
> +    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
> +                                     MVT::Other, Ops, 9);
> +  } else if (Subtarget.is64Bit()?
> +      SelectADDRri64(N2.getNode(), N2, Base, Offset):
> +      SelectADDRri(N2.getNode(), N2, Base, Offset)) {
> +    switch (SourceVT) {
> +    case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
> +    case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
> +    case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
> +    case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
> +    case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
> +    case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
> +    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_ari; break;
> +    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
> +    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
> +    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
> +    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
> +    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
> +    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_ari; break;
> +    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
> +    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
> +    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
> +    default: return NULL;
> +    }
> +    SDValue Ops[] = { N1,
> +                      getI32Imm(isVolatile),
> +                      getI32Imm(codeAddrSpace),
> +                      getI32Imm(vecType),
> +                      getI32Imm(toType),
> +                      getI32Imm(toTypeWidth),
> +                      Base, Offset, Chain };
> +    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
> +                                     MVT::Other, Ops, 9);
> +  } else {
> +    switch (SourceVT) {
> +    case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
> +    case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
> +    case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
> +    case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
> +    case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
> +    case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
> +    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_areg; break;
> +    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
> +    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
> +    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
> +    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
> +    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
> +    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_areg; break;
> +    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
> +    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
> +    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
> +    default: return NULL;
> +    }
> +    SDValue Ops[] = { N1,
> +                      getI32Imm(isVolatile),
> +                      getI32Imm(codeAddrSpace),
> +                      getI32Imm(vecType),
> +                      getI32Imm(toType),
> +                      getI32Imm(toTypeWidth),
> +                      N2, Chain };
> +    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
> +                                     MVT::Other, Ops, 8);
> +  }
> +
> +  if (NVPTXST != NULL) {
> +    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
> +    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
> +    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
> +  }
> +
> +  return NVPTXST;
> +}
> +
> +// SelectDirectAddr - Match a direct address for DAG.
> +// A direct address could be a globaladdress or externalsymbol.
> +bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
> +  // Return true if TGA or ES.
> +  if (N.getOpcode() == ISD::TargetGlobalAddress
> +      || N.getOpcode() == ISD::TargetExternalSymbol) {
> +    Address = N;
> +    return true;
> +  }
> +  if (N.getOpcode() == NVPTXISD::Wrapper) {
> +    Address = N.getOperand(0);
> +    return true;
> +  }
> +  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
> +    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
> +    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
> +      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
> +        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
> +  }
> +  return false;
> +}
> +
> +// symbol+offset
> +bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
> +                                         SDValue &Base, SDValue &Offset,
> +                                         MVT mvt) {
> +  if (Addr.getOpcode() == ISD::ADD) {
> +    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
> +      SDValue base=Addr.getOperand(0);
> +      if (SelectDirectAddr(base, Base)) {
> +        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
> +        return true;
> +      }
> +    }
> +  }
> +  return false;
> +}
> +
> +// symbol+offset
> +bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
> +                                     SDValue &Base, SDValue &Offset) {
> +  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
> +}
> +
> +// symbol+offset
> +bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
> +                                       SDValue &Base, SDValue &Offset) {
> +  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
> +}
> +
> +// register+offset
> +bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
> +                                         SDValue &Base, SDValue &Offset,
> +                                         MVT mvt) {
> +  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
> +    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
> +    Offset = CurDAG->getTargetConstant(0, mvt);
> +    return true;
> +  }
> +  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
> +      Addr.getOpcode() == ISD::TargetGlobalAddress)
> +    return false;  // direct calls.
> +
> +  if (Addr.getOpcode() == ISD::ADD) {
> +    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
> +      return false;
> +    }
> +    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
> +      if (FrameIndexSDNode *FIN =
> +          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
> +        // Constant offset from frame ref.
> +        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
> +      else
> +        Base = Addr.getOperand(0);
> +      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
> +      return true;
> +    }
> +  }
> +  return false;
> +}
> +
> +// register+offset
> +bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
> +                                     SDValue &Base, SDValue &Offset) {
> +  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
> +}
> +
> +// register+offset
> +bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
> +                                       SDValue &Base, SDValue &Offset) {
> +  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
> +}
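
Putting the two pattern families together (illustrative node shapes, not
from the patch):

    (add FrameIndex:i32<0>, Constant:i32<8>)
        -> ADDRri: Base = TargetFrameIndex<0>, Offset = 8
    (add (NVPTXISD::Wrapper @gvar), Constant:i32<8>)
        -> ADDRsi: Base = @gvar, Offset = 8

SelectADDRri_imp deliberately rejects the second shape (its base is a
direct address), so the symbol+offset form and the corresponding _asi
opcodes are used for it instead.
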
> +
> +bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
> +                                                 unsigned int spN) const {
> +  const Value *Src = NULL;
> +  // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
> +  // the classof() for MemSDNode does not include MemIntrinsicSDNode
> +  // (See SelectionDAGNodes.h). So we need to check for both.
> +  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
> +    Src = mN->getSrcValue();
> +  }
> +  else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
> +    Src = mN->getSrcValue();
> +  }
> +  if (!Src)
> +    return false;
> +  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
> +    return (PT->getAddressSpace() == spN);
> +  return false;
> +}
> +
> +/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
> +/// inline asm expressions.
> +bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
> +                                                     char ConstraintCode,
> +                                                 std::vector<SDValue> &OutOps) {
> +  SDValue Op0, Op1;
> +  switch (ConstraintCode) {
> +  default: return true;
> +  case 'm':   // memory
> +    if (SelectDirectAddr(Op, Op0)) {
> +      OutOps.push_back(Op0);
> +      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
> +      return false;
> +    }
> +    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
> +      OutOps.push_back(Op0);
> +      OutOps.push_back(Op1);
> +      return false;
> +    }
> +    break;
> +  }
> +  return true;
> +}
> +
> +// Return true if N is an undef or a constant.
> +// If N was undef, return an (i8imm 0) in Retval.
> +// If N was an imm, convert it to i8imm and return it in Retval.
> +// Note: The convert to i8imm is required, otherwise the
> +// pattern matcher inserts a bunch of IMOVi8rr to convert
> +// the imm to i8imm, and this causes instruction selection
> +// to fail.
> +bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
> +                                   SDValue &Retval) {
> +  if (N.getOpcode() != ISD::UNDEF &&
> +      N.getOpcode() != ISD::Constant)
> +    return false;
> +
> +  if (N.getOpcode() == ISD::UNDEF)
> +    Retval = CurDAG->getTargetConstant(0, MVT::i8);
> +  else {
> +    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
> +    unsigned retval = cn->getZExtValue();
> +    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
> +  }
> +  return true;
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,105 @@
> +//===-- NVPTXISelDAGToDAG.h - A dag to dag inst selector for NVPTX --------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines an instruction selector for the NVPTX target.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#define DEBUG_TYPE "nvptx-isel"
> +
> +#include "NVPTX.h"
> +#include "NVPTXISelLowering.h"
> +#include "NVPTXRegisterInfo.h"
> +#include "NVPTXTargetMachine.h"
> +#include "llvm/CodeGen/SelectionDAGISel.h"
> +#include "llvm/Support/Compiler.h"
> +#include "llvm/Intrinsics.h"
> +using namespace llvm;
> +
> +namespace {
> +
> +class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
> +
> +  // If true, generate the corresponding FPCONTRACT. This is
> +  // language-dependent (e.g. CUDA and OpenCL work differently).
> +  bool doFMADF32;
> +  bool doFMAF64;
> +  bool doFMAF32;
> +  bool doFMAF64AGG;
> +  bool doFMAF32AGG;
> +  bool allowFMA;
> +
> +  // 0: use div.approx
> +  // 1: use div.full
> +  // 2: for sm_20 and later, IEEE-compliant div.rnd.f32 can be generated;
> +  //    otherwise, use div.full
> +  int do_DIVF32_PREC;
> +
> +  // If true, add .ftz to f32 instructions.
> +  // This is only meaningful for sm_20 and later, as the default
> +  // is not ftz.
> +  // For sm earlier than sm_20, f32 denorms are always ftz by the
> +  // hardware.
> +  // We always add the .ftz modifier regardless of the sm value
> +  // when UseF32FTZ is true.
> +  bool UseF32FTZ;
> +
> +  // If true, generate mul.wide from sext and mul
> +  bool doMulWide;
> +
> +public:
> +  explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
> +                             CodeGenOpt::Level OptLevel);
> +
> +  // Pass Name
> +  virtual const char *getPassName() const {
> +    return "NVPTX DAG->DAG Pattern Instruction Selection";
> +  }
> +
> +  const NVPTXSubtarget &Subtarget;
> +
> +  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
> +                                            char ConstraintCode,
> +                                            std::vector<SDValue> &OutOps);
> +private:
> +  // Include the pieces autogenerated from the target description.
> +#include "NVPTXGenDAGISel.inc"
> +
> +  SDNode *Select(SDNode *N);
> +  SDNode* SelectLoad(SDNode *N);
> +  SDNode* SelectStore(SDNode *N);
> +
> +  inline SDValue getI32Imm(unsigned Imm) {
> +    return CurDAG->getTargetConstant(Imm, MVT::i32);
> +  }
> +
> +  // Match direct address complex pattern.
> +  bool SelectDirectAddr(SDValue N, SDValue &Address);
> +
> +  bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
> +                        SDValue &Offset, MVT mvt);
> +  bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
> +                    SDValue &Offset);
> +  bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
> +                      SDValue &Offset);
> +
> +  bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
> +                        SDValue &Offset, MVT mvt);
> +  bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
> +                    SDValue &Offset);
> +  bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
> +                      SDValue &Offset);
> +
> +
> +  bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
> +
> +  bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
> +
> +};
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,1294 @@
> +//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines the interfaces that NVPTX uses to lower LLVM code into a
> +// selection DAG.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +
> +#include "NVPTX.h"
> +#include "NVPTXISelLowering.h"
> +#include "NVPTXTargetMachine.h"
> +#include "NVPTXTargetObjectFile.h"
> +#include "NVPTXUtilities.h"
> +#include "llvm/Intrinsics.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Support/CommandLine.h"
> +#include "llvm/DerivedTypes.h"
> +#include "llvm/GlobalValue.h"
> +#include "llvm/Module.h"
> +#include "llvm/Function.h"
> +#include "llvm/CodeGen/Analysis.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/Support/CallSite.h"
> +#include "llvm/Support/ErrorHandling.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
> +#include "llvm/MC/MCSectionELF.h"
> +#include <sstream>
> +
> +#undef DEBUG_TYPE
> +#define DEBUG_TYPE "nvptx-lower"
> +
> +using namespace llvm;
> +
> +static unsigned int uniqueCallSite = 0;
> +
> +static cl::opt<bool>
> +RetainVectorOperands("nvptx-codegen-vectors",
> +     cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
> +                     cl::init(true));
> +
> +static cl::opt<bool>
> +sched4reg("nvptx-sched4reg",
> +          cl::desc("NVPTX Specific: schedule for register pressue"),
> +          cl::init(false));
> +
> +// NVPTXTargetLowering Constructor.
> +NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
> +: TargetLowering(TM, new NVPTXTargetObjectFile()),
> +  nvTM(&TM),
> +  nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
> +
> +  // Always lower memset, memcpy, and memmove intrinsics to load/store
> +  // instructions, rather than generating calls to memset, memcpy, or
> +  // memmove.
> +  maxStoresPerMemset = (unsigned)0xFFFFFFFF;
> +  maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
> +  maxStoresPerMemmove = (unsigned)0xFFFFFFFF;
> +
> +  setBooleanContents(ZeroOrNegativeOneBooleanContent);
> +
> +  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
> +  // condition branches.
> +  setJumpIsExpensive(true);
> +
> +  // By default, use the Source scheduling
> +  if (sched4reg)
> +    setSchedulingPreference(Sched::RegPressure);
> +  else
> +    setSchedulingPreference(Sched::Source);
> +
> +  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
> +  addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
> +  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
> +  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
> +  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
> +  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
> +  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
> +
> +  if (RetainVectorOperands) {
> +    addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
> +    addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
> +    addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
> +    addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
> +    addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
> +    addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
> +    addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
> +    addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
> +    addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
> +    addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
> +
> +    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32  , Custom);
> +    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32  , Custom);
> +    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16  , Custom);
> +    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8   , Custom);
> +    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64  , Custom);
> +    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64  , Custom);
> +    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32  , Custom);
> +    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32  , Custom);
> +    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16  , Custom);
> +    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8   , Custom);
> +
> +    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32  , Custom);
> +    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32  , Custom);
> +    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16  , Custom);
> +    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8   , Custom);
> +    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64  , Custom);
> +    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64  , Custom);
> +    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32  , Custom);
> +    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32  , Custom);
> +    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16  , Custom);
> +    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8   , Custom);
> +  }
> +
> +  // Operations not directly supported by NVPTX.
> +  setOperationAction(ISD::SELECT_CC,         MVT::Other, Expand);
> +  setOperationAction(ISD::BR_CC,             MVT::Other, Expand);
> +  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
> +  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
> +  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
> +  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
> +  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
> +
> +  if (nvptxSubtarget.hasROT64()) {
> +    setOperationAction(ISD::ROTL , MVT::i64, Legal);
> +    setOperationAction(ISD::ROTR , MVT::i64, Legal);
> +  }
> +  else {
> +    setOperationAction(ISD::ROTL , MVT::i64, Expand);
> +    setOperationAction(ISD::ROTR , MVT::i64, Expand);
> +  }
> +  if (nvptxSubtarget.hasROT32()) {
> +    setOperationAction(ISD::ROTL , MVT::i32, Legal);
> +    setOperationAction(ISD::ROTR , MVT::i32, Legal);
> +  }
> +  else {
> +    setOperationAction(ISD::ROTL , MVT::i32, Expand);
> +    setOperationAction(ISD::ROTR , MVT::i32, Expand);
> +  }
> +
> +  setOperationAction(ISD::ROTL , MVT::i16, Expand);
> +  setOperationAction(ISD::ROTR , MVT::i16, Expand);
> +  setOperationAction(ISD::ROTL , MVT::i8, Expand);
> +  setOperationAction(ISD::ROTR , MVT::i8, Expand);
> +  setOperationAction(ISD::BSWAP , MVT::i16, Expand);
> +  setOperationAction(ISD::BSWAP , MVT::i32, Expand);
> +  setOperationAction(ISD::BSWAP , MVT::i64, Expand);
> +
> +  // Indirect branch is not supported.
> +  // This also disables Jump Table creation.
> +  setOperationAction(ISD::BR_JT,             MVT::Other, Expand);
> +  setOperationAction(ISD::BRIND,             MVT::Other, Expand);
> +
> +  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
> +  setOperationAction(ISD::GlobalAddress   , MVT::i64  , Custom);
> +
> +  // We want to legalize constant related memmove and memcopy
> +  // intrinsics.
> +  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
> +
> +  // Turn FP extload into load/fextend
> +  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
> +  // Turn FP truncstore into trunc + store.
> +  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
> +
> +  // PTX does not support load / store predicate registers
> +  setOperationAction(ISD::LOAD, MVT::i1, Expand);
> +  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
> +  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
> +  setOperationAction(ISD::STORE, MVT::i1, Expand);
> +  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
> +  setTruncStoreAction(MVT::i32, MVT::i1, Expand);
> +  setTruncStoreAction(MVT::i16, MVT::i1, Expand);
> +  setTruncStoreAction(MVT::i8, MVT::i1, Expand);
> +
> +  // This is legal in NVPTX
> +  setOperationAction(ISD::ConstantFP,         MVT::f64, Legal);
> +  setOperationAction(ISD::ConstantFP,         MVT::f32, Legal);
> +
> +  // TRAP can be lowered to PTX trap
> +  setOperationAction(ISD::TRAP,               MVT::Other, Legal);
> +
> +  // By default, CONCAT_VECTORS is implemented via store/load
> +  // through the stack. It is slow and uses local memory, so we
> +  // custom-lower it.
> +  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32  , Custom);
> +  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32  , Custom);
> +  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16  , Custom);
> +  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8   , Custom);
> +  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64  , Custom);
> +  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64  , Custom);
> +  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32  , Custom);
> +  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32  , Custom);
> +  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16  , Custom);
> +  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8   , Custom);
> +
> +  // Expand vector int to float and float to int conversions
> +  // - For SINT_TO_FP and UINT_TO_FP, the src type
> +  //   (Node->getOperand(0).getValueType())
> +  //   is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
> +  //   the dest type (Node->getValueType(0)) is used.
> +  //
> +  //   See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
> +  //   case, and
> +  //   SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
> +  //
> +  //   That is why v4i32 or v2i32 are used here.
> +  //
> +  //   The expansion for vectors happens in VectorLegalizer::LegalizeOp()
> +  //   (LegalizeVectorOps.cpp).
> +  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
> +  setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
> +  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
> +  setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
> +  setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
> +  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
> +  setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
> +  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
> +
> +  // Now deduce the register information based on the
> +  // actions set above
> +  computeRegisterProperties();
> +}
> +
> +
> +const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
> +  switch (Opcode) {
> +  default: return 0;
> +  case NVPTXISD::CALL:            return "NVPTXISD::CALL";
> +  case NVPTXISD::RET_FLAG:        return "NVPTXISD::RET_FLAG";
> +  case NVPTXISD::Wrapper:         return "NVPTXISD::Wrapper";
> +  case NVPTXISD::NVBuiltin:       return "NVPTXISD::NVBuiltin";
> +  case NVPTXISD::DeclareParam:    return "NVPTXISD::DeclareParam";
> +  case NVPTXISD::DeclareScalarParam:
> +    return "NVPTXISD::DeclareScalarParam";
> +  case NVPTXISD::DeclareRet:      return "NVPTXISD::DeclareRet";
> +  case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
> +  case NVPTXISD::PrintCall:       return "NVPTXISD::PrintCall";
> +  case NVPTXISD::LoadParam:       return "NVPTXISD::LoadParam";
> +  case NVPTXISD::StoreParam:      return "NVPTXISD::StoreParam";
> +  case NVPTXISD::StoreParamS32:   return "NVPTXISD::StoreParamS32";
> +  case NVPTXISD::StoreParamU32:   return "NVPTXISD::StoreParamU32";
> +  case NVPTXISD::MoveToParam:     return "NVPTXISD::MoveToParam";
> +  case NVPTXISD::CallArgBegin:    return "NVPTXISD::CallArgBegin";
> +  case NVPTXISD::CallArg:         return "NVPTXISD::CallArg";
> +  case NVPTXISD::LastCallArg:     return "NVPTXISD::LastCallArg";
> +  case NVPTXISD::CallArgEnd:      return "NVPTXISD::CallArgEnd";
> +  case NVPTXISD::CallVoid:        return "NVPTXISD::CallVoid";
> +  case NVPTXISD::CallVal:         return "NVPTXISD::CallVal";
> +  case NVPTXISD::CallSymbol:      return "NVPTXISD::CallSymbol";
> +  case NVPTXISD::Prototype:       return "NVPTXISD::Prototype";
> +  case NVPTXISD::MoveParam:       return "NVPTXISD::MoveParam";
> +  case NVPTXISD::MoveRetval:      return "NVPTXISD::MoveRetval";
> +  case NVPTXISD::MoveToRetval:    return "NVPTXISD::MoveToRetval";
> +  case NVPTXISD::StoreRetval:     return "NVPTXISD::StoreRetval";
> +  case NVPTXISD::PseudoUseParam:  return "NVPTXISD::PseudoUseParam";
> +  case NVPTXISD::RETURN:          return "NVPTXISD::RETURN";
> +  case NVPTXISD::CallSeqBegin:    return "NVPTXISD::CallSeqBegin";
> +  case NVPTXISD::CallSeqEnd:      return "NVPTXISD::CallSeqEnd";
> +  }
> +}
> +
> +
> +SDValue
> +NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
> +  DebugLoc dl = Op.getDebugLoc();
> +  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
> +  Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
> +  return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
> +}
> +
> +std::string NVPTXTargetLowering::getPrototype(Type *retTy,
> +                                              const ArgListTy &Args,
> +                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
> +                                              unsigned retAlignment) const {
> +
> +  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
> +
> +  std::stringstream O;
> +  O << "prototype_" << uniqueCallSite << " : .callprototype ";
> +
> +  if (retTy->getTypeID() == Type::VoidTyID)
> +    O << "()";
> +  else {
> +    O << "(";
> +    if (isABI) {
> +      if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
> +        unsigned size = 0;
> +        if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
> +          size = ITy->getBitWidth();
> +          if (size < 32) size = 32;
> +        }
> +        else {
> +          assert(retTy->isFloatingPointTy() &&
> +                 "Floating point type expected here");
> +          size = retTy->getPrimitiveSizeInBits();
> +        }
> +
> +        O << ".param .b" << size << " _";
> +      }
> +      else if (isa<PointerType>(retTy))
> +        O << ".param .b" << getPointerTy().getSizeInBits() << " _";
> +      else {
> +        if ((retTy->getTypeID() == Type::StructTyID) ||
> +            isa<VectorType>(retTy)) {
> +          SmallVector<EVT, 16> vtparts;
> +          ComputeValueVTs(*this, retTy, vtparts);
> +          unsigned totalsz = 0;
> +          for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
> +            unsigned elems = 1;
> +            EVT elemtype = vtparts[i];
> +            if (vtparts[i].isVector()) {
> +              elems = vtparts[i].getVectorNumElements();
> +              elemtype = vtparts[i].getVectorElementType();
> +            }
> +            for (unsigned j=0, je=elems; j!=je; ++j) {
> +              unsigned sz = elemtype.getSizeInBits();
> +              if (elemtype.isInteger() && (sz < 8)) sz = 8;
> +              totalsz += sz/8;
> +            }
> +          }
> +          O << ".param .align " << retAlignment
> +            << " .b8 _[" << totalsz << "]";
> +        }
> +        else {
> +          assert(false &&
> +                 "Unknown return type");
> +        }
> +      }
> +    }
> +    else {
> +      SmallVector<EVT, 16> vtparts;
> +      ComputeValueVTs(*this, retTy, vtparts);
> +      unsigned idx = 0;
> +      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
> +        unsigned elems = 1;
> +        EVT elemtype = vtparts[i];
> +        if (vtparts[i].isVector()) {
> +          elems = vtparts[i].getVectorNumElements();
> +          elemtype = vtparts[i].getVectorElementType();
> +        }
> +
> +        for (unsigned j=0, je=elems; j!=je; ++j) {
> +          unsigned sz = elemtype.getSizeInBits();
> +          if (elemtype.isInteger() && (sz < 32)) sz = 32;
> +          O << ".reg .b" << sz << " _";
> +          if (j<je-1) O << ", ";
> +          ++idx;
> +        }
> +        if (i < e-1)
> +          O << ", ";
> +      }
> +    }
> +    O << ") ";
> +  }
> +  O << "_ (";
> +
> +  bool first = true;
> +  MVT thePointerTy = getPointerTy();
> +
> +  for (unsigned i=0,e=Args.size(); i!=e; ++i) {
> +    const Type *Ty = Args[i].Ty;
> +    if (!first) {
> +      O << ", ";
> +    }
> +    first = false;
> +
> +    if (Outs[i].Flags.isByVal() == false) {
> +      unsigned sz = 0;
> +      if (isa<IntegerType>(Ty)) {
> +        sz = cast<IntegerType>(Ty)->getBitWidth();
> +        if (sz < 32) sz = 32;
> +      }
> +      else if (isa<PointerType>(Ty))
> +        sz = thePointerTy.getSizeInBits();
> +      else
> +        sz = Ty->getPrimitiveSizeInBits();
> +      if (isABI)
> +        O << ".param .b" << sz << " ";
> +      else
> +        O << ".reg .b" << sz << " ";
> +      O << "_";
> +      continue;
> +    }
> +    const PointerType *PTy = dyn_cast<PointerType>(Ty);
> +    assert(PTy &&
> +           "Param with byval attribute should be a pointer type");
> +    Type *ETy = PTy->getElementType();
> +
> +    if (isABI) {
> +      unsigned align = Outs[i].Flags.getByValAlign();
> +      unsigned sz = getTargetData()->getTypeAllocSize(ETy);
> +      O << ".param .align " << align << " .b8 _[" << sz << "]";
> +      continue;
> +    }
> +    else {
> +      SmallVector<EVT, 16> vtparts;
> +      ComputeValueVTs(*this, ETy, vtparts);
> +      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
> +        unsigned elems = 1;
> +        EVT elemtype = vtparts[i];
> +        if (vtparts[i].isVector()) {
> +          elems = vtparts[i].getVectorNumElements();
> +          elemtype = vtparts[i].getVectorElementType();
> +        }
> +
> +        for (unsigned j=0,je=elems; j!=je; ++j) {
> +          unsigned sz = elemtype.getSizeInBits();
> +          if (elemtype.isInteger() && (sz < 32)) sz = 32;
> +          O << ".reg .b" << sz << " ";
> +          O << "_";
> +          if (j<je-1) O << ", ";
> +        }
> +        if (i<e-1)
> +          O << ", ";
> +      }
> +      continue;
> +    }
> +  }
> +  O << ");";
> +  return O.str();
> +}
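
To make the emitted string concrete: if I'm reading getPrototype() right, a
call to a hypothetical "float f(int, float *)" on an sm_20, 64-bit target
(the ABI path) would come out roughly as

  prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b64 _);

with the f32 return emitted as .param .b32, the int widened to 32 bits, and
the pointer argument taking the pointer width; the call-site number is
illustrative.
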
> +
> +
> +#if 0
> +SDValue
> +NVPTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
> +                               CallingConv::ID CallConv, bool isVarArg,
> +                               bool doesNotRet, bool &isTailCall,
> +                               const SmallVectorImpl<ISD::OutputArg> &Outs,
> +                               const SmallVectorImpl<SDValue> &OutVals,
> +                               const SmallVectorImpl<ISD::InputArg> &Ins,
> +                               DebugLoc dl, SelectionDAG &DAG,
> +                               SmallVectorImpl<SDValue> &InVals, Type *retTy,
> +                               const ArgListTy &Args) const {
> +  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
> +
> +  SDValue tempChain = Chain;
> +  Chain = DAG.getCALLSEQ_START(Chain,
> +                               DAG.getIntPtrConstant(uniqueCallSite, true));
> +  SDValue InFlag = Chain.getValue(1);
> +
> +  assert((Outs.size() == Args.size()) &&
> +         "Unexpected number of arguments to function call");
> +  unsigned paramCount = 0;
> +  // Declare the .param or .reg variables needed to pass values
> +  // to the function
> +  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
> +    EVT VT = Outs[i].VT;
> +
> +    if (Outs[i].Flags.isByVal() == false) {
> +      // Plain scalar
> +      // for ABI,    declare .param .b<size> .param<n>;
> +      // for nonABI, declare .reg .b<size> .param<n>;
> +      unsigned isReg = 1;
> +      if (isABI)
> +        isReg = 0;
> +      unsigned sz = VT.getSizeInBits();
> +      if (VT.isInteger() && (sz < 32)) sz = 32;
> +      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +      SDValue DeclareParamOps[] = { Chain,
> +                                    DAG.getConstant(paramCount, MVT::i32),
> +                                    DAG.getConstant(sz, MVT::i32),
> +                                    DAG.getConstant(isReg, MVT::i32),
> +                                    InFlag };
> +      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
> +                          DeclareParamOps, 5);
> +      InFlag = Chain.getValue(1);
> +      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
> +                             DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };
> +
> +      unsigned opcode = NVPTXISD::StoreParam;
> +      if (isReg)
> +        opcode = NVPTXISD::MoveToParam;
> +      else {
> +        if (Outs[i].Flags.isZExt())
> +          opcode = NVPTXISD::StoreParamU32;
> +        else if (Outs[i].Flags.isSExt())
> +          opcode = NVPTXISD::StoreParamS32;
> +      }
> +      Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);
> +
> +      InFlag = Chain.getValue(1);
> +      ++paramCount;
> +      continue;
> +    }
> +    // struct or vector
> +    SmallVector<EVT, 16> vtparts;
> +    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
> +    assert(PTy &&
> +           "Type of a byval parameter should be pointer");
> +    ComputeValueVTs(*this, PTy->getElementType(), vtparts);
> +
> +    if (isABI) {
> +      // declare .param .align 16 .b8 .param<n>[<size>];
> +      unsigned sz = Outs[i].Flags.getByValSize();
> +      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +      // The ByValAlign in Outs[i].Flags is always set at this point, so we
> +      // don't need to worry about natural alignment here.
> +      // See TargetLowering::LowerCallTo().
> +      SDValue DeclareParamOps[] = { Chain,
> +                       DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
> +                                    DAG.getConstant(paramCount, MVT::i32),
> +                                    DAG.getConstant(sz, MVT::i32),
> +                                    InFlag };
> +      Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
> +                          DeclareParamOps, 5);
> +      InFlag = Chain.getValue(1);
> +      unsigned curOffset = 0;
> +      for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
> +        unsigned elems = 1;
> +        EVT elemtype = vtparts[j];
> +        if (vtparts[j].isVector()) {
> +          elems = vtparts[j].getVectorNumElements();
> +          elemtype = vtparts[j].getVectorElementType();
> +        }
> +        for (unsigned k=0,ke=elems; k!=ke; ++k) {
> +          unsigned sz = elemtype.getSizeInBits();
> +          if (elemtype.isInteger() && (sz < 8)) sz = 8;
> +          SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
> +                                        OutVals[i],
> +                                        DAG.getConstant(curOffset,
> +                                                        getPointerTy()));
> +          SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
> +                                MachinePointerInfo(), false, false, false, 0);
> +          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +          SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
> +                                                            MVT::i32),
> +                                           DAG.getConstant(curOffset, MVT::i32),
> +                                                            theVal, InFlag };
> +          Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
> +                              CopyParamOps, 5);
> +          InFlag = Chain.getValue(1);
> +          curOffset += sz/8;
> +        }
> +      }
> +      ++paramCount;
> +      continue;
> +    }
> +    // Non-ABI, struct or vector:
> +    // declare a set of .reg .b<size> .param<n>
> +    unsigned curOffset = 0;
> +    for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
> +      unsigned elems = 1;
> +      EVT elemtype = vtparts[j];
> +      if (vtparts[j].isVector()) {
> +        elems = vtparts[j].getVectorNumElements();
> +        elemtype = vtparts[j].getVectorElementType();
> +      }
> +      for (unsigned k=0,ke=elems; k!=ke; ++k) {
> +        unsigned sz = elemtype.getSizeInBits();
> +        if (elemtype.isInteger() && (sz < 32)) sz = 32;
> +        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
> +                                                             MVT::i32),
> +                                                  DAG.getConstant(sz, MVT::i32),
> +                                                   DAG.getConstant(1, MVT::i32),
> +                                                             InFlag };
> +        Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
> +                            DeclareParamOps, 5);
> +        InFlag = Chain.getValue(1);
> +        SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
> +                                      DAG.getConstant(curOffset,
> +                                                      getPointerTy()));
> +        SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
> +                                  MachinePointerInfo(), false, false, false, 0);
> +        SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +        SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
> +                                   DAG.getConstant(0, MVT::i32), theVal,
> +                                   InFlag };
> +        Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
> +                            CopyParamOps, 5);
> +        InFlag = Chain.getValue(1);
> +        ++paramCount;
> +      }
> +    }
> +  }
> +
> +  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
> +  unsigned retAlignment = 0;
> +
> +  // Handle Result
> +  unsigned retCount = 0;
> +  if (Ins.size() > 0) {
> +    SmallVector<EVT, 16> resvtparts;
> +    ComputeValueVTs(*this, retTy, resvtparts);
> +
> +    // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI, or
> +    // individual .reg .b<size> func_retval<0..> for non-ABI
> +    unsigned resultsz = 0;
> +    for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
> +      unsigned elems = 1;
> +      EVT elemtype = resvtparts[i];
> +      if (resvtparts[i].isVector()) {
> +        elems = resvtparts[i].getVectorNumElements();
> +        elemtype = resvtparts[i].getVectorElementType();
> +      }
> +      for (unsigned j=0,je=elems; j!=je; ++j) {
> +        unsigned sz = elemtype.getSizeInBits();
> +        if (isABI == false) {
> +          if (elemtype.isInteger() && (sz < 32)) sz = 32;
> +        }
> +        else {
> +          if (elemtype.isInteger() && (sz < 8)) sz = 8;
> +        }
> +        if (isABI == false) {
> +          SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +          SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
> +                                      DAG.getConstant(sz, MVT::i32),
> +                                      DAG.getConstant(retCount, MVT::i32),
> +                                      InFlag };
> +          Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
> +                              DeclareRetOps, 5);
> +          InFlag = Chain.getValue(1);
> +          ++retCount;
> +        }
> +        resultsz += sz;
> +      }
> +    }
> +    if (isABI) {
> +      if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
> +          retTy->isPointerTy() ) {
> +        // Scalars need to be at least 32 bits wide
> +        if (resultsz < 32)
> +          resultsz = 32;
> +        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
> +                                    DAG.getConstant(resultsz, MVT::i32),
> +                                    DAG.getConstant(0, MVT::i32), InFlag };
> +        Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
> +                            DeclareRetOps, 5);
> +        InFlag = Chain.getValue(1);
> +      }
> +      else {
> +        // @TODO: Re-enable getAlign calls.  We do not have the
> +        // ImmutableCallSite object here anymore.
> +        //if (Func) { // direct call
> +        //if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
> +        //retAlignment = TD->getABITypeAlignment(retTy);
> +        //}
> +        //else { // indirect call
> +        //const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
> +        //if (!llvm::getAlign(*CallI, 0, retAlignment))
> +        //retAlignment = TD->getABITypeAlignment(retTy);
> +        //}
> +        // @TODO: Remove this hack!
> +        // Functions with explicit alignment metadata will be broken, for now.
> +        retAlignment = 16;
> +        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
> +                                                           MVT::i32),
> +                                          DAG.getConstant(resultsz/8, MVT::i32),
> +                                         DAG.getConstant(0, MVT::i32), InFlag };
> +        Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
> +                            DeclareRetOps, 5);
> +        InFlag = Chain.getValue(1);
> +      }
> +    }
> +  }
> +
> +  if (!Func) {
> +    // This is the indirect function call case: PTX requires a prototype of
> +    // the form
> +    // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
> +    // to be emitted, and the label has to be used as the last argument of
> +    // the call instruction.
> +    // The prototype is embedded in a string and used as the operand of an
> +    // INLINEASM SDNode.
> +    SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +    std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
> +    const char *asmstr = nvTM->getManagedStrPool()->
> +        getManagedString(proto_string.c_str())->c_str();
> +    SDValue InlineAsmOps[] = { Chain,
> +                               DAG.getTargetExternalSymbol(asmstr,
> +                                                           getPointerTy()),
> +                                                           DAG.getMDNode(0),
> +                                   DAG.getTargetConstant(0, MVT::i32), InFlag };
> +    Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
> +    InFlag = Chain.getValue(1);
> +  }
> +  // Op to just print "call"
> +  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +  SDValue PrintCallOps[] = { Chain,
> +                             DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
> +                                 : retCount, MVT::i32),
> +                                   InFlag };
> +  Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
> +      PrintCallVTs, PrintCallOps, 3);
> +  InFlag = Chain.getValue(1);
> +
> +  // Ops to print out the function name
> +  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
> +  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
> +  InFlag = Chain.getValue(1);
> +
> +  // Ops to print out the param list
> +  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +  SDValue CallArgBeginOps[] = { Chain, InFlag };
> +  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
> +                      CallArgBeginOps, 2);
> +  InFlag = Chain.getValue(1);
> +
> +  for (unsigned i=0, e=paramCount; i!=e; ++i) {
> +    unsigned opcode;
> +    if (i==(e-1))
> +      opcode = NVPTXISD::LastCallArg;
> +    else
> +      opcode = NVPTXISD::CallArg;
> +    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
> +                             DAG.getConstant(i, MVT::i32),
> +                             InFlag };
> +    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
> +    InFlag = Chain.getValue(1);
> +  }
> +  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +  SDValue CallArgEndOps[] = { Chain,
> +                              DAG.getConstant(Func ? 1 : 0, MVT::i32),
> +                              InFlag };
> +  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
> +                      3);
> +  InFlag = Chain.getValue(1);
> +
> +  if (!Func) {
> +    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
> +    SDValue PrototypeOps[] = { Chain,
> +                               DAG.getConstant(uniqueCallSite, MVT::i32),
> +                               InFlag };
> +    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
> +    InFlag = Chain.getValue(1);
> +  }
> +
> +  // Generate loads from param memory/moves from registers for result
> +  if (Ins.size() > 0) {
> +    if (isABI) {
> +      unsigned resoffset = 0;
> +      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
> +        unsigned sz = Ins[i].VT.getSizeInBits();
> +        if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
> +        std::vector<EVT> LoadRetVTs;
> +        LoadRetVTs.push_back(Ins[i].VT);
> +        LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue);
> +        std::vector<SDValue> LoadRetOps;
> +        LoadRetOps.push_back(Chain);
> +        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
> +        LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
> +        LoadRetOps.push_back(InFlag);
> +        SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
> +                                     &LoadRetOps[0], LoadRetOps.size());
> +        Chain = retval.getValue(1);
> +        InFlag = retval.getValue(2);
> +        InVals.push_back(retval);
> +        resoffset += sz/8;
> +      }
> +    }
> +    else {
> +      SmallVector<EVT, 16> resvtparts;
> +      ComputeValueVTs(*this, retTy, resvtparts);
> +
> +      assert(Ins.size() == resvtparts.size() &&
> +             "Unexpected number of return values in non-ABI case");
> +      unsigned paramNum = 0;
> +      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
> +        assert(EVT(Ins[i].VT) == resvtparts[i] &&
> +               "Unexpected EVT type in non-ABI case");
> +        unsigned numelems = 1;
> +        EVT elemtype = Ins[i].VT;
> +        if (Ins[i].VT.isVector()) {
> +          numelems = Ins[i].VT.getVectorNumElements();
> +          elemtype = Ins[i].VT.getVectorElementType();
> +        }
> +        std::vector<SDValue> tempRetVals;
> +        for (unsigned j=0; j<numelems; ++j) {
> +          std::vector<EVT> MoveRetVTs;
> +          MoveRetVTs.push_back(elemtype);
> +          MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue);
> +          std::vector<SDValue> MoveRetOps;
> +          MoveRetOps.push_back(Chain);
> +          MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
> +          MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
> +          MoveRetOps.push_back(InFlag);
> +          SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
> +                                       &MoveRetOps[0], MoveRetOps.size());
> +          Chain = retval.getValue(1);
> +          InFlag = retval.getValue(2);
> +          tempRetVals.push_back(retval);
> +          ++paramNum;
> +        }
> +        if (Ins[i].VT.isVector())
> +          InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
> +                                       &tempRetVals[0], tempRetVals.size()));
> +        else
> +          InVals.push_back(tempRetVals[0]);
> +      }
> +    }
> +  }
> +  Chain = DAG.getCALLSEQ_END(Chain,
> +                             DAG.getIntPtrConstant(uniqueCallSite, true),
> +                             DAG.getIntPtrConstant(uniqueCallSite+1, true),
> +                             InFlag);
> +  uniqueCallSite++;
> +
> +  // Set isTailCall to false for now, until we figure out how to express
> +  // tail-call optimization in PTX.
> +  isTailCall = false;
> +  return Chain;
> +}
> +#endif
> +
> +// By default, CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
> +// (see LegalizeDAG.cpp). This is slow and uses local memory. Instead, we use
> +// extract/insert/build-vector, just as LegalizeOp() did in LLVM 2.5.
> +SDValue NVPTXTargetLowering::
> +LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
> +  SDNode *Node = Op.getNode();
> +  DebugLoc dl = Node->getDebugLoc();
> +  SmallVector<SDValue, 8> Ops;
> +  unsigned NumOperands = Node->getNumOperands();
> +  for (unsigned i=0; i < NumOperands; ++i) {
> +    SDValue SubOp = Node->getOperand(i);
> +    EVT VVT = SubOp.getNode()->getValueType(0);
> +    EVT EltVT = VVT.getVectorElementType();
> +    unsigned NumSubElem = VVT.getVectorNumElements();
> +    for (unsigned j=0; j < NumSubElem; ++j) {
> +      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
> +                                DAG.getIntPtrConstant(j)));
> +    }
> +  }
> +  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
> +                     &Ops[0], Ops.size());
> +}
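
In other words (my reading of the loop): concatenating two v2f32 values a and
b turns into a flat build_vector of the extracted elements,

  // concat_vectors(a: v2f32, b: v2f32) becomes
  //   build_vector(extract_elt(a, 0), extract_elt(a, 1),
  //                extract_elt(b, 0), extract_elt(b, 1)) : v4f32

which stays in registers instead of bouncing through a stack slot in local
memory.
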
> +
> +SDValue NVPTXTargetLowering::
> +LowerOperation(SDValue Op, SelectionDAG &DAG) const {
> +  switch (Op.getOpcode()) {
> +  case ISD::RETURNADDR: return SDValue();
> +  case ISD::FRAMEADDR:  return SDValue();
> +  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
> +  case ISD::INTRINSIC_W_CHAIN: return Op;
> +  case ISD::BUILD_VECTOR:
> +  case ISD::EXTRACT_SUBVECTOR:
> +    return Op;
> +  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
> +  default:
> +    assert(0 && "Custom lowering not defined for operation");
> +  }
> +}
> +
> +SDValue
> +NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
> +                                EVT v) const {
> +  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
> +  std::stringstream suffix;
> +  suffix << idx;
> +  *name += suffix.str();
> +  return DAG.getTargetExternalSymbol(name->c_str(), v);
> +}
> +
> +SDValue
> +NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
> +  return getExtSymb(DAG, ".PARAM", idx, v);
> +}
> +
> +SDValue
> +NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
> +  return getExtSymb(DAG, ".HLPPARAM", idx);
> +}
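
So, for example, getParamSymbol(DAG, 3) yields the external symbol ".PARAM3"
and getParamHelpSymbol(DAG, 3) yields ".HLPPARAM3"; the string is owned by
the managed pool, so the char pointer handed to getTargetExternalSymbol()
stays valid after this function returns.
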
> +
> +// Check to see if the kernel argument is image*_t or sampler_t
> +
> +bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
> +  const char *specialTypes[] = {
> +                                "struct._image2d_t",
> +                                "struct._image3d_t",
> +                                "struct._sampler_t"
> +  };
> +
> +  const Type *Ty = arg->getType();
> +  const PointerType *PTy = dyn_cast<PointerType>(Ty);
> +
> +  if (!PTy)
> +    return false;
> +
> +  if (!context)
> +    return false;
> +
> +  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
> +  const std::string TypeName = STy ? STy->getName() : "";
> +
> +  for (int i=0, e=sizeof(specialTypes)/sizeof(specialTypes[0]); i!=e; ++i)
> +    if (TypeName == specialTypes[i])
> +      return true;
> +
> +  return false;
> +}
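
For context, this is keyed to what Clang emits for OpenCL image and sampler
kernel arguments, if I remember its lowering right: a kernel declared as, say,

  kernel void k(read_only image2d_t img, sampler_t s);

reaches the IR as pointers to the opaque structs struct._image2d_t and
struct._sampler_t, which is exactly what the name comparison above matches.
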
> +
> +SDValue
> +NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
> +                                        CallingConv::ID CallConv, bool isVarArg,
> +                                      const SmallVectorImpl<ISD::InputArg> &Ins,
> +                                          DebugLoc dl, SelectionDAG &DAG,
> +                                       SmallVectorImpl<SDValue> &InVals) const {
> +  MachineFunction &MF = DAG.getMachineFunction();
> +  const TargetData *TD = getTargetData();
> +
> +  const Function *F = MF.getFunction();
> +  const AttrListPtr &PAL = F->getAttributes();
> +
> +  SDValue Root = DAG.getRoot();
> +  std::vector<SDValue> OutChains;
> +
> +  bool isKernel = llvm::isKernelFunction(*F);
> +  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
> +
> +  std::vector<Type *> argTypes;
> +  std::vector<const Argument *> theArgs;
> +  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
> +      I != E; ++I) {
> +    theArgs.push_back(I);
> +    argTypes.push_back(I->getType());
> +  }
> +  assert(argTypes.size() == Ins.size() &&
> +         "Ins types and function types did not match");
> +
> +  int idx = 0;
> +  for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
> +    Type *Ty = argTypes[i];
> +    EVT ObjectVT = getValueType(Ty);
> +    assert(ObjectVT == Ins[i].VT &&
> +           "Ins type did not match function type");
> +
> +    // If the kernel argument is image*_t or sampler_t, convert it to
> +    // an i32 constant holding the parameter position. This can later be
> +    // matched in the AsmPrinter to output the correct mangled name.
> +    if (isImageOrSamplerVal(theArgs[i],
> +                           (theArgs[i]->getParent() ?
> +                               theArgs[i]->getParent()->getParent() : 0))) {
> +      assert(isKernel && "Only kernels can have image/sampler params");
> +      InVals.push_back(DAG.getConstant(i+1, MVT::i32));
> +      continue;
> +    }
> +
> +    if (theArgs[i]->use_empty()) {
> +      // argument is dead
> +      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
> +      continue;
> +    }
> +
> +    // In the following cases, assign a node order of "idx+1"
> +    // to newly created nodes. The SDNodes for params have to
> +    // appear in the same order as they appear in the original
> +    // function. "idx+1" holds that order.
> +    if (PAL.paramHasAttr(i+1, Attribute::ByVal) == false) {
> +      // A plain scalar.
> +      if (isABI || isKernel) {
> +        // If ABI, load from the param symbol
> +        SDValue Arg = getParamSymbol(DAG, idx);
> +        Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT(
> +            F->getContext()),
> +            llvm::ADDRESS_SPACE_PARAM));
> +        SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
> +                                MachinePointerInfo(srcValue), false, false,
> +                                false,
> +                                TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
> +                                  F->getContext())));
> +        if (p.getNode())
> +          DAG.AssignOrdering(p.getNode(), idx+1);
> +        InVals.push_back(p);
> +      }
> +      else {
> +        // If no ABI, just move the param symbol
> +        SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
> +        SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
> +        if (p.getNode())
> +          DAG.AssignOrdering(p.getNode(), idx+1);
> +        InVals.push_back(p);
> +      }
> +      continue;
> +    }
> +
> +    // Param has ByVal attribute
> +    if (isABI || isKernel) {
> +      // Return MoveParam(param symbol).
> +      // Ideally, the param symbol could be returned directly,
> +      // but when the SDNode builder decides to use it in a CopyToReg(),
> +      // the machine instruction fails because a TargetExternalSymbol
> +      // (which is never lowered) is target-dependent, and CopyToReg
> +      // assumes the source is lowered.
> +      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
> +      SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
> +      if (p.getNode())
> +        DAG.AssignOrdering(p.getNode(), idx+1);
> +      if (isKernel)
> +        InVals.push_back(p);
> +      else {
> +        SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
> +                    DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
> +                                 p);
> +        InVals.push_back(p2);
> +      }
> +    } else {
> +      // Have to move a set of param symbols to registers, store
> +      // them locally, and return the local pointer in InVals
> +      const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
> +      assert(elemPtrType &&
> +             "Byval parameter should be a pointer type");
> +      Type *elemType = elemPtrType->getElementType();
> +      // Compute the constituent parts
> +      SmallVector<EVT, 16> vtparts;
> +      SmallVector<uint64_t, 16> offsets;
> +      ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
> +      unsigned totalsize = 0;
> +      for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
> +        totalsize += vtparts[j].getStoreSizeInBits();
> +      SDValue localcopy =  DAG.getFrameIndex(MF.getFrameInfo()->
> +                                      CreateStackObject(totalsize/8, 16, false),
> +                                             getPointerTy());
> +      unsigned sizesofar = 0;
> +      std::vector<SDValue> theChains;
> +      for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
> +        unsigned numElems = 1;
> +        if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
> +        for (unsigned k=0, ke=numElems; k!=ke; ++k) {
> +          EVT tmpvt = vtparts[j];
> +          if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
> +          SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
> +                                    getParamSymbol(DAG, idx, tmpvt));
> +          SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
> +                                    DAG.getConstant(sizesofar, getPointerTy()));
> +          theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
> +                                        MachinePointerInfo(), false, false, 0));
> +          sizesofar += tmpvt.getStoreSizeInBits()/8;
> +          ++idx;
> +        }
> +      }
> +      --idx;
> +      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
> +                          theChains.size());
> +      InVals.push_back(localcopy);
> +    }
> +  }
> +
> +  // Clang checks explicit varargs and issues an error if any are present.
> +  // However, it lets code with an implicit vararg list, like f(), pass.
> +  // We treat that case as if the argument list were empty.
> +  //if (F.isVarArg()) {
> +  // assert(0 && "VarArg not supported yet!");
> +  //}
> +
> +  if (!OutChains.empty())
> +    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
> +                            &OutChains[0], OutChains.size()));
> +
> +  return Chain;
> +}
> +
> +SDValue
> +NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
> +                                 bool isVarArg,
> +                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
> +                                 const SmallVectorImpl<SDValue> &OutVals,
> +                                 DebugLoc dl, SelectionDAG &DAG) const {
> +
> +  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
> +
> +  unsigned sizesofar = 0;
> +  unsigned idx = 0;
> +  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
> +    SDValue theVal = OutVals[i];
> +    EVT theValType = theVal.getValueType();
> +    unsigned numElems = 1;
> +    if (theValType.isVector()) numElems = theValType.getVectorNumElements();
> +    for (unsigned j=0,je=numElems; j!=je; ++j) {
> +      SDValue tmpval = theVal;
> +      if (theValType.isVector())
> +        tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
> +                             theValType.getVectorElementType(),
> +                             tmpval, DAG.getIntPtrConstant(j));
> +      Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval,
> +          dl, MVT::Other,
> +          Chain,
> +          DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
> +          tmpval);
> +      if (theValType.isVector())
> +        sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
> +      else
> +        sizesofar += theValType.getStoreSizeInBits()/8;
> +      ++idx;
> +    }
> +  }
> +
> +  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
> +}
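
Concretely, my reading of the loop: returning a v2f32 under the ABI path
emits two StoreRetval nodes at byte offsets 0 and 4, one per extracted
element, while the non-ABI path emits MoveToRetval with indices 0 and 1 for
the same two elements.
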
> +
> +void
> +NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
> +                                                  std::string &Constraint,
> +                                                  std::vector<SDValue> &Ops,
> +                                                  SelectionDAG &DAG) const
> +{
> +  if (Constraint.length() > 1)
> +    return;
> +  else
> +    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
> +}
> +
> +// NVPTX supports vectors of legal types of any length in intrinsics, because
> +// the NVPTX-specific type legalizer will legalize them to a PTX-supported
> +// length.
> +bool
> +NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
> +  if (isTypeLegal(VT))
> +    return true;
> +  if (VT.isVector()) {
> +    MVT eVT = VT.getVectorElementType();
> +    if (isTypeLegal(eVT))
> +      return true;
> +  }
> +  return false;
> +}
> +
> +
> +// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
> +// TgtMemIntrinsic because we need information that is only available in the
> +// "Value" of the destination pointer; in particular, its address space.
> +bool
> +NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
> +                                        unsigned Intrinsic) const {
> +  switch (Intrinsic) {
> +  default:
> +    return false;
> +
> +  case Intrinsic::nvvm_atomic_load_add_f32:
> +    Info.opc = ISD::INTRINSIC_W_CHAIN;
> +    Info.memVT = MVT::f32;
> +    Info.ptrVal = I.getArgOperand(0);
> +    Info.offset = 0;
> +    Info.vol = 0;
> +    Info.readMem = true;
> +    Info.writeMem = true;
> +    Info.align = 0;
> +    return true;
> +
> +  case Intrinsic::nvvm_atomic_load_inc_32:
> +  case Intrinsic::nvvm_atomic_load_dec_32:
> +    Info.opc = ISD::INTRINSIC_W_CHAIN;
> +    Info.memVT = MVT::i32;
> +    Info.ptrVal = I.getArgOperand(0);
> +    Info.offset = 0;
> +    Info.vol = 0;
> +    Info.readMem = true;
> +    Info.writeMem = true;
> +    Info.align = 0;
> +    return true;
> +
> +  case Intrinsic::nvvm_ldu_global_i:
> +  case Intrinsic::nvvm_ldu_global_f:
> +  case Intrinsic::nvvm_ldu_global_p:
> +
> +    Info.opc = ISD::INTRINSIC_W_CHAIN;
> +    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
> +      Info.memVT = MVT::i32;
> +    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
> +      Info.memVT = getPointerTy();
> +    else
> +      Info.memVT = MVT::f32;
> +    Info.ptrVal = I.getArgOperand(0);
> +    Info.offset = 0;
> +    Info.vol = 0;
> +    Info.readMem = true;
> +    Info.writeMem = false;
> +    Info.align = 0;
> +    return true;
> +
> +  }
> +  return false;
> +}
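
As a usage sketch: an IR-level call to @llvm.nvvm.atomic.load.add.f32 (I
haven't checked the exact mangled suffix) on a float* %p would be routed
through here and come back with memVT = f32, ptrVal = %p, and
readMem = writeMem = true, so the DAG treats it as an ordinary
read-modify-write memory access.
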
> +
> +/// isLegalAddressingMode - Return true if the addressing mode represented
> +/// by AM is legal for this target, for a load/store of the specified type.
> +/// Used to guide target specific optimizations, like loop strength reduction
> +/// (LoopStrengthReduce.cpp) and memory optimization for address mode
> +/// (CodeGenPrepare.cpp)
> +bool
> +NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
> +                                           Type *Ty) const {
> +
> +  // AddrMode - This represents an addressing mode of:
> +  //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
> +  //
> +  // The legal address modes are
> +  // - [avar]
> +  // - [areg]
> +  // - [areg+immoff]
> +  // - [immAddr]
> +
> +  if (AM.BaseGV) {
> +    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
> +      return false;
> +    return true;
> +  }
> +
> +  switch (AM.Scale) {
> +  case 0:  // "r", "r+i" or "i" is allowed
> +    break;
> +  case 1:
> +    if (AM.HasBaseReg)  // "r+r+i" or "r+r" is not allowed.
> +      return false;
> +    // Otherwise we have r+i.
> +    break;
> +  default:
> +    // No scale > 1 is allowed
> +    return false;
> +  }
> +  return true;
> +}
> +
> +//===----------------------------------------------------------------------===//
> +//                         NVPTX Inline Assembly Support
> +//===----------------------------------------------------------------------===//
> +
> +/// getConstraintType - Given a constraint letter, return the type of
> +/// constraint it is for this target.
> +NVPTXTargetLowering::ConstraintType
> +NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
> +  if (Constraint.size() == 1) {
> +    switch (Constraint[0]) {
> +    default:
> +      break;
> +    case 'r':
> +    case 'h':
> +    case 'c':
> +    case 'l':
> +    case 'f':
> +    case 'd':
> +    case '0':
> +    case 'N':
> +      return C_RegisterClass;
> +    }
> +  }
> +  return TargetLowering::getConstraintType(Constraint);
> +}
> +
> +
> +std::pair<unsigned, const TargetRegisterClass*>
> +NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
> +                                                  EVT VT) const {
> +  if (Constraint.size() == 1) {
> +    switch (Constraint[0]) {
> +    case 'c':
> +      return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
> +    case 'h':
> +      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
> +    case 'r':
> +      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
> +    case 'l':
> +    case 'N':
> +      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
> +    case 'f':
> +      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
> +    case 'd':
> +      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
> +    }
> +  }
> +  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
> +}
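
For reference, these are the letters you would use from CUDA-style inline
PTX, e.g.

  int a = 1, b = 2, c;
  asm("add.s32 %0, %1, %2;" : "=r"(c) : "r"(a), "r"(b));

where 'r' selects Int32Regs via the case above, and 'h', 'l', 'f', 'd' pick
the 16-bit integer, 64-bit integer, and 32/64-bit float classes the same way.
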
> +
> +
> +
> +/// getFunctionAlignment - Return the Log2 alignment of this function.
> +unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
> +  return 4;
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,153 @@
> +//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines the interfaces that NVPTX uses to lower LLVM code into a
> +// selection DAG.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTXISELLOWERING_H
> +#define NVPTXISELLOWERING_H
> +
> +#include "NVPTX.h"
> +#include "NVPTXSubtarget.h"
> +#include "llvm/CodeGen/SelectionDAG.h"
> +#include "llvm/Target/TargetLowering.h"
> +
> +namespace llvm {
> +namespace NVPTXISD {
> +enum NodeType {
> +  // Start the numbering from where ISD NodeType finishes.
> +  FIRST_NUMBER = ISD::BUILTIN_OP_END,
> +  Wrapper,
> +  CALL,
> +  RET_FLAG,
> +  LOAD_PARAM,
> +  NVBuiltin,
> +  DeclareParam,
> +  DeclareScalarParam,
> +  DeclareRetParam,
> +  DeclareRet,
> +  DeclareScalarRet,
> +  LoadParam,
> +  StoreParam,
> +  StoreParamS32, // to sext and store a <32-bit value, not currently used
> +  StoreParamU32, // to zext and store a <32-bit value, not currently used
> +  MoveToParam,
> +  PrintCall,
> +  PrintCallUni,
> +  CallArgBegin,
> +  CallArg,
> +  LastCallArg,
> +  CallArgEnd,
> +  CallVoid,
> +  CallVal,
> +  CallSymbol,
> +  Prototype,
> +  MoveParam,
> +  MoveRetval,
> +  MoveToRetval,
> +  StoreRetval,
> +  PseudoUseParam,
> +  RETURN,
> +  CallSeqBegin,
> +  CallSeqEnd,
> +  Dummy
> +};
> +}
> +
> +//===--------------------------------------------------------------------===//
> +// TargetLowering Implementation
> +//===--------------------------------------------------------------------===//
> +class NVPTXTargetLowering : public TargetLowering {
> +public:
> +  explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
> +  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
> +
> +  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
> +  SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
> +                             SelectionDAG &DAG) const;
> +
> +  virtual const char *getTargetNodeName(unsigned Opcode) const;
> +
> +  bool isTypeSupportedInIntrinsic(MVT VT) const;
> +
> +  bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
> +                          unsigned Intrinsic) const;
> +
> +  /// isLegalAddressingMode - Return true if the addressing mode represented
> +  /// by AM is legal for this target, for a load/store of the specified type
> +  /// Used to guide target specific optimizations, like loop strength
> +  /// reduction (LoopStrengthReduce.cpp) and memory optimization for
> +  /// address mode (CodeGenPrepare.cpp)
> +  virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
> +
> +  /// getFunctionAlignment - Return the Log2 alignment of this function.
> +  virtual unsigned getFunctionAlignment(const Function *F) const;
> +
> +  virtual EVT getSetCCResultType(EVT VT) const {
> +    return MVT::i1;
> +  }
> +
> +  ConstraintType getConstraintType(const std::string &Constraint) const;
> +  std::pair<unsigned, const TargetRegisterClass*>
> +  getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
> +
> +  virtual SDValue
> +  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
> +                       const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
> +                       SelectionDAG &DAG,
> +                       SmallVectorImpl<SDValue> &InVals) const;
> +
> +  // This will be re-added once the necessary changes to LowerCallTo are
> +  // upstreamed.
> +  // virtual SDValue
> +  // LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
> +  // bool isVarArg, bool doesNotRet, bool &isTailCall,
> +  // const SmallVectorImpl<ISD::OutputArg> &Outs,
> +  // const SmallVectorImpl<SDValue> &OutVals,
> +  // const SmallVectorImpl<ISD::InputArg> &Ins,
> +  // DebugLoc dl, SelectionDAG &DAG,
> +  // SmallVectorImpl<SDValue> &InVals,
> +  // Type *retTy, const ArgListTy &Args) const;
> +
> +  std::string getPrototype(Type *, const ArgListTy &,
> +                           const SmallVectorImpl<ISD::OutputArg> &,
> +                           unsigned retAlignment) const;
> +
> +  virtual SDValue
> +  LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
> +              const SmallVectorImpl<ISD::OutputArg> &Outs,
> +              const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
> +              SelectionDAG &DAG) const;
> +
> +  virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
> +                                            std::vector<SDValue> &Ops,
> +                                            SelectionDAG &DAG) const;
> +
> +  NVPTXTargetMachine *nvTM;
> +
> +  // PTX always uses 32-bit shift amounts
> +  virtual MVT getShiftAmountTy(EVT LHSTy) const {
> +    return MVT::i32;
> +  }
> +
> +private:
> +  const NVPTXSubtarget &nvptxSubtarget;  // cache the subtarget here
> +
> +  SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
> +                         MVT::i32) const;
> +  SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
> +  SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
> +
> +  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
> +};
> +} // namespace llvm
> +
> +#endif // NVPTXISELLOWERING_H
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXInstrFormats.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXInstrFormats.td?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXInstrFormats.td (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrFormats.td Fri May  4 15:18:50 2012
> @@ -0,0 +1,43 @@
> +//===- NVPTXInstrFormats.td - NVPTX Instruction Formats-------*- tblgen -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +//===----------------------------------------------------------------------===//
> +//  Describe the NVPTX instruction format
> +//
> +//===----------------------------------------------------------------------===//
> +
> +// Vector instruction type enum
> +class VecInstTypeEnum<bits<4> val> {
> +  bits<4> Value=val;
> +}
> +def VecNOP : VecInstTypeEnum<0>;
> +
> +// Generic NVPTX Format
> +
> +class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
> +  : Instruction {
> +  field bits<14> Inst;
> +
> +  let Namespace = "NVPTX";
> +  dag OutOperandList = outs;
> +  dag InOperandList = ins;
> +  let AsmString = asmstr;
> +  let Pattern = pattern;
> +
> +  // TSFlagFields
> +  bits<4> VecInstType = VecNOP.Value;
> +  bit IsSimpleMove = 0;
> +  bit IsLoad = 0;
> +  bit IsStore = 0;
> +
> +  let TSFlags{3-0} = VecInstType;
> +  let TSFlags{4-4} = IsSimpleMove;
> +  let TSFlags{5-5} = IsLoad;
> +  let TSFlags{6-6} = IsStore;
> +}
>
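
These TSFlags bits are what NVPTXInstrInfo decodes further down in this patch
(isMoveInstr, isLoadInstr, isStoreInstr). A minimal decode sketch, assuming
the mask/shift constants in NVPTXBaseInfo.h mirror the layout above:

  #include <cstdint>
  // Hypothetical decode; the patch itself uses named mask/shift constants
  // from NVPTXBaseInfo.h (e.g. NVPTX::SimpleMoveMask, NVPTX::SimpleMoveShift).
  static unsigned vecInstType(uint64_t F) { return F & 0xF; }        // bits 3-0
  static bool isSimpleMove(uint64_t F)    { return (F >> 4) & 0x1; } // bit 4
  static bool isLoadFlag(uint64_t F)      { return (F >> 5) & 0x1; } // bit 5
  static bool isStoreFlag(uint64_t F)     { return (F >> 6) & 0x1; } // bit 6
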
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,326 @@
> +//===- NVPTXInstrInfo.cpp - NVPTX Instruction Information -----------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the NVPTX implementation of the TargetInstrInfo class.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTX.h"
> +#include "NVPTXInstrInfo.h"
> +#include "NVPTXTargetMachine.h"
> +#define GET_INSTRINFO_CTOR
> +#include "NVPTXGenInstrInfo.inc"
> +#include "llvm/Function.h"
> +#include "llvm/ADT/STLExtras.h"
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include <cstdio>
> +
> +
> +using namespace llvm;
> +
> +// FIXME: Add subtarget support to this constructor.
> +NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
> +: NVPTXGenInstrInfo(),
> +  TM(tm),
> +  RegInfo(*this, *TM.getSubtargetImpl()) {}
> +
> +
> +void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
> +                                  MachineBasicBlock::iterator I, DebugLoc DL,
> +                                  unsigned DestReg, unsigned SrcReg,
> +                                  bool KillSrc) const {
> +  if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
> +      NVPTX::Int32RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
> +      NVPTX::Int8RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
> +      NVPTX::Int1RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
> +      NVPTX::Float32RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
> +      NVPTX::Int16RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
> +      NVPTX::Int64RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
> +      NVPTX::Float64RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
> +      NVPTX::V4F32RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
> +      NVPTX::V4I32RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
> +      NVPTX::V2F32RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
> +      NVPTX::V2I32RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
> +      NVPTX::V4I8RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
> +      NVPTX::V2I8RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
> +      NVPTX::V4I16RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
> +      NVPTX::V2I16RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
> +      NVPTX::V2I64RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
> +      NVPTX::V2F64RegsRegClass.contains(SrcReg))
> +    BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
> +    .addReg(SrcReg, getKillRegState(KillSrc));
> +  else {
> +    llvm_unreachable("Don't know how to copy a register");
> +  }
> +}
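
The copyPhysReg if/else chain repeats the same BuildMI call for every
register class. A table-driven form would be shorter and harder to get
out of sync; a rough sketch (untested, inside copyPhysReg, reusing the
register classes and move opcodes above):

  struct CopyEntry { const TargetRegisterClass *RC; unsigned Op; };
  static const CopyEntry Table[] = {
    { &NVPTX::Int1RegsRegClass,    NVPTX::IMOV1rr  },
    { &NVPTX::Int8RegsRegClass,    NVPTX::IMOV8rr  },
    { &NVPTX::Int16RegsRegClass,   NVPTX::IMOV16rr },
    { &NVPTX::Int32RegsRegClass,   NVPTX::IMOV32rr },
    { &NVPTX::Int64RegsRegClass,   NVPTX::IMOV64rr },
    { &NVPTX::Float32RegsRegClass, NVPTX::FMOV32rr },
    { &NVPTX::Float64RegsRegClass, NVPTX::FMOV64rr },
    // ... vector register classes elided ...
  };
  for (unsigned i = 0, e = sizeof(Table)/sizeof(Table[0]); i != e; ++i)
    if (Table[i].RC->contains(DestReg) && Table[i].RC->contains(SrcReg)) {
      BuildMI(MBB, I, DL, get(Table[i].Op), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
      return;
    }
  llvm_unreachable("Don't know how to copy a register");
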
> +
> +bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
> +                                 unsigned &SrcReg,
> +                                 unsigned &DestReg) const {
> +  // Look for the appropriate part of TSFlags
> +  bool isMove = false;
> +
> +  unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
> +      NVPTX::SimpleMoveShift;
> +  isMove = (TSFlags == 1);
> +
> +  if (isMove) {
> +    MachineOperand dest = MI.getOperand(0);
> +    MachineOperand src = MI.getOperand(1);
> +    assert(dest.isReg() && "dest of a movrr is not a reg");
> +    assert(src.isReg() && "src of a movrr is not a reg");
> +
> +    SrcReg = src.getReg();
> +    DestReg = dest.getReg();
> +    return true;
> +  }
> +
> +  return false;
> +}
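
A hypothetical caller, just to illustrate the contract (assuming a
non-const MachineInstr &MI and TII pointing at this NVPTXInstrInfo;
not code from the patch):

  unsigned SrcReg, DstReg;
  if (TII->isMoveInstr(MI, SrcReg, DstReg) && SrcReg == DstReg)
    MI.eraseFromParent();  // a register-to-itself move is a no-op
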
> +
> +bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
> +  switch (MI.getOpcode()) {
> +  default: return false;
> +  case NVPTX::INT_PTX_SREG_NTID_X:
> +  case NVPTX::INT_PTX_SREG_NTID_Y:
> +  case NVPTX::INT_PTX_SREG_NTID_Z:
> +  case NVPTX::INT_PTX_SREG_TID_X:
> +  case NVPTX::INT_PTX_SREG_TID_Y:
> +  case NVPTX::INT_PTX_SREG_TID_Z:
> +  case NVPTX::INT_PTX_SREG_CTAID_X:
> +  case NVPTX::INT_PTX_SREG_CTAID_Y:
> +  case NVPTX::INT_PTX_SREG_CTAID_Z:
> +  case NVPTX::INT_PTX_SREG_NCTAID_X:
> +  case NVPTX::INT_PTX_SREG_NCTAID_Y:
> +  case NVPTX::INT_PTX_SREG_NCTAID_Z:
> +  case NVPTX::INT_PTX_SREG_WARPSIZE:
> +    return true;
> +  }
> +}
> +
> +
> +bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
> +                                 unsigned &AddrSpace) const {
> +  bool isLoad = false;
> +  unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
> +      NVPTX::isLoadShift;
> +  isLoad = (TSFlags == 1);
> +  if (isLoad)
> +    AddrSpace = getLdStCodeAddrSpace(MI);
> +  return isLoad;
> +}
> +
> +bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
> +                                  unsigned &AddrSpace) const {
> +  bool isStore = false;
> +  unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
> +      NVPTX::isStoreShift;
> +  isStore = (TSFlags == 1);
> +  if (isStore)
> +    AddrSpace = getLdStCodeAddrSpace(MI);
> +  return isStore;
> +}
> +
> +
> +bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
> +  unsigned addrspace = 0;
> +  if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
> +    return false;
> +  if (isLoadInstr(*MI, addrspace))
> +    if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
> +      return false;
> +  if (isStoreInstr(*MI, addrspace))
> +    if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
> +      return false;
> +  return true;
> +}
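
Presumably CanTailMerge is conservative here because bar.sync and
shared-memory accesses are sensitive to their position relative to the
CTA's control flow, so merging identical tails that contain them could
move them across a divergence point. A sketch of how a branch-folding
style caller might consult it (hypothetical, not part of this patch):

  bool Mergeable = true;
  for (MachineBasicBlock::iterator MI = MBB.begin(), E = MBB.end();
       MI != E && Mergeable; ++MI)
    Mergeable = TII->CanTailMerge(&*MI);
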
> +
> +
> +/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
> +/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
> +/// implemented for a target).  Upon success, this returns false and returns
> +/// with the following information in various cases:
> +///
> +/// 1. If this block ends with no branches (it just falls through to its succ)
> +///    just return false, leaving TBB/FBB null.
> +/// 2. If this block ends with only an unconditional branch, it sets TBB to be
> +///    the destination block.
> +/// 3. If this block ends with a conditional branch and it falls through to
> +///    a successor block, it sets TBB to be the branch destination block and a
> +///    list of operands that evaluate the condition. These
> +///    operands can be passed to other TargetInstrInfo methods to create new
> +///    branches.
> +/// 4. If this block ends with a conditional branch and an unconditional
> +///    branch, it returns the 'true' destination in TBB, the 'false' destination
> +///    in FBB, and a list of operands that evaluate the condition. These
> +///    operands can be passed to other TargetInstrInfo methods to create new
> +///    branches.
> +///
> +/// Note that RemoveBranch and InsertBranch must be implemented to support
> +/// cases where this method returns success.
> +///
> +bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
> +                                   MachineBasicBlock *&TBB,
> +                                   MachineBasicBlock *&FBB,
> +                                   SmallVectorImpl<MachineOperand> &Cond,
> +                                   bool AllowModify) const {
> +  // If the block has no terminators, it just falls into the block after it.
> +  MachineBasicBlock::iterator I = MBB.end();
> +  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
> +    return false;
> +
> +  // Get the last instruction in the block.
> +  MachineInstr *LastInst = I;
> +
> +  // If there is only one terminator instruction, process it.
> +  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
> +    if (LastInst->getOpcode() == NVPTX::GOTO) {
> +      TBB = LastInst->getOperand(0).getMBB();
> +      return false;
> +    } else if (LastInst->getOpcode() == NVPTX::CBranch) {
> +      // Block ends with fall-through condbranch.
> +      TBB = LastInst->getOperand(1).getMBB();
> +      Cond.push_back(LastInst->getOperand(0));
> +      return false;
> +    }
> +    // Otherwise, don't know what this is.
> +    return true;
> +  }
> +
> +  // Get the instruction before it if it's a terminator.
> +  MachineInstr *SecondLastInst = I;
> +
> +  // If there are three terminators, we don't know what sort of block this is.
> +  if (SecondLastInst && I != MBB.begin() &&
> +      isUnpredicatedTerminator(--I))
> +    return true;
> +
> +  // If the block ends with NVPTX::CBranch followed by NVPTX::GOTO, handle it.
> +  if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
> +      LastInst->getOpcode() == NVPTX::GOTO) {
> +    TBB =  SecondLastInst->getOperand(1).getMBB();
> +    Cond.push_back(SecondLastInst->getOperand(0));
> +    FBB = LastInst->getOperand(0).getMBB();
> +    return false;
> +  }
> +
> +  // If the block ends with two NVPTX::GOTOs, handle it.  The second one is not
> +  // executed, so remove it.
> +  if (SecondLastInst->getOpcode() == NVPTX::GOTO &&
> +      LastInst->getOpcode() == NVPTX::GOTO) {
> +    TBB = SecondLastInst->getOperand(0).getMBB();
> +    I = LastInst;
> +    if (AllowModify)
> +      I->eraseFromParent();
> +    return false;
> +  }
> +
> +  // Otherwise, can't handle this.
> +  return true;
> +}
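
To make the contract above concrete for the two-terminator case, a
hypothetical caller would see (not from the patch):

  MachineBasicBlock *TBB = 0, *FBB = 0;
  SmallVector<MachineOperand, 1> Cond;
  if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
    // For a block ending in "CBranch %p, bb1; GOTO bb2":
    //   TBB  == bb1 (taken when the predicate %p is true)
    //   FBB  == bb2
    //   Cond == { the %p register operand }
  }
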
> +
> +unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
> +  MachineBasicBlock::iterator I = MBB.end();
> +  if (I == MBB.begin()) return 0;
> +  --I;
> +  if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
> +    return 0;
> +
> +  // Remove the branch.
> +  I->eraseFromParent();
> +
> +  I = MBB.end();
> +
> +  if (I == MBB.begin()) return 1;
> +  --I;
> +  if (I->getOpcode() != NVPTX::CBranch)
> +    return 1;
> +
> +  // Remove the branch.
> +  I->eraseFromParent();
> +  return 2;
> +}
> +
> +unsigned
> +NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
> +                             MachineBasicBlock *FBB,
> +                             const SmallVectorImpl<MachineOperand> &Cond,
> +                             DebugLoc DL) const {
> +  // Shouldn't be a fall through.
> +  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
> +  assert((Cond.size() == 1 || Cond.size() == 0) &&
> +         "NVPTX branch conditions have two components!");
> +
> +  // One-way branch.
> +  if (FBB == 0) {
> +    if (Cond.empty())   // Unconditional branch
> +      BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
> +    else                // Conditional branch
> +      BuildMI(&MBB, DL, get(NVPTX::CBranch))
> +      .addReg(Cond[0].getReg()).addMBB(TBB);
> +    return 1;
> +  }
> +
> +  // Two-way Conditional Branch.
> +  BuildMI(&MBB, DL, get(NVPTX::CBranch))
> +  .addReg(Cond[0].getReg()).addMBB(TBB);
> +  BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
> +  return 2;
> +}
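
RemoveBranch and InsertBranch are meant to compose; e.g. branch folding
redirects a block's terminators roughly like this (sketch, assuming
NewTBB/NewFBB are the new targets and Cond was previously filled in by
AnalyzeBranch):

  TII->RemoveBranch(MBB);                            // drops CBranch/GOTO
  TII->InsertBranch(MBB, NewTBB, NewFBB, Cond, DL);  // re-emits them
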
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,83 @@
> +//===- NVPTXInstrInfo.h - NVPTX Instruction Information----------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the NVPTX implementation of the TargetInstrInfo class.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTXINSTRUCTIONINFO_H
> +#define NVPTXINSTRUCTIONINFO_H
> +
> +#include "NVPTX.h"
> +#include "NVPTXRegisterInfo.h"
> +#include "llvm/Target/TargetInstrInfo.h"
> +
> +#define GET_INSTRINFO_HEADER
> +#include "NVPTXGenInstrInfo.inc"
> +
> +namespace llvm {
> +
> +class NVPTXInstrInfo : public NVPTXGenInstrInfo
> +{
> +  NVPTXTargetMachine &TM;
> +  const NVPTXRegisterInfo RegInfo;
> +public:
> +  explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
> +
> +  virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
> +
> +  /* The following virtual functions are used in register allocation.
> +   * They are not implemented because the existing interface and the logic
> +   * at the caller side do not work for the elementized vector load and store.
> +   *
> +   * virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
> +   *                                  int &FrameIndex) const;
> +   * virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
> +   *                                 int &FrameIndex) const;
> +   * virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
> +   *                              MachineBasicBlock::iterator MBBI,
> +   *                             unsigned SrcReg, bool isKill, int FrameIndex,
> +   *                              const TargetRegisterClass *RC) const;
> +   * virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
> +   *                               MachineBasicBlock::iterator MBBI,
> +   *                               unsigned DestReg, int FrameIndex,
> +   *                               const TargetRegisterClass *RC) const;
> +   */
> +
> +  virtual void copyPhysReg(MachineBasicBlock &MBB,
> +                           MachineBasicBlock::iterator I, DebugLoc DL,
> +                           unsigned DestReg, unsigned SrcReg,
> +                           bool KillSrc) const;
> +  virtual bool isMoveInstr(const MachineInstr &MI,
> +                           unsigned &SrcReg,
> +                           unsigned &DestReg) const;
> +  bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
> +  bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
> +  bool isReadSpecialReg(MachineInstr &MI) const;
> +
> +  virtual bool CanTailMerge(const MachineInstr *MI) const;
> +  // Branch analysis.
> +  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
> +                             MachineBasicBlock *&FBB,
> +                             SmallVectorImpl<MachineOperand> &Cond,
> +                             bool AllowModify) const;
> +  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
> +  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
> +                                MachineBasicBlock *FBB,
> +                                const SmallVectorImpl<MachineOperand> &Cond,
> +                                DebugLoc DL) const;
> +  unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
> +    return MI.getOperand(2).getImm();
> +  }
> +
> +};
> +
> +} // namespace llvm
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td Fri May  4 15:18:50 2012
> @@ -0,0 +1,2837 @@
> +//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file describes the PTX instructions in TableGen format.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +include "NVPTXInstrFormats.td"
> +
> +// A NOP instruction
> +def NOP : NVPTXInst<(outs), (ins), "", []>;
> +
> +// List of vector specific properties
> +def isVecLD      : VecInstTypeEnum<1>;
> +def isVecST      : VecInstTypeEnum<2>;
> +def isVecBuild   : VecInstTypeEnum<3>;
> +def isVecShuffle : VecInstTypeEnum<4>;
> +def isVecExtract : VecInstTypeEnum<5>;
> +def isVecInsert  : VecInstTypeEnum<6>;
> +def isVecDest    : VecInstTypeEnum<7>;
> +def isVecOther   : VecInstTypeEnum<15>;
> +
> +//===----------------------------------------------------------------------===//
> +// NVPTX Operand Definitions.
> +//===----------------------------------------------------------------------===//
> +
> +def brtarget    : Operand<OtherVT>;
> +
> +//===----------------------------------------------------------------------===//
> +// NVPTX Instruction Predicate Definitions
> +//===----------------------------------------------------------------------===//
> +
> +
> +def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
> +def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
> +def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
> +def useAtomRedG32forGen32 :
> +  Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
> +def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
> +def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
> +def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
> +def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
> +def useAtomRedG64forGen64 :
> +  Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
> +def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
> +def hasVote : Predicate<"Subtarget.hasVote()">;
> +def hasDouble : Predicate<"Subtarget.hasDouble()">;
> +def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
> +def hasLDU : Predicate<"Subtarget.hasLDU()">;
> +def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
> +
> +def doF32FTZ : Predicate<"UseF32FTZ">;
> +
> +def doFMAF32      : Predicate<"doFMAF32">;
> +def doFMAF32_ftz  : Predicate<"(doFMAF32 && UseF32FTZ)">;
> +def doFMAF32AGG      : Predicate<"doFMAF32AGG">;
> +def doFMAF32AGG_ftz  : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
> +def doFMAF64      : Predicate<"doFMAF64">;
> +def doFMAF64AGG      : Predicate<"doFMAF64AGG">;
> +def doFMADF32     : Predicate<"doFMADF32">;
> +def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
> +
> +def doMulWide      : Predicate<"doMulWide">;
> +
> +def allowFMA : Predicate<"allowFMA">;
> +def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
> +
> +def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
> +def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
> +
> +def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
> +
> +def true : Predicate<"1">;
> +
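For readers unfamiliar with TableGen predicates: each string above is
pasted into the generated instruction selector as a C++ boolean
expression, so Subtarget, UseF32FTZ, do_DIVF32_PREC, etc. must be in
scope there. Roughly what guards a pattern marked Requires<[doF32FTZ]>
(illustrative, not the exact generated code):

  if (UseF32FTZ) {
    // ... select the .ftz form of the instruction ...
  }
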
> +//===----------------------------------------------------------------------===//
> +// Special Handling for 8-bit Operands and Operations
> +//
> +// PTX supports 8-bit signed and unsigned types, but does not support 8-bit
> +// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
> +// 8-bit registers.
> +//
> +// PTX ld, st and cvt instructions permit source and destination data operands
> +// to be wider than the instruction-type size, so that narrow values may be
> +// loaded, stored, and converted using regular-width registers.
> +//
> +// So in PTX generation, we
> +// - always use 16-bit registers in place of 8-bit registers.
> +//   (8-bit variables should stay as 8-bit as they represent memory layout.)
> +// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
> +//   before the operation:
> +//   . div
> +//   . rem
> +//   . neg (sign)
> +//   . set, setp
> +//   . shr
> +//
> +// We are patching the operations by inserting the cvt instructions in the
> +// asm strings of the affected instructions.
> +//
> +// Since vector operations, except for ld/st, are eventually elementized, we
> +// do not need to special-case the vector 8-bit operations.
> +//
> +//
> +//===----------------------------------------------------------------------===//
> +
> +// Generate string block like
> +// {
> +//   .reg .s16 %temp1;
> +//   .reg .s16 %temp2;
> +//   cvt.s16.s8 %temp1, %a;
> +//   cvt.s16.s8 %temp2, %b;
> +//   opc.s16    %dst, %temp1, %temp2;
> +// }
> +// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
> +class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
> +  string s = !strconcat("{{\n\t",
> +             !strconcat(".reg .", !strconcat(TypeStr,
> +             !strconcat(" \t%temp1;\n\t",
> +             !strconcat(".reg .", !strconcat(TypeStr,
> +             !strconcat(" \t%temp2;\n\t",
> +             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
> +             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
> +             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
> +}
> +
> +// Generate string block like
> +// {
> +//   .reg .s16 %temp1;
> +//   .reg .s16 %temp2;
> +//   cvt.s16.s8 %temp1, %a;
> +//   mov.b16    %temp2, %b;
> +//   cvt.s16.s8 %temp2, %temp2;
> +//   opc.s16    %dst, %temp1, %temp2;
> +// }
> +// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
> +class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
> +  string s = !strconcat("{{\n\t",
> +             !strconcat(".reg .", !strconcat(TypeStr,
> +             !strconcat(" \t%temp1;\n\t",
> +             !strconcat(".reg .",
> +             !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
> +             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
> +             !strconcat("mov.b16 \t%temp2, $b;\n\t",
> +             !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
> +             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
> +}
> +
> +// Generate string block like
> +// {
> +//   .reg .s16 %temp1;
> +//   .reg .s16 %temp2;
> +//   mov.b16    %temp1, %a;
> +//   cvt.s16.s8 %temp1, %temp1;
> +//   cvt.s16.s8 %temp2, %b;
> +//   opc.s16    %dst, %temp1, %temp2;
> +// }
> +// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
> +class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
> +  string s = !strconcat("{{\n\t",
> +             !strconcat(".reg .", !strconcat(TypeStr,
> +             !strconcat(" \t%temp1;\n\t",
> +             !strconcat(".reg .", !strconcat(TypeStr,
> +             !strconcat(" \t%temp2;\n\t",
> +             !strconcat("mov.b16 \t%temp1, $a;\n\t",
> +             !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
> +             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
> +             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
> +}
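
To make these templates concrete: the SDIV defm further down
instantiates Handle_i8rr<"div.s", "s16", "cvt.s16.s8">, so (with the
{{ }} escapes printing as literal braces) the i8rr asm string expands
to:

  {
    .reg .s16 %temp1;
    .reg .s16 %temp2;
    cvt.s16.s8 %temp1, $a;
    cvt.s16.s8 %temp2, $b;
    div.s16 $dst, %temp1, %temp2;
  }

with $a/$b/$dst substituted by the operand registers at emission time.
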
> +
> +
> +//===----------------------------------------------------------------------===//
> +// Some Common Instruction Class Templates
> +//===----------------------------------------------------------------------===//
> +
> +multiclass I3<string OpcStr, SDNode OpNode> {
> +  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
> +                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
> +                       Int64Regs:$b))]>;
> +  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
> +                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
> +  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
> +                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
> +                       Int32Regs:$b))]>;
> +  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
> +                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
> +  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
> +                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
> +                       Int16Regs:$b))]>;
> +  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
> +                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
> +  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
> +                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
> +  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
> +                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
> +}
> +
> +multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
> +  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
> +                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
> +                       Int64Regs:$b))]>;
> +  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
> +                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
> +  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
> +                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
> +                       Int32Regs:$b))]>;
> +  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
> +                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
> +  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
> +                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
> +                       Int16Regs:$b))]>;
> +  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
> +                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
> +  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
> +                     Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
> +                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
> +  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
> +                     Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
> +                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
> +}
> +
> +multiclass I3_noi8<string OpcStr, SDNode OpNode> {
> +  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
> +                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
> +                       Int64Regs:$b))]>;
> +  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
> +                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
> +  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
> +                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
> +                       Int32Regs:$b))]>;
> +  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
> +                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
> +  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
> +                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
> +                       Int16Regs:$b))]>;
> +  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
> +                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
> +}
> +
> +multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
> +   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
> +       Int32Regs:$b),
> +                      !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
> +                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
> +                        Int32Regs:$b))]>;
> +   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
> +                      !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
> +                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
> +}
> +
> +multiclass F3<string OpcStr, SDNode OpNode> {
> +   def f64rr : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins Float64Regs:$a, Float64Regs:$b),
> +                      !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
> +                      [(set Float64Regs:$dst,
> +                        (OpNode Float64Regs:$a, Float64Regs:$b))]>,
> +                      Requires<[allowFMA]>;
> +   def f64ri : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins Float64Regs:$a, f64imm:$b),
> +                      !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
> +                      [(set Float64Regs:$dst,
> +                        (OpNode Float64Regs:$a, fpimm:$b))]>,
> +                      Requires<[allowFMA]>;
> +   def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b),
> +                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
> +                      [(set Float32Regs:$dst,
> +                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
> +                      Requires<[allowFMA_ftz]>;
> +   def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, f32imm:$b),
> +                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
> +                      [(set Float32Regs:$dst,
> +                        (OpNode Float32Regs:$a, fpimm:$b))]>,
> +                      Requires<[allowFMA_ftz]>;
> +   def f32rr : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b),
> +                      !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
> +                      [(set Float32Regs:$dst,
> +                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
> +                      Requires<[allowFMA]>;
> +   def f32ri : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, f32imm:$b),
> +                      !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
> +                      [(set Float32Regs:$dst,
> +                        (OpNode Float32Regs:$a, fpimm:$b))]>,
> +                      Requires<[allowFMA]>;
> +}
> +
> +multiclass F3_rn<string OpcStr, SDNode OpNode> {
> +   def f64rr : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins Float64Regs:$a, Float64Regs:$b),
> +                      !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
> +                      [(set Float64Regs:$dst,
> +                        (OpNode Float64Regs:$a, Float64Regs:$b))]>;
> +   def f64ri : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins Float64Regs:$a, f64imm:$b),
> +                      !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
> +                      [(set Float64Regs:$dst,
> +                        (OpNode Float64Regs:$a, fpimm:$b))]>;
> +   def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b),
> +                      !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
> +                      [(set Float32Regs:$dst,
> +                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
> +                      Requires<[doF32FTZ]>;
> +   def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, f32imm:$b),
> +                      !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
> +                      [(set Float32Regs:$dst,
> +                        (OpNode Float32Regs:$a, fpimm:$b))]>,
> +                      Requires<[doF32FTZ]>;
> +   def f32rr : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b),
> +                      !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
> +                      [(set Float32Regs:$dst,
> +                        (OpNode Float32Regs:$a, Float32Regs:$b))]>;
> +   def f32ri : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, f32imm:$b),
> +                      !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
> +                      [(set Float32Regs:$dst,
> +                        (OpNode Float32Regs:$a, fpimm:$b))]>;
> +}
> +
> +multiclass F2<string OpcStr, SDNode OpNode> {
> +   def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
> +                      !strconcat(OpcStr, ".f64 \t$dst, $a;"),
> +                      [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
> +   def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
> +                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
> +                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
> +                      Requires<[doF32FTZ]>;
> +   def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
> +                      !strconcat(OpcStr, ".f32 \t$dst, $a;"),
> +                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
> +}
> +
> +//===----------------------------------------------------------------------===//
> +// NVPTX Instructions.
> +//===----------------------------------------------------------------------===//
> +
> +//-----------------------------------
> +// Integer Arithmetic
> +//-----------------------------------
> +
> +multiclass ADD_SUB_i1<SDNode OpNode> {
> +   def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
> +          "xor.pred \t$dst, $a, $b;",
> +      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
> +   def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
> +          "xor.pred \t$dst, $a, $b;",
> +      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
> +}
> +
> +defm ADD_i1 : ADD_SUB_i1<add>;
> +defm SUB_i1 : ADD_SUB_i1<sub>;
> +
> +
> +defm ADD : I3<"add.s", add>;
> +defm SUB : I3<"sub.s", sub>;
> +
> +defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
> +defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
> +
> +defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
> +defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
> +
> +// mul.wide PTX instruction
> +def SInt32Const : PatLeaf<(imm), [{
> +  const APInt &v = N->getAPIntValue();
> +  if (v.isSignedIntN(32))
> +    return true;
> +  return false;
> +}]>;
> +
> +def UInt32Const : PatLeaf<(imm), [{
> +  const APInt &v = N->getAPIntValue();
> +  if (v.isIntN(32))
> +    return true;
> +  return false;
> +}]>;
> +
> +def SInt16Const : PatLeaf<(imm), [{
> +  const APInt &v = N->getAPIntValue();
> +  if (v.isSignedIntN(16))
> +    return true;
> +  return false;
> +}]>;
> +
> +def UInt16Const : PatLeaf<(imm), [{
> +  const APInt &v = N->getAPIntValue();
> +  if (v.isIntN(16))
> +    return true;
> +  return false;
> +}]>;
> +
> +def Int5Const : PatLeaf<(imm), [{
> +  const APInt &v = N->getAPIntValue();
> +  // Check if 0 <= v < 32.
> +  // Only then will the result from (x << v) be i32.
> +  if (v.sge(0) && v.slt(32))
> +    return true;
> +  return false;
> +}]>;
> +
> +def Int4Const : PatLeaf<(imm), [{
> +  const APInt &v = N->getAPIntValue();
> +  // Check if 0 <= v < 16.
> +  // Only then will the result from (x << v) be i16.
> +  if (v.sge(0) && v.slt(16))
> +    return true;
> +  return false;
> +}]>;
> +
> +def SHL2MUL32 : SDNodeXForm<imm, [{
> +  const APInt &v = N->getAPIntValue();
> +  APInt temp(32, 1);
> +  return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
> +}]>;
> +
> +def SHL2MUL16 : SDNodeXForm<imm, [{
> +  const APInt &v = N->getAPIntValue();
> +  APInt temp(16, 1);
> +  return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
> +}]>;
> +
> +def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
> +                           (ins Int32Regs:$a, Int32Regs:$b),
> +                           "mul.wide.s32 \t$dst, $a, $b;", []>;
> +def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
> +                            (ins Int32Regs:$a, i64imm:$b),
> +                           "mul.wide.s32 \t$dst, $a, $b;", []>;
> +
> +def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
> +                           (ins Int32Regs:$a, Int32Regs:$b),
> +                           "mul.wide.u32 \t$dst, $a, $b;", []>;
> +def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
> +                            (ins Int32Regs:$a, i64imm:$b),
> +                           "mul.wide.u32 \t$dst, $a, $b;", []>;
> +
> +def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
> +                            (ins Int16Regs:$a, Int16Regs:$b),
> +                           "mul.wide.s16 \t$dst, $a, $b;", []>;
> +def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
> +                            (ins Int16Regs:$a, i32imm:$b),
> +                           "mul.wide.s16 \t$dst, $a, $b;", []>;
> +
> +def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
> +                            (ins Int16Regs:$a, Int16Regs:$b),
> +                           "mul.wide.u16 \t$dst, $a, $b;", []>;
> +def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
> +                            (ins Int16Regs:$a, i32imm:$b),
> +                           "mul.wide.u16 \t$dst, $a, $b;", []>;
> +
> +def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
> +          (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
> +          Requires<[doMulWide]>;
> +def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
> +          (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
> +          Requires<[doMulWide]>;
> +
> +def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
> +          (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
> +          Requires<[doMulWide]>;
> +def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
> +          (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
> +          Requires<[doMulWide]>;
> +
> +def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
> +          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
> +          Requires<[doMulWide]>;
> +def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
> +          (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
> +          Requires<[doMulWide]>;
> +
> +def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
> +          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
> +def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
> +          (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
> +          Requires<[doMulWide]>;
> +
> +def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
> +          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
> +def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
> +          (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
> +          Requires<[doMulWide]>;
> +
> +def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
> +          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
> +def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
> +          (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
> +          Requires<[doMulWide]>;
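
Worth spelling out what these patterns buy: a shift of a sign- or
zero-extended value becomes a single widening multiply against
1 << shift-amount, e.g.

  shl (sext i32 %x to i64), 3   -->   mul.wide.s32 %dst, %x, 8

since x << 3 == x * 8; SHL2MUL32 is what materializes the 8
(APInt(32, 1).shl(3)). Likewise (sext a) * (sext b) selects
mul.wide.s32 directly, avoiding a full 64-bit multiply.
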
> +
> +defm MULT : I3<"mul.lo.s", mul>;
> +
> +defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
> +defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
> +def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
> +            !strconcat("{{ \n\t",
> +            !strconcat(".reg \t.s16 temp1; \n\t",
> +            !strconcat(".reg \t.s16 temp2; \n\t",
> +            !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
> +            !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
> +            !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
> +            !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
> +            !strconcat("}}", "")))))))),
> +      [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
> +def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
> +            !strconcat("{{ \n\t",
> +            !strconcat(".reg \t.s16 temp1; \n\t",
> +            !strconcat(".reg \t.s16 temp2; \n\t",
> +            !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
> +            !strconcat("mov.b16 \ttemp2, $b; \n\t",
> +            !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
> +            !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
> +            !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
> +            !strconcat("}}", ""))))))))),
> +      [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
> +def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
> +            !strconcat("{{ \n\t",
> +            !strconcat(".reg \t.u16 temp1; \n\t",
> +            !strconcat(".reg \t.u16 temp2; \n\t",
> +            !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
> +            !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
> +            !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
> +            !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
> +            !strconcat("}}", "")))))))),
> +      [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
> +def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
> +            !strconcat("{{ \n\t",
> +            !strconcat(".reg \t.u16 temp1; \n\t",
> +            !strconcat(".reg \t.u16 temp2; \n\t",
> +            !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
> +            !strconcat("mov.b16 \ttemp2, $b; \n\t",
> +            !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
> +            !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
> +            !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
> +            !strconcat("}}", ""))))))))),
> +      [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
> +
> +
> +defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
> +defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
> +
> +defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
> +// The ri version will not be selected as DAGCombiner::visitSREM will lower it.
> +defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
> +// The ri version will not be selected as DAGCombiner::visitUREM will lower it.
> +
> +def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
> +                      (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
> +                      "mad.lo.s16 \t$dst, $a, $b, $c;",
> +                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
> +                        Int8Regs:$c))]>;
> +def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
> +                      (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
> +                      "mad.lo.s16 \t$dst, $a, $b, $c;",
> +                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
> +                        imm:$c))]>;
> +def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
> +                      (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
> +                      "mad.lo.s16 \t$dst, $a, $b, $c;",
> +                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
> +                        Int8Regs:$c))]>;
> +def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
> +                      (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
> +                      "mad.lo.s16 \t$dst, $a, $b, $c;",
> +                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
> +                        imm:$c))]>;
> +
> +def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
> +                      (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
> +                      "mad.lo.s16 \t$dst, $a, $b, $c;",
> +                      [(set Int16Regs:$dst, (add
> +                        (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
> +def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
> +                      (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
> +                      "mad.lo.s16 \t$dst, $a, $b, $c;",
> +                      [(set Int16Regs:$dst, (add
> +                        (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
> +def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
> +                      (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
> +                      "mad.lo.s16 \t$dst, $a, $b, $c;",
> +                      [(set Int16Regs:$dst, (add
> +                        (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
> +def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
> +    (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
> +                      "mad.lo.s16 \t$dst, $a, $b, $c;",
> +                      [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
> +                        imm:$c))]>;
> +
> +def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
> +                      (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
> +                      "mad.lo.s32 \t$dst, $a, $b, $c;",
> +                      [(set Int32Regs:$dst, (add
> +                        (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
> +def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
> +                      (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
> +                      "mad.lo.s32 \t$dst, $a, $b, $c;",
> +                      [(set Int32Regs:$dst, (add
> +                        (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
> +def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
> +                      (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
> +                      "mad.lo.s32 \t$dst, $a, $b, $c;",
> +                      [(set Int32Regs:$dst, (add
> +                        (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
> +def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
> +                      (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
> +                      "mad.lo.s32 \t$dst, $a, $b, $c;",
> +                      [(set Int32Regs:$dst, (add
> +                        (mul Int32Regs:$a, imm:$b), imm:$c))]>;
> +
> +def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
> +                      (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
> +                      "mad.lo.s64 \t$dst, $a, $b, $c;",
> +                      [(set Int64Regs:$dst, (add
> +                        (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
> +def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
> +                      (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
> +                      "mad.lo.s64 \t$dst, $a, $b, $c;",
> +                      [(set Int64Regs:$dst, (add
> +                        (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
> +def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
> +                      (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
> +                      "mad.lo.s64 \t$dst, $a, $b, $c;",
> +                      [(set Int64Regs:$dst, (add
> +                        (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
> +def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
> +                      (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
> +                      "mad.lo.s64 \t$dst, $a, $b, $c;",
> +                      [(set Int64Regs:$dst, (add
> +                        (mul Int64Regs:$a, imm:$b), imm:$c))]>;
> +
> +
> +def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
> +                     !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
> +                                 "neg.s16 \t$dst, $dst;"),
> +         [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
> +def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
> +                     "neg.s16 \t$dst, $src;",
> +         [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
> +def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
> +                     "neg.s32 \t$dst, $src;",
> +         [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
> +def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
> +                     "neg.s64 \t$dst, $src;",
> +         [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
> +
> +//-----------------------------------
> +// Floating Point Arithmetic
> +//-----------------------------------
> +
> +// Constant 1.0f
> +def FloatConst1 : PatLeaf<(fpimm), [{
> +    if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
> +      return false;
> +    float f = (float)N->getValueAPF().convertToFloat();
> +    return (f==1.0f);
> +}]>;
> +// Constant (double)1.0
> +def DoubleConst1 : PatLeaf<(fpimm), [{
> +    if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
> +      return false;
> +    double d = (double)N->getValueAPF().convertToDouble();
> +    return (d==1.0);
> +}]>;
> +
> +defm FADD : F3<"add", fadd>;
> +defm FSUB : F3<"sub", fsub>;
> +defm FMUL : F3<"mul", fmul>;
> +
> +defm FADD_rn : F3_rn<"add", fadd>;
> +defm FSUB_rn : F3_rn<"sub", fsub>;
> +defm FMUL_rn : F3_rn<"mul", fmul>;
> +
> +defm FABS : F2<"abs", fabs>;
> +defm FNEG : F2<"neg", fneg>;
> +defm FSQRT : F2<"sqrt.rn", fsqrt>;
> +
> +//
> +// F64 division
> +//
> +def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins f64imm:$a, Float64Regs:$b),
> +                      "rcp.rn.f64 \t$dst, $b;",
> +                      [(set Float64Regs:$dst,
> +                        (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
> +def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins Float64Regs:$a, Float64Regs:$b),
> +                      "div.rn.f64 \t$dst, $a, $b;",
> +                      [(set Float64Regs:$dst,
> +                        (fdiv Float64Regs:$a, Float64Regs:$b))]>;
> +def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins Float64Regs:$a, f64imm:$b),
> +                      "div.rn.f64 \t$dst, $a, $b;",
> +                      [(set Float64Regs:$dst,
> +                        (fdiv Float64Regs:$a, fpimm:$b))]>;
> +
> +//
> +// F32 Approximate reciprocal
> +//
> +def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins f32imm:$a, Float32Regs:$b),
> +                      "rcp.approx.ftz.f32 \t$dst, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
> +                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
> +def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
> +                        (ins f32imm:$a, Float32Regs:$b),
> +                       "rcp.approx.f32 \t$dst, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
> +                      Requires<[do_DIVF32_APPROX]>;
> +//
> +// F32 Approximate division
> +//
> +def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b),
> +                      "div.approx.ftz.f32 \t$dst, $a, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
> +                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
> +def FDIV32approxrr     : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b),
> +                      "div.approx.f32 \t$dst, $a, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
> +                      Requires<[do_DIVF32_APPROX]>;
> +//
> +// F32 Semi-accurate reciprocal
> +//
> +// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
> +//
> +def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins f32imm:$a, Float32Regs:$b),
> +                      "rcp.approx.ftz.f32 \t$dst, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
> +                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
> +def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins f32imm:$a, Float32Regs:$b),
> +                      "rcp.approx.f32 \t$dst, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
> +                      Requires<[do_DIVF32_FULL]>;
> +//
> +// F32 Semi-accurate division
> +//
> +def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b),
> +                      "div.full.ftz.f32 \t$dst, $a, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
> +                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
> +def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, f32imm:$b),
> +                      "div.full.ftz.f32 \t$dst, $a, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv Float32Regs:$a, fpimm:$b))]>,
> +                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
> +def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b),
> +                      "div.full.f32 \t$dst, $a, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
> +                      Requires<[do_DIVF32_FULL]>;
> +def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, f32imm:$b),
> +                      "div.full.f32 \t$dst, $a, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv Float32Regs:$a, fpimm:$b))]>,
> +                      Requires<[do_DIVF32_FULL]>;
> +//
> +// F32 Accurate reciprocal
> +//
> +def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                        (ins f32imm:$a, Float32Regs:$b),
> +                       "rcp.rn.ftz.f32 \t$dst, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
> +                      Requires<[reqPTX20, doF32FTZ]>;
> +def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins f32imm:$a, Float32Regs:$b),
> +                       "rcp.rn.f32 \t$dst, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
> +                      Requires<[reqPTX20]>;
> +//
> +// F32 Accurate division
> +//
> +def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b),
> +                      "div.rn.ftz.f32 \t$dst, $a, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
> +                      Requires<[doF32FTZ, reqPTX20]>;
> +def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, f32imm:$b),
> +                      "div.rn.ftz.f32 \t$dst, $a, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv Float32Regs:$a, fpimm:$b))]>,
> +                      Requires<[doF32FTZ, reqPTX20]>;
> +def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b),
> +                      "div.rn.f32 \t$dst, $a, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
> +                      Requires<[reqPTX20]>;
> +def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, f32imm:$b),
> +                      "div.rn.f32 \t$dst, $a, $b;",
> +                      [(set Float32Regs:$dst,
> +                        (fdiv Float32Regs:$a, fpimm:$b))]>,
> +                      Requires<[reqPTX20]>;
> +
> +
> +multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
> +   def rrr : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
> +                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
> +                      [(set Float32Regs:$dst, (fadd
> +                        (fmul Float32Regs:$a, Float32Regs:$b),
> +                        Float32Regs:$c))]>, Requires<[Pred]>;
> +   // This is to work around a weird bug in TableGen that does not
> +   // automatically generate the following permuted rule rrr2 from the
> +   // above rrr. So we explicitly add it here. This happens to FMA32 only.
> +   // See the comments at FMAD32 and FMA32 for more information.
> +   def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
> +                        (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
> +                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
> +                      [(set Float32Regs:$dst, (fadd Float32Regs:$c,
> +                        (fmul Float32Regs:$a, Float32Regs:$b)))]>,
> +                      Requires<[Pred]>;
> +   def rri : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
> +                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
> +                      [(set Float32Regs:$dst, (fadd
> +                        (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
> +                      Requires<[Pred]>;
> +   def rir : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
> +                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
> +                      [(set Float32Regs:$dst, (fadd
> +                        (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
> +                      Requires<[Pred]>;
> +   def rii : NVPTXInst<(outs Float32Regs:$dst),
> +                      (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
> +                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
> +                      [(set Float32Regs:$dst, (fadd
> +                        (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
> +                      Requires<[Pred]>;
> +}
> +
> +multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
> +   def rrr : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
> +                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
> +                      [(set Float64Regs:$dst, (fadd
> +                        (fmul Float64Regs:$a, Float64Regs:$b),
> +                        Float64Regs:$c))]>, Requires<[Pred]>;
> +   def rri : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
> +                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
> +                      [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
> +                        Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
> +   def rir : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
> +                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
> +                      [(set Float64Regs:$dst, (fadd
> +                        (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
> +                      Requires<[Pred]>;
> +   def rii : NVPTXInst<(outs Float64Regs:$dst),
> +                      (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
> +                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
> +                      [(set Float64Regs:$dst, (fadd
> +                        (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
> +                      Requires<[Pred]>;
> +}
> +
> +// For an unknown reason (most likely a bug in TableGen), TableGen does not
> +// automatically generate the rrr2 rule from the rrr rule (see FPCONTRACT32)
> +// for FMA32, though it does for FMAD32.
> +// If we reverse the order of the following two defm lines, then the rrr2
> +// rule is generated for FMA32, but not for FMAD32.
> +// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
> +defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
> +defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
> +defm FMA32_ftz  : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
> +defm FMA32  : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
> +defm FMA64  : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
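The practical effect of carrying both rrr and rrr2 is that both operand
orders of the fadd contract. A minimal CUDA sketch of the two shapes (the
kernel and names are illustrative, not part of the patch):

    __global__ void contract(float *out, float a, float b, float c) {
      out[0] = a * b + c;  // fadd (fmul a, b), c  -- the rrr pattern
      out[1] = c + a * b;  // fadd c, (fmul a, b)  -- the hand-written rrr2
    }

Presumably, without the explicit rrr2, only the first line would select to
fma.rn.f32.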
> +
> +// b*c-a => fmad(b, c, -a)
> +multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
> +  def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
> +          (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
> +          Requires<[Pred]>;
> +}
> +
> +// a-b*c => fmad(-b,c, a)
> +// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
> +// b*c-a => fmad(b, c, -a)
> +// - legal because b*c-a <=> b*c+(-a)
> +multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
> +  def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
> +          (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
> +          Requires<[Pred]>;
> +  def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
> +          (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
> +          Requires<[Pred]>;
> +}
> +
> +// a-b*c => fmad(-b,c, a)
> +// b*c-a => fmad(b, c, -a)
> +multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
> +  def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
> +          (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
> +          Requires<[Pred]>;
> +
> +  def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
> +          (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
> +          Requires<[Pred]>;
> +}
> +
> +defm FMAF32ext_ftz  : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
> +defm FMAF32ext  : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
> +defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
> +defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
> +defm FMAF64ext  : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
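If I read these patterns right, the source-level shapes they catch are the
two fsub forms around a multiply. A hedged CUDA sketch (illustrative only):

    __global__ void contract_sub(float *out, float a, float b, float c) {
      out[0] = a - b * c;  // fsub a, (fmul b, c)  ->  fma(-b, c, a)
      out[1] = b * c - a;  // fsub (fmul b, c), a  ->  fma(b, c, -a)
    }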
> +
> +def SINF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
> +                      "sin.approx.f32 \t$dst, $src;",
> +                      [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
> +def COSF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
> +                      "cos.approx.f32 \t$dst, $src;",
> +                      [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
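These map LLVM's fsin/fcos directly to the approximate PTX ops, i.e. the
precision class CUDA exposes as __sinf/__cosf rather than sinf/cosf. A
sketch of the source-level equivalent:

    __global__ void fast_trig(float *out, float x) {
      out[0] = __sinf(x);  // sin.approx.f32
      out[1] = __cosf(x);  // cos.approx.f32
    }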
> +
> +//-----------------------------------
> +// Logical Arithmetic
> +//-----------------------------------
> +
> +multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
> +  def b1rr:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
> +                      !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
> +                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
> +  def b1ri:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
> +                      !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
> +                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
> +  def b8rr:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
> +                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
> +                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
> +  def b8ri:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
> +                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
> +                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
> +  def b16rr:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
> +                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
> +                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
> +                        Int16Regs:$b))]>;
> +  def b16ri:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
> +                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
> +                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
> +  def b32rr:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
> +                      !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
> +                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
> +                        Int32Regs:$b))]>;
> +  def b32ri:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
> +                      !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
> +                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
> +  def b64rr:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
> +                      !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
> +                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
> +                        Int64Regs:$b))]>;
> +  def b64ri:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
> +                      !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
> +                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
> +}
> +
> +defm OR  : LOG_FORMAT<"or", or>;
> +defm AND : LOG_FORMAT<"and", and>;
> +defm XOR : LOG_FORMAT<"xor", xor>;
> +
> +def NOT1:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
> +                      "not.pred \t$dst, $src;",
> +                      [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
> +def NOT8:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
> +                      "not.b16 \t$dst, $src;",
> +                      [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
> +def NOT16:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
> +                      "not.b16 \t$dst, $src;",
> +                      [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
> +def NOT32:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
> +                      "not.b32 \t$dst, $src;",
> +                      [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
> +def NOT64:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
> +                      "not.b64 \t$dst, $src;",
> +                      [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
> +
> +// For shifts, the second src operand must be a 32-bit value
> +multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
> +   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
> +                      Int32Regs:$b),
> +                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
> +                        Int32Regs:$b))]>;
> +   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
> +                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
> +                        (i32 imm:$b)))]>;
> +   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
> +                      Int32Regs:$b),
> +                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
> +                        Int32Regs:$b))]>;
> +   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
> +                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
> +                        (i32 imm:$b)))]>;
> +   def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
> +                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                      [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
> +                        (i32 imm:$b)))]>;
> +   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
> +                      Int32Regs:$b),
> +                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
> +                        Int32Regs:$b))]>;
> +   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
> +                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
> +                        (i32 imm:$b)))]>;
> +   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
> +                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
> +                        Int32Regs:$b))]>;
> +   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
> +                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
> +                        (i32 imm:$b)))]>;
> +}
> +
> +defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
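The Int32Regs:$b operands reflect that PTX shift instructions take a 32-bit
count even for 64-bit values. A small CUDA sketch (illustrative):

    __global__ void shifts(unsigned long long *out,
                           unsigned long long x, unsigned n) {
      // The count n stays 32-bit; only the shifted value is 64-bit,
      // matching the i64rr pattern's Int64Regs:$a / Int32Regs:$b split.
      out[0] = x << n;
    }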
> +
> +// For shifts, the second src operand must be a 32-bit value.
> +// The 8-bit case needs an extra cvt: i8 values live in 16-bit registers,
> +// so the low byte must be re-extended before a right shift.
> +multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
> +   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
> +                      Int32Regs:$b),
> +                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
> +                        Int32Regs:$b))]>;
> +   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
> +                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
> +                        (i32 imm:$b)))]>;
> +   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
> +                      Int32Regs:$b),
> +                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
> +                        Int32Regs:$b))]>;
> +   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
> +                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
> +                        (i32 imm:$b)))]>;
> +   def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
> +                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +                      [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
> +                        (i32 imm:$b)))]>;
> +   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
> +                      Int32Regs:$b),
> +                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
> +                        Int32Regs:$b))]>;
> +   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
> +                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
> +                        (i32 imm:$b)))]>;
> +   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
> +                      !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
> +                      !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
> +                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
> +                        Int32Regs:$b))]>;
> +   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
> +                      !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
> +                      !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
> +                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
> +                        (i32 imm:$b)))]>;
> +}
> +
> +defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
> +defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
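The i8 variants prepend cvt.s16.s8 / cvt.u16.u8 because the value sits in a
16-bit register whose high byte is not guaranteed meaningful, so a right
shift must first re-extend the low byte. A hedged CUDA sketch (assuming the
8-bit ashr survives to instruction selection):

    __global__ void shift8(signed char *out, signed char x, unsigned n) {
      // Expected lowering per the i8rr pattern: cvt.s16.s8 to
      // sign-extend in place, then shr.s16.
      out[0] = (signed char)(x >> n);
    }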
> +
> +// 32-bit rotate
> +def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
> +  (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
> +    !strconcat("{{\n\t",
> +    !strconcat(".reg .b32 %lhs;\n\t",
> +    !strconcat(".reg .b32 %rhs;\n\t",
> +    !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
> +    !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
> +    !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
> +    !strconcat("}}", ""))))))),
> +    []>;
> +
> +def SUB_FRM_32 : SDNodeXForm<imm, [{
> +    return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
> +}]>;
> +
> +def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
> +          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
> +def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
> +          (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
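The immediate-rotate expansion relies on the two shifts producing disjoint
result bits, so the add.u32 above behaves as an or; SUB_FRM_32 folds the
32-k at selection time. The equivalent CUDA, as a sketch assuming
0 < k < 32:

    __device__ unsigned rotl32(unsigned x, unsigned k) {
      // Same shape as ROT32imm_sw: shl by k, shr by 32-k, combine.
      return (x << k) | (x >> (32u - k));
    }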
> +
> +def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
> +    Int32Regs:$amt),
> +    !strconcat("{{\n\t",
> +    !strconcat(".reg .b32 %lhs;\n\t",
> +    !strconcat(".reg .b32 %rhs;\n\t",
> +    !strconcat(".reg .b32 %amt2;\n\t",
> +    !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
> +    !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
> +    !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
> +    !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
> +    !strconcat("}}", ""))))))))),
> +    [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
> +
> +def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
> +    Int32Regs:$amt),
> +    !strconcat("{{\n\t",
> +    !strconcat(".reg .b32 %lhs;\n\t",
> +    !strconcat(".reg .b32 %rhs;\n\t",
> +    !strconcat(".reg .b32 %amt2;\n\t",
> +    !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
> +    !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
> +    !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
> +    !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
> +    !strconcat("}}", ""))))))))),
> +    [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
> +
> +// 64-bit rotate
> +def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
> +    i32imm:$amt1, i32imm:$amt2),
> +    !strconcat("{{\n\t",
> +    !strconcat(".reg .b64 %lhs;\n\t",
> +    !strconcat(".reg .b64 %rhs;\n\t",
> +    !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
> +    !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
> +    !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
> +    !strconcat("}}", ""))))))),
> +    []>;
> +
> +def SUB_FRM_64 : SDNodeXForm<imm, [{
> +    return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
> +}]>;
> +
> +def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
> +          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
> +def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
> +          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
> +
> +def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
> +    Int32Regs:$amt),
> +    !strconcat("{{\n\t",
> +    !strconcat(".reg .b64 %lhs;\n\t",
> +    !strconcat(".reg .b64 %rhs;\n\t",
> +    !strconcat(".reg .u32 %amt2;\n\t",
> +    !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
> +    !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
> +    !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
> +    !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
> +    !strconcat("}}", ""))))))))),
> +    [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
> +
> +def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
> +    Int32Regs:$amt),
> +    !strconcat("{{\n\t",
> +    !strconcat(".reg .b64 %lhs;\n\t",
> +    !strconcat(".reg .b64 %rhs;\n\t",
> +    !strconcat(".reg .u32 %amt2;\n\t",
> +    !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
> +    !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
> +    !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
> +    !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
> +    !strconcat("}}", ""))))))))),
> +    [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
> +
> +
> +//-----------------------------------
> +// Data Movement (Load / Store, Move)
> +//-----------------------------------
> +
> +def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
> +  [SDNPWantRoot]>;
> +def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
> +  [SDNPWantRoot]>;
> +
> +def MEMri : Operand<i32> {
> +  let PrintMethod = "printMemOperand";
> +  let MIOperandInfo = (ops Int32Regs, i32imm);
> +}
> +def MEMri64 : Operand<i64> {
> +  let PrintMethod = "printMemOperand";
> +  let MIOperandInfo = (ops Int64Regs, i64imm);
> +}
> +
> +def imem : Operand<iPTR> {
> +    let PrintMethod = "printOperand";
> +}
> +
> +def imemAny : Operand<iPTRAny> {
> +    let PrintMethod = "printOperand";
> +}
> +
> +def LdStCode : Operand<i32> {
> +    let PrintMethod = "printLdStCode";
> +}
> +
> +def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
> +def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
> +
> +def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
> +                     "mov.u32 \t$dst, $a;",
> +                     [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
> +
> +def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
> +                     "mov.u64 \t$dst, $a;",
> +                     [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
> +
> +// copyPhysReg is hard-coded in NVPTXInstrInfo.cpp
> +let IsSimpleMove=1 in {
> +def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
> +                   "mov.pred \t$dst, $sss;", []>;
> +def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
> +                    "mov.u16 \t$dst, $sss;", []>;
> +def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
> +                    "mov.u16 \t$dst, $sss;", []>;
> +def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
> +                    "mov.u32 \t$dst, $sss;", []>;
> +def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
> +                    "mov.u64 \t$dst, $sss;", []>;
> +
> +def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
> +                    "mov.f32 \t$dst, $src;", []>;
> +def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
> +                    "mov.f64 \t$dst, $src;", []>;
> +}
> +def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
> +                    "mov.pred \t$dst, $src;",
> +          [(set Int1Regs:$dst, imm:$src)]>;
> +def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
> +                    "mov.u16 \t$dst, $src;",
> +          [(set Int8Regs:$dst, imm:$src)]>;
> +def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
> +                    "mov.u16 \t$dst, $src;",
> +          [(set Int16Regs:$dst, imm:$src)]>;
> +def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
> +                    "mov.u32 \t$dst, $src;",
> +          [(set Int32Regs:$dst, imm:$src)]>;
> +def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
> +                    "mov.u64 \t$dst, $src;",
> +          [(set Int64Regs:$dst, imm:$src)]>;
> +
> +def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
> +                    "mov.f32 \t$dst, $src;",
> +          [(set Float32Regs:$dst, fpimm:$src)]>;
> +def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
> +                    "mov.f64 \t$dst, $src;",
> +          [(set Float64Regs:$dst, fpimm:$src)]>;
> +
> +def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
> +
> +//---- Copy Frame Index ----
> +def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
> +                        "add.u32 \t$dst, ${addr:add};",
> +                        [(set Int32Regs:$dst, ADDRri:$addr)]>;
> +def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
> +                        "add.u64 \t$dst, ${addr:add};",
> +                        [(set Int64Regs:$dst, ADDRri64:$addr)]>;
> +
> +//-----------------------------------
> +// Comparison and Selection
> +//-----------------------------------
> +
> +// Generate a string block like
> +// {
> +//   .reg .pred p;
> +//   setp.gt.s16 p, %a, %b;
> +//   selp.s16 %dst, -1, 0, p;
> +// }
> +// when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
> +class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
> +  string b> {
> +  string t1  = "{{\n\t.reg .pred p;\n\t";
> +  string t2  = !strconcat(t1 , OpcStr);
> +  string t3  = !strconcat(t2 , sz1);
> +  string t4  = !strconcat(t3 , " \tp, ");
> +  string t5  = !strconcat(t4 , a);
> +  string t6  = !strconcat(t5 , ", ");
> +  string t7  = !strconcat(t6 , b);
> +  string t8  = !strconcat(t7 , ";\n\tselp.s");
> +  string t9  = !strconcat(t8 , sz2);
> +  string t10 = !strconcat(t9, " \t");
> +  string t11 = !strconcat(t10, d);
> +  string s   = !strconcat(t11, ", -1, 0, p;\n\t}}");
> +}
> +
> +// Generate a string block like
> +// {
> +//   .reg .pred p;
> +//   .reg .s16 %temp1;
> +//   .reg .s16 %temp2;
> +//   cvt.s16.s8 %temp1, %a;
> +//   cvt.s16.s8 %temp2, %b;
> +//   setp.gt.s16 p, %temp1, %temp2;
> +//   selp.s16 %dst, -1, 0, p;
> +// }
> +// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
> +class Set_Stri8<string OpcStr, string d, string a, string b, string type,
> +  string cvt> {
> +  string t1  = "{{\n\t.reg .pred p;\n\t";
> +  string t2  = !strconcat(t1, ".reg .");
> +  string t3  = !strconcat(t2, type);
> +  string t4  = !strconcat(t3, " %temp1;\n\t");
> +  string t5  = !strconcat(t4, ".reg .");
> +  string t6  = !strconcat(t5, type);
> +  string t7  = !strconcat(t6, " %temp2;\n\t");
> +  string t8  = !strconcat(t7, cvt);
> +  string t9  = !strconcat(t8, " \t%temp1, ");
> +  string t10 = !strconcat(t9, a);
> +  string t11 = !strconcat(t10, ";\n\t");
> +  string t12 = !strconcat(t11, cvt);
> +  string t13 = !strconcat(t12, " \t%temp2, ");
> +  string t14 = !strconcat(t13, b);
> +  string t15 = !strconcat(t14, ";\n\t");
> +  string t16 = !strconcat(t15, OpcStr);
> +  string t17 = !strconcat(t16, "16");
> +  string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
> +  string t19 = !strconcat(t18, "selp.s16 \t");
> +  string t20 = !strconcat(t19, d);
> +  string s   = !strconcat(t20, ", -1, 0, p;\n\t}}");
> +}
> +
> +multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
> +  string TypeStr, string CVTStr> {
> +  def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
> +                     Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
> +               []>;
> +  def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
> +      Int16Regs:$b),
> +                     Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
> +               []>;
> +  def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
> +      Int32Regs:$b),
> +                     Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
> +               []>;
> +  def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
> +      Int64Regs:$b),
> +                     Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
> +               []>;
> +
> +  def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
> +                     Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
> +               [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
> +  def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
> +                     Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
> +               [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
> +  def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
> +                     Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
> +               [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
> +  def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
> +                 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
> +  def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
> +                 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
> +  def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
> +                 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
> +  def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
> +                 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
> +  def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
> +                 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
> +  def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
> +                 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
> +  def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
> +                 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
> +  def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
> +                 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
> +  def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
> +                 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
> +
> +  def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
> +                     Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
> +               [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
> +  def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
> +                     Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
> +               [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
> +  def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
> +                     Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
> +               [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
> +  def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
> +      Int16Regs:$b),
> +                 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
> +  def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
> +                 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
> +  def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
> +                 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
> +  def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
> +      Int32Regs:$b),
> +                 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
> +  def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
> +                 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
> +  def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
> +                 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
> +  def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
> +      Int64Regs:$b),
> +                 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
> +  def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
> +                 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
> +  def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
> +                 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
> +}
> +
> +multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
> +  def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
> +      Float32Regs:$b),
> +                     Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
> +               []>, Requires<[doF32FTZ]>;
> +  def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
> +      Float32Regs:$b),
> +                     Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
> +               []>;
> +  def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
> +      Float64Regs:$b),
> +                     Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
> +               []>;
> +  def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
> +      Float64Regs:$b),
> +                     Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
> +               []>;
> +
> +  def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a
> +      , Float32Regs:$b),
> +                 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>
> +  , Requires<[doF32FTZ]>;
> +  def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
> +    (ins Float32Regs:$a, Float32Regs:$b),
> +                 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
> +  def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
> +    (ins Float32Regs:$a, f32imm:$b),
> +                 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
> +  Requires<[doF32FTZ]>;
> +  def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
> +                 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
> +  def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
> +    (ins f32imm:$a, Float32Regs:$b),
> +                 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
> +  Requires<[doF32FTZ]>;
> +  def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
> +                 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
> +  def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
> +    (ins Float64Regs:$a, Float64Regs:$b),
> +                 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
> +  def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
> +                 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
> +  def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
> +                 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
> +               [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
> +
> +  def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
> +    (ins Float32Regs:$a, Float32Regs:$b),
> +                 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
> +  def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
> +    (ins Float32Regs:$a, Float32Regs:$b),
> +                 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
> +  def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
> +    (ins Float32Regs:$a, f32imm:$b),
> +                 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
> +  def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
> +    (ins Float32Regs:$a, f32imm:$b),
> +                 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
> +  def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
> +    (ins f32imm:$a, Float32Regs:$b),
> +                 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
> +  def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
> +    (ins f32imm:$a, Float32Regs:$b),
> +                 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
> +  def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
> +    (ins Float64Regs:$a, Float64Regs:$b),
> +                 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
> +  def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
> +    (ins Float64Regs:$a, f64imm:$b),
> +                 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
> +  def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
> +    (ins f64imm:$a, Float64Regs:$b),
> +                 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
> +               [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
> +}
> +
> +defm ISetSGT
> +: ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
> +defm ISetUGT
> +: ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
> +defm ISetSLT
> +: ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
> +defm ISetULT
> +: ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
> +defm ISetSGE
> +: ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
> +defm ISetUGE
> +: ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
> +defm ISetSLE
> +: ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
> +defm ISetULE
> +: ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
> +defm ISetSEQ
> +: ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
> +defm ISetUEQ
> +: ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
> +defm ISetSNE
> +: ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
> +defm ISetUNE
> +: ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;
> +
> +def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
> +  (ins Int1Regs:$a, Int1Regs:$b),
> +                      "xor.pred \t$dst, $a, $b;",
> +            [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
> +def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
> +  (ins Int1Regs:$a, Int1Regs:$b),
> +                      "xor.pred \t$dst, $a, $b;",
> +            [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
> +def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
> +  (ins Int1Regs:$a, Int1Regs:$b),
> +            !strconcat("{{\n\t",
> +            !strconcat(".reg .pred temp;\n\t",
> +            !strconcat("xor.pred \ttemp, $a, $b;\n\t",
> +            !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
> +            [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
> +def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
> +  (ins Int1Regs:$a, Int1Regs:$b),
> +            !strconcat("{{\n\t",
> +            !strconcat(".reg .pred temp;\n\t",
> +            !strconcat("xor.pred \ttemp, $a, $b;\n\t",
> +            !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
> +            [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
> +
> +// Compare 2 i1's and produce a u32
> +def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
> +  (ins Int1Regs:$a, Int1Regs:$b),
> +                  !strconcat("{{\n\t",
> +                  !strconcat(".reg .pred temp;\n\t",
> +                  !strconcat("xor.pred \ttemp, $a, $b;\n\t",
> +                  !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))),
> +                  [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
> +def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
> +  (ins Int1Regs:$a, Int1Regs:$b),
> +                  !strconcat("{{\n\t",
> +                  !strconcat(".reg .pred temp;\n\t",
> +                  !strconcat("xor.pred \ttemp, $a, $b;\n\t",
> +                  !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))),
> +                  [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
> +
> +defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>;
> +defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>;
> +defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>;
> +defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>;
> +defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>;
> +defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>;
> +
> +defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
> +defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.", setult>;
> +defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.", setuge>;
> +defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.", setule>;
> +defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.", setueq>;
> +defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.", setune>;
> +
> +defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.", seto>;
> +defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.", setuo>;
> +
> +def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
> +                     (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
> +                             (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
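PTX selp has no .pred form, hence the open-coded select on predicate
registers here. The logic it implements, as a CUDA sketch:

    __device__ bool select_pred(bool p, bool a, bool b) {
      // (p AND a) OR (NOT p AND b) -- the ORb1rr/ANDb1rr/NOT1 expansion.
      return (p && a) || (!p && b);
    }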
> +def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst),
> +  (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
> +                      "selp.b16 \t$dst, $a, $b, $p;",
> +      [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
> +def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst),
> +  (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
> +                      "selp.b16 \t$dst, $a, $b, $p;",
> +      [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
> +def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst),
> +  (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
> +                      "selp.b16 \t$dst, $a, $b, $p;",
> +      [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
> +def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst),
> +  (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
> +                      "selp.b16 \t$dst, $a, $b, $p;",
> +      [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
> +
> +def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst),
> +  (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
> +                      "selp.b16 \t$dst, $a, $b, $p;",
> +      [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
> +def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst),
> +  (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
> +                      "selp.b16 \t$dst, $a, $b, $p;",
> +      [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
> +def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst),
> +  (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
> +                      "selp.b16 \t$dst, $a, $b, $p;",
> +      [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
> +def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst),
> +  (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
> +                      "selp.b16 \t$dst, $a, $b, $p;",
> +      [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
> +
> +def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst),
> +  (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
> +                      "selp.b32 \t$dst, $a, $b, $p;",
> +      [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
> +def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst),
> +  (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
> +                      "selp.b32 \t$dst, $a, $b, $p;",
> +      [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
> +def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst),
> +  (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
> +                      "selp.b32 \t$dst, $a, $b, $p;",
> +      [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
> +def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst),
> +  (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
> +                      "selp.b32 \t$dst, $a, $b, $p;",
> +      [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
> +
> +def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst),
> +  (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
> +                      "selp.b64 \t$dst, $a, $b, $p;",
> +      [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
> +def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst),
> +  (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
> +                      "selp.b64 \t$dst, $a, $b, $p;",
> +      [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
> +def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst),
> +  (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
> +                      "selp.b64 \t$dst, $a, $b, $p;",
> +      [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
> +def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst),
> +  (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
> +                      "selp.b64 \t$dst, $a, $b, $p;",
> +      [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
> +
> +def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst),
> +  (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
> +                      "selp.f32 \t$dst, $a, $b, $p;",
> +      [(set Float32Regs:$dst,
> +        (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
> +def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst),
> +  (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
> +                      "selp.f32 \t$dst, $a, $b, $p;",
> +      [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
> +def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst),
> +  (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
> +                      "selp.f32 \t$dst, $a, $b, $p;",
> +      [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
> +def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst),
> +  (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
> +                      "selp.f32 \t$dst, $a, $b, $p;",
> +      [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
> +
> +def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst),
> +  (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
> +                      "selp.f64 \t$dst, $a, $b, $p;",
> +      [(set Float64Regs:$dst,
> +        (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
> +def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst),
> +  (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
> +                      "selp.f64 \t$dst, $a, $b, $p;",
> +      [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
> +def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst),
> +  (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
> +                      "selp.f64 \t$dst, $a, $b, $p;",
> +      [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
> +def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst),
> +  (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
> +                      "selp.f64 \t$dst, $a, $b, $p;",
> +      [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
> +
> +//def ld_param         : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
> +//                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
> +
> +def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
> +  SDTCisInt<2>]>;
> +def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
> +  SDTCisInt<1>, SDTCisInt<2>]>;
> +def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
> +def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
> +def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
> +def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
> +def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
> +def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
> +def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
> +def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
> +def SDTCallValProfile : SDTypeProfile<1, 0, []>;
> +def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
> +def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>;
> +def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
> +def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
> +
> +def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
> +  SDTDeclareScalarParamProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
> +  SDTDeclareParamProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def DeclareRet   : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def LoadParam    : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
> +                         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
> +def PrintCall    : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def StoreParam   : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def MoveToParam  : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def CallArg      : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def LastCallArg  : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def CallArgEnd   : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def CallVoid     : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def Prototype    : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def CallVal      : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def MoveParam    : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
> +                         []>;
> +def MoveRetval   : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
> +                         [SDNPHasChain, SDNPSideEffect]>;
> +def StoreRetval  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
> +                         [SDNPHasChain, SDNPSideEffect]>;
> +def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
> +                         [SDNPHasChain, SDNPSideEffect]>;
> +def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
> +  SDTPseudoUseParamProfile,
> +                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
> +def RETURNNode   : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
> +                         [SDNPHasChain, SDNPSideEffect]>;
> +
> +class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
> +                !strconcat(!strconcat("ld.param", opstr),
> +                "\t$dst, [retval0+$b];"),
> +                [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
> +
> +class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
> +                !strconcat(!strconcat("mov", opstr),
> +                "\t$dst, retval$b;"),
> +                [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
> +
> +class StoreParamInst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
> +                !strconcat(!strconcat("st.param", opstr),
> +                "\t[param$a+$b], $val;"),
> +                [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
> +
> +class MoveToParamInst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
> +                !strconcat(!strconcat("mov", opstr),
> +                "\tparam$a, $val;"),
> +                [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
> +
> +class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
> +                !strconcat(!strconcat("st.param", opstr),
> +                "\t[func_retval0+$a], $val;"),
> +                [(StoreRetval (i32 imm:$a), regclass:$val)]>;
> +
> +class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
> +                !strconcat(!strconcat("mov", opstr),
> +                "\tfunc_retval$num, $val;"),
> +                [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
> +
> +class MoveRetvalInst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs), (ins regclass:$val),
> +                !strconcat(!strconcat("mov", opstr),
> +                "\tfunc_retval0, $val;"),
> +                [(MoveRetval regclass:$val)]>;
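Taken together, these classes print the ABI sequence for a device call that
survives inlining: st.param for each argument, the call with its
(retval0, ...) list, then ld.param from [retval0]. A CUDA sketch of code
that would exercise them (names illustrative, not from the patch):

    __device__ __noinline__ float twice(float v) { return v * 2.0f; }

    __global__ void caller(float *out, float x) {
      out[0] = twice(x);  // st.param.f32 / call (retval0) / ld.param.f32
    }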
> +
> +def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
> +"call (retval0), ",
> +                                [(PrintCall (i32 1))]>;
> +def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
> +"call (retval0, retval1), ",
> +                                [(PrintCall (i32 2))]>;
> +def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
> +"call (retval0, retval1, retval2), ",
> +                                [(PrintCall (i32 3))]>;
> +def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
> +"call (retval0, retval1, retval2, retval3), ",
> +                                [(PrintCall (i32 4))]>;
> +def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
> +"call (retval0, retval1, retval2, retval3, retval4), ",
> +                                [(PrintCall (i32 5))]>;
> +def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
> +"call (retval0, retval1, retval2, retval3, retval4, retval5), ",
> +                                [(PrintCall (i32 6))]>;
> +def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
> +"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
> +                                [(PrintCall (i32 7))]>;
> +def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
> +!strconcat("call (retval0, retval1, retval2, retval3, retval4",
> +           ", retval5, retval6, retval7), "),
> +                                [(PrintCall (i32 8))]>;
> +
> +def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ",
> +                                [(PrintCall (i32 0))]>;
> +
> +def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
> +"call.uni (retval0), ",
> +                                [(PrintCallUni (i32 1))]>;
> +def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
> +"call.uni (retval0, retval1), ",
> +                                [(PrintCallUni (i32 2))]>;
> +def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
> +"call.uni (retval0, retval1, retval2), ",
> +                                [(PrintCallUni (i32 3))]>;
> +def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
> +"call.uni (retval0, retval1, retval2, retval3), ",
> +                                [(PrintCallUni (i32 4))]>;
> +def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
> +"call.uni (retval0, retval1, retval2, retval3, retval4), ",
> +                                [(PrintCallUni (i32 5))]>;
> +def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
> +"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
> +                                [(PrintCallUni (i32 6))]>;
> +def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
> +"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
> +                                [(PrintCallUni (i32 7))]>;
> +def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
> +!strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
> +           ", retval5, retval6, retval7), "),
> +                                [(PrintCallUni (i32 8))]>;
> +
> +def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
> +                                [(PrintCallUni (i32 0))]>;
> +
> +def LoadParamMemI64    : LoadParamMemInst<Int64Regs, ".b64">;
> +def LoadParamMemI32    : LoadParamMemInst<Int32Regs, ".b32">;
> +def LoadParamMemI16    : LoadParamMemInst<Int16Regs, ".b16">;
> +def LoadParamMemI8     : LoadParamMemInst<Int8Regs, ".b8">;
> +
> +//def LoadParamMemI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
> +//                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
> +//                "cvt.u16.u32\t$dst, temp_param_reg;"),
> +//                [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
> +//def LoadParamMemI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
> +//                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
> +//                "cvt.u16.u32\t$dst, temp_param_reg;"),
> +//                [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
> +
> +def LoadParamMemF32    : LoadParamMemInst<Float32Regs, ".f32">;
> +def LoadParamMemF64    : LoadParamMemInst<Float64Regs, ".f64">;
> +
> +def LoadParamRegI64    : LoadParamRegInst<Int64Regs, ".b64">;
> +def LoadParamRegI32    : LoadParamRegInst<Int32Regs, ".b32">;
> +def LoadParamRegI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
> +                         "cvt.u16.u32\t$dst, retval$b;",
> +                         [(set Int16Regs:$dst,
> +                           (LoadParam (i32 0), (i32 imm:$b)))]>;
> +def LoadParamRegI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
> +                         "cvt.u16.u32\t$dst, retval$b;",
> +                         [(set Int8Regs:$dst,
> +                           (LoadParam (i32 0), (i32 imm:$b)))]>;
> +
> +def LoadParamRegF32    : LoadParamRegInst<Float32Regs, ".f32">;
> +def LoadParamRegF64    : LoadParamRegInst<Float64Regs, ".f64">;
> +
> +def StoreParamI64    : StoreParamInst<Int64Regs, ".b64">;
> +def StoreParamI32    : StoreParamInst<Int32Regs, ".b32">;
> +
> +def StoreParamI16    : NVPTXInst<(outs),
> +  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
> +                       "st.param.b16\t[param$a+$b], $val;",
> +           [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
> +
> +def StoreParamI8     : NVPTXInst<(outs),
> +  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
> +                       "st.param.b8\t[param$a+$b], $val;",
> +                       [(StoreParam
> +                         (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
> +
> +def StoreParamS32I16 : NVPTXInst<(outs),
> +  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
> +                 !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
> +                            "st.param.b32\t[param$a+$b], temp_param_reg;"),
> +                 [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
> +def StoreParamU32I16 : NVPTXInst<(outs),
> +  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
> +                 !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
> +                            "st.param.b32\t[param$a+$b], temp_param_reg;"),
> +                 [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
> +
> +def StoreParamU32I8   : NVPTXInst<(outs),
> +  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
> +                 !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
> +                            "st.param.b32\t[param$a+$b], temp_param_reg;"),
> +                 [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
> +def StoreParamS32I8   : NVPTXInst<(outs),
> +  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
> +                 !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
> +                            "st.param.b32\t[param$a+$b], temp_param_reg;"),
> +                 [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
> +
> +def StoreParamF32    : StoreParamInst<Float32Regs, ".f32">;
> +def StoreParamF64    : StoreParamInst<Float64Regs, ".f64">;
> +
> +def MoveToParamI64   : MoveToParamInst<Int64Regs, ".b64">;
> +def MoveToParamI32   : MoveToParamInst<Int32Regs, ".b32">;
> +def MoveToParamF64   : MoveToParamInst<Float64Regs, ".f64">;
> +def MoveToParamF32   : MoveToParamInst<Float32Regs, ".f32">;
> +def MoveToParamI16   : NVPTXInst<(outs),
> +  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
> +                   !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
> +                              "mov.b32\tparam$a, temp_param_reg;"),
> +                   [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
> +def MoveToParamI8    : NVPTXInst<(outs),
> +  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
> +                   !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
> +                              "mov.b32\tparam$a, temp_param_reg;"),
> +                   [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
> +
> +def StoreRetvalI64    : StoreRetvalInst<Int64Regs, ".b64">;
> +def StoreRetvalI32    : StoreRetvalInst<Int32Regs, ".b32">;
> +def StoreRetvalI16    : StoreRetvalInst<Int16Regs, ".b16">;
> +def StoreRetvalI8     : StoreRetvalInst<Int8Regs, ".b8">;
> +
> +//def StoreRetvalI16    : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
> +//     !strconcat("\{\n\t",
> +//     !strconcat(".reg .b32 temp_retval_reg;\n\t",
> +//     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
> +//                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
> +//     [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
> +//def StoreRetvalI8     : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
> +//     !strconcat("\{\n\t",
> +//     !strconcat(".reg .b32 temp_retval_reg;\n\t",
> +//     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
> +//                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
> +//     [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
> +
> +def StoreRetvalF64    : StoreRetvalInst<Float64Regs, ".f64">;
> +def StoreRetvalF32    : StoreRetvalInst<Float32Regs, ".f32">;
> +
> +def MoveRetvalI64    : MoveRetvalInst<Int64Regs, ".b64">;
> +def MoveRetvalI32    : MoveRetvalInst<Int32Regs, ".b32">;
> +def MoveRetvalI16    : MoveRetvalInst<Int16Regs, ".b16">;
> +def MoveRetvalI8     : MoveRetvalInst<Int8Regs, ".b8">;
> +def MoveRetvalF64    : MoveRetvalInst<Float64Regs, ".f64">;
> +def MoveRetvalF32    : MoveRetvalInst<Float32Regs, ".f32">;
> +
> +def MoveToRetvalI64    : MoveToRetvalInst<Int64Regs, ".b64">;
> +def MoveToRetvalI32    : MoveToRetvalInst<Int32Regs, ".b32">;
> +def MoveToRetvalF64    : MoveToRetvalInst<Float64Regs, ".f64">;
> +def MoveToRetvalF32    : MoveToRetvalInst<Float32Regs, ".f32">;
> +def MoveToRetvalI16    : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
> +                         "cvt.u32.u16\tfunc_retval$num, $val;",
> +                         [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
> +def MoveToRetvalI8     : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
> +                         "cvt.u32.u16\tfunc_retval$num, $val;",
> +                         [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
> +
> +def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
> +def CallArgEndInst1  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
> +def CallArgEndInst0  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
> +def RETURNInst       : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
> +
> +class CallArgInst<NVPTXRegClass regclass> :
> +      NVPTXInst<(outs), (ins regclass:$a), "$a, ",
> +                [(CallArg (i32 0), regclass:$a)]>;
> +
> +class LastCallArgInst<NVPTXRegClass regclass> :
> +      NVPTXInst<(outs), (ins regclass:$a), "$a",
> +                [(LastCallArg (i32 0), regclass:$a)]>;
> +
> +def CallArgI64     : CallArgInst<Int64Regs>;
> +def CallArgI32     : CallArgInst<Int32Regs>;
> +def CallArgI16     : CallArgInst<Int16Regs>;
> +def CallArgI8      : CallArgInst<Int8Regs>;
> +
> +def CallArgF64     : CallArgInst<Float64Regs>;
> +def CallArgF32     : CallArgInst<Float32Regs>;
> +
> +def LastCallArgI64 : LastCallArgInst<Int64Regs>;
> +def LastCallArgI32 : LastCallArgInst<Int32Regs>;
> +def LastCallArgI16 : LastCallArgInst<Int16Regs>;
> +def LastCallArgI8  : LastCallArgInst<Int8Regs>;
> +
> +def LastCallArgF64 : LastCallArgInst<Float64Regs>;
> +def LastCallArgF32 : LastCallArgInst<Float32Regs>;
> +
> +def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
> +                              [(CallArg (i32 0), (i32 imm:$a))]>;
> +def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
> +                              [(LastCallArg (i32 0), (i32 imm:$a))]>;
> +
> +def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
> +                             [(CallArg (i32 1), (i32 imm:$a))]>;
> +def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
> +                             [(LastCallArg (i32 1), (i32 imm:$a))]>;
> +
> +def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
> +                             "$addr, ",
> +                             [(CallVoid (Wrapper tglobaladdr:$addr))]>;
> +def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
> +                             "$addr, ",
> +                             [(CallVoid Int32Regs:$addr)]>;
> +def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
> +                             "$addr, ",
> +                             [(CallVoid Int64Regs:$addr)]>;
> +def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val),
> +                             ", prototype_$val;",
> +                             [(Prototype (i32 imm:$val))]>;
> +
> +def DeclareRetMemInst : NVPTXInst<(outs),
> +  (ins i32imm:$align, i32imm:$size, i32imm:$num),
> +         ".param .align $align .b8 retval$num[$size];",
> +         [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
> +def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
> +         ".param .b$size retval$num;",
> +         [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
> +def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
> +         ".reg .b$size retval$num;",
> +         [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
> +
> +def DeclareParamInst : NVPTXInst<(outs),
> +  (ins i32imm:$align, i32imm:$a, i32imm:$size),
> +         ".param .align $align .b8 param$a[$size];",
> +         [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
> +def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
> +         ".param .b$size param$a;",
> +         [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
> +def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
> +         ".reg .b$size param$a;",
> +         [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
> +
> +class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
> +      NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
> +                !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"),
> +                [(set regclass:$dst, (MoveParam regclass:$src))]>;
> +
> +def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
> +def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
> +def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
> +                   "cvt.u16.u32\t$dst, $src;",
> +                   [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
> +def MoveParamI8  : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
> +                   "cvt.u16.u32\t$dst, $src;",
> +                   [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
> +def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
> +def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
> +
> +class PseudoUseParamInst<NVPTXRegClass regclass> :
> +      NVPTXInst<(outs), (ins regclass:$src),
> +      "// Pseudo use of $src",
> +      [(PseudoUseParam regclass:$src)]>;
> +
> +def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
> +def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
> +def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
> +def PseudoUseParamI8  : PseudoUseParamInst<Int8Regs>;
> +def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
> +def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
> +
> +
> +//
> +// Load / Store Handling
> +//
> +multiclass LD<NVPTXRegClass regclass> {
> +  def _avar : NVPTXInst<(outs regclass:$dst),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, imem:$addr),
> +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +           "$fromWidth \t$dst, [$addr];"), []>;
> +  def _areg : NVPTXInst<(outs regclass:$dst),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, Int32Regs:$addr),
> +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +           "$fromWidth \t$dst, [$addr];"), []>;
> +  def _ari : NVPTXInst<(outs regclass:$dst),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
> +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +           "$fromWidth \t$dst, [$addr+$offset];"), []>;
> +  def _asi : NVPTXInst<(outs regclass:$dst),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
> +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +           "$fromWidth \t$dst, [$addr+$offset];"), []>;
> +}
> +
> +let mayLoad=1, neverHasSideEffects=1 in {
> +defm LD_i8  : LD<Int8Regs>;
> +defm LD_i16 : LD<Int16Regs>;
> +defm LD_i32 : LD<Int32Regs>;
> +defm LD_i64 : LD<Int64Regs>;
> +defm LD_f32 : LD<Float32Regs>;
> +defm LD_f64 : LD<Float64Regs>;
> +}
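> +
> +// For illustration only (register and symbol names here are hypothetical):
> +// an instantiation such as LD_i32_avar, with the operand modifiers encoded
> +// as ".global" for addsp, "u" for sign and 32 for the width, would print
> +// something like:
> +//   ld.global.u32  %r0, [myglobal];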
> +
> +let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in {
> +defm LD_v2i8 : LD<V2I8Regs>;
> +defm LD_v4i8 : LD<V4I8Regs>;
> +defm LD_v2i16 : LD<V2I16Regs>;
> +defm LD_v4i16 : LD<V4I16Regs>;
> +defm LD_v2i32 : LD<V2I32Regs>;
> +defm LD_v4i32 : LD<V4I32Regs>;
> +defm LD_v2f32 : LD<V2F32Regs>;
> +defm LD_v4f32 : LD<V4F32Regs>;
> +defm LD_v2i64 : LD<V2I64Regs>;
> +defm LD_v2f64 : LD<V2F64Regs>;
> +}
> +
> +multiclass ST<NVPTXRegClass regclass> {
> +  def _avar : NVPTXInst<(outs),
> +    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
> +      LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
> +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
> +           " \t[$addr], $src;"), []>;
> +  def _areg : NVPTXInst<(outs),
> +    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
> +      LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
> +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
> +           " \t[$addr], $src;"), []>;
> +  def _ari : NVPTXInst<(outs),
> +    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
> +      LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
> +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
> +           " \t[$addr+$offset], $src;"), []>;
> +  def _asi : NVPTXInst<(outs),
> +    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
> +      LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
> +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
> +           " \t[$addr+$offset], $src;"), []>;
> +}
> +
> +let mayStore=1, neverHasSideEffects=1 in {
> +defm ST_i8  : ST<Int8Regs>;
> +defm ST_i16 : ST<Int16Regs>;
> +defm ST_i32 : ST<Int32Regs>;
> +defm ST_i64 : ST<Int64Regs>;
> +defm ST_f32 : ST<Float32Regs>;
> +defm ST_f64 : ST<Float64Regs>;
> +}
> +
> +let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in {
> +defm ST_v2i8 : ST<V2I8Regs>;
> +defm ST_v4i8 : ST<V4I8Regs>;
> +defm ST_v2i16 : ST<V2I16Regs>;
> +defm ST_v4i16 : ST<V4I16Regs>;
> +defm ST_v2i32 : ST<V2I32Regs>;
> +defm ST_v4i32 : ST<V4I32Regs>;
> +defm ST_v2f32 : ST<V2F32Regs>;
> +defm ST_v4f32 : ST<V4F32Regs>;
> +defm ST_v2i64 : ST<V2I64Regs>;
> +defm ST_v2f64 : ST<V2F64Regs>;
> +}
> +
> +// The following is used only in and after vector elementization. Vector
> +// elementization happens at the machine-instruction level, so the following
> +// instructions never appear in the DAG.
> +multiclass LD_VEC<NVPTXRegClass regclass> {
> +  def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, imem:$addr),
> +    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
> +  def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, Int32Regs:$addr),
> +    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
> +  def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
> +    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
> +  def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
> +    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
> +  def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
> +      regclass:$dst3, regclass:$dst4),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, imem:$addr),
> +    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
> +  def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
> +      regclass:$dst4),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, Int32Regs:$addr),
> +    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
> +  def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
> +      regclass:$dst4),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
> +    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
> +                []>;
> +  def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
> +      regclass:$dst4),
> +    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
> +    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
> +                []>;
> +}
> +let mayLoad=1, neverHasSideEffects=1 in {
> +defm LDV_i8  : LD_VEC<Int8Regs>;
> +defm LDV_i16 : LD_VEC<Int16Regs>;
> +defm LDV_i32 : LD_VEC<Int32Regs>;
> +defm LDV_i64 : LD_VEC<Int64Regs>;
> +defm LDV_f32 : LD_VEC<Float32Regs>;
> +defm LDV_f64 : LD_VEC<Float64Regs>;
> +}
> +
> +multiclass ST_VEC<NVPTXRegClass regclass> {
> +  def _v2_avar : NVPTXInst<(outs),
> +    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
> +      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
> +    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
> +  def _v2_areg : NVPTXInst<(outs),
> +    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
> +      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
> +    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
> +  def _v2_ari : NVPTXInst<(outs),
> +    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
> +      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
> +      i32imm:$offset),
> +    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
> +  def _v2_asi : NVPTXInst<(outs),
> +    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
> +      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
> +      i32imm:$offset),
> +    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
> +  def _v4_avar : NVPTXInst<(outs),
> +    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
> +      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, imem:$addr),
> +    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
> +  def _v4_areg : NVPTXInst<(outs),
> +    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
> +      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, Int32Regs:$addr),
> +    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
> +  def _v4_ari : NVPTXInst<(outs),
> +    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
> +      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
> +    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
> +    []>;
> +  def _v4_asi : NVPTXInst<(outs),
> +    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
> +      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
> +      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
> +    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
> +               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
> +    []>;
> +}
> +let mayStore=1, neverHasSideEffects=1 in {
> +defm STV_i8  : ST_VEC<Int8Regs>;
> +defm STV_i16 : ST_VEC<Int16Regs>;
> +defm STV_i32 : ST_VEC<Int32Regs>;
> +defm STV_i64 : ST_VEC<Int64Regs>;
> +defm STV_f32 : ST_VEC<Float32Regs>;
> +defm STV_f64 : ST_VEC<Float64Regs>;
> +}
> +
> +
> +//---- Conversion ----
> +
> +multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
> +// FIXME: need to add f16 support
> +//  def CVTf16i8 :
> +//    NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
> +//              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
> +//        [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
> +//  def CVTf16i16 :
> +//    NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
> +//              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
> +//        [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
> +//  def CVTf16i32 :
> +//    NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
> +//              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
> +//        [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
> +//  def CVTf16i64:
> +//    NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
> +//          !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
> +//            [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
> +
> +  def CVTf32i1 :
> +    NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
> +              "selp.f32 \t$d, 1.0, 0.0, $a;",
> +        [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
> +  def CVTf32i8 :
> +    NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
> +              !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
> +        [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
> +  def CVTf32i16 :
> +    NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
> +              !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
> +        [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
> +  def CVTf32i32 :
> +    NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
> +              !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
> +        [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
> +  def CVTf32i64:
> +    NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
> +          !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
> +            [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
> +
> +  def CVTf64i1 :
> +    NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
> +              "selp.f64 \t$d, 1.0, 0.0, $a;",
> +        [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
> +  def CVTf64i8 :
> +    NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
> +              !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
> +        [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
> +  def CVTf64i16 :
> +    NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
> +              !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
> +        [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
> +  def CVTf64i32 :
> +    NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
> +              !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
> +        [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
> +  def CVTf64i64:
> +    NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
> +          !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
> +            [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
> +}
> +
> +defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
> +defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
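> +
> +// For example, the Sint_to_fpCVTf32i32 instantiation above expands to the
> +// string "cvt.rn.f32.s32 \t$d, $a;", i.e. a round-to-nearest-even
> +// signed-int-to-float conversion.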
> +
> +multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
> +// FIXME: need to add f16 support
> +//  def CVTi8f16:
> +//    NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
> +//              !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
> +//        [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
> +  def CVTi8f32_ftz:
> +    NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
> +        [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
> +  def CVTi8f32:
> +    NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
> +        [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
> +  def CVTi8f64:
> +    NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
> +        [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
> +
> +// FIXME: need to add f16 support
> +//  def CVTi16f16:
> +//    NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
> +//              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
> +//        [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
> +  def CVTi16f32_ftz:
> +    NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
> +        [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
> +  def CVTi16f32:
> +    NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
> +        [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
> +  def CVTi16f64:
> +    NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
> +        [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;
> +
> +// FIXME: need to add f16 support
> +//  def CVTi32f16:
> +//    NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
> +//              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
> +//        [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
> +  def CVTi32f32_ftz:
> +    NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
> +        [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
> +  def CVTi32f32:
> +    NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
> +        [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
> +  def CVTi32f64:
> +    NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
> +        [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;
> +
> +// FIXME: need to add f16 support
> +//  def CVTi64f16:
> +//    NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
> +//              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
> +//        [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
> +  def CVTi64f32_ftz:
> +    NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
> +        [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
> +  def CVTi64f32:
> +    NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
> +        [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
> +  def CVTi64f64:
> +    NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
> +              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
> +        [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
> +}
> +
> +defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
> +defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
> +
> +multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
> +  def ext1to8:
> +       NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
> +           "selp.u16 \t$d, 1, 0, $a;",
> +     [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
> +  def ext1to16:
> +       NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
> +           "selp.u16 \t$d, 1, 0, $a;",
> +     [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
> +  def ext1to32:
> +       NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
> +           "selp.u32 \t$d, 1, 0, $a;",
> +     [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
> +  def ext1to64:
> +       NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
> +           "selp.u64 \t$d, 1, 0, $a;",
> +     [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
> +}
> +
> +multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
> +  def ext1to8:
> +       NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
> +           "selp.s16 \t$d, -1, 0, $a;",
> +     [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
> +  def ext1to16:
> +       NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
> +           "selp.s16 \t$d, -1, 0, $a;",
> +     [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
> +  def ext1to32:
> +       NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
> +           "selp.s32 \t$d, -1, 0, $a;",
> +     [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
> +  def ext1to64:
> +       NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
> +           "selp.s64 \t$d, -1, 0, $a;",
> +     [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
> +}
> +
> +multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
> +  // All Int8Regs are emitted as 16-bit registers in PTX, and there is no
> +  // selp.u8 in PTX.
> +  def ext8to16:
> +       NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
> +           !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
> +             !strconcat(OpStr, "8 \t$d, $a;")))),
> +     [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
> +  def ext8to32:
> +       NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
> +           !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
> +             !strconcat(OpStr, "8 \t$d, $a;")))),
> +     [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
> +  def ext8to64:
> +       NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
> +           !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
> +             !strconcat(OpStr, "8 \t$d, $a;")))),
> +     [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
> +  def ext16to32:
> +       NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
> +           !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
> +             !strconcat(OpStr, "16 \t$d, $a;")))),
> +     [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
> +  def ext16to64:
> +       NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
> +           !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
> +             !strconcat(OpStr, "16 \t$d, $a;")))),
> +     [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
> +  def ext32to64:
> +       NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
> +           !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
> +             !strconcat(OpStr, "32 \t$d, $a;")))),
> +     [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
> +}
> +
> +defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
> +defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
> +defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
> +
> +defm Sint_extend : INT_EXTEND <"s", sext>;
> +defm Zint_extend : INT_EXTEND <"u", zext>;
> +defm Aint_extend : INT_EXTEND <"u", anyext>;
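> +
> +// For example, Sint_extendext8to16 expands to "cvt.s16.s8 \t$d, $a;" and
> +// Zint_extendext16to64 expands to "cvt.u64.u16 \t$d, $a;".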
> +
> +class TRUNC_to1_asm<string sz> {
> +  string s = !strconcat("{{\n\t",
> +             !strconcat(".reg ",
> +             !strconcat(sz,
> +             !strconcat(" temp;\n\t",
> +             !strconcat("and",
> +             !strconcat(sz,
> +             !strconcat("\t temp, $a, 1;\n\t",
> +             !strconcat("setp",
> +             !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
> +}
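> +
> +// For reference, TRUNC_to1_asm<".b64">.s expands (modulo whitespace) to:
> +//   {{
> +//   .reg .b64 temp;
> +//   and.b64  temp, $a, 1;
> +//   setp.b64.eq  $d, temp, 1;
> +//   }}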
> +
> +def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
> +             "cvt.u32.u64 \t$d, $a;",
> +       [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
> +def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
> +             "cvt.u16.u64 \t$d, $a;",
> +       [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
> +def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
> +             "cvt.u8.u64 \t$d, $a;",
> +       [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
> +def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
> +             "cvt.u16.u32 \t$d, $a;",
> +       [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
> +def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
> +             "cvt.u8.u32 \t$d, $a;",
> +       [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
> +def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
> +             "cvt.u8.u16 \t$d, $a;",
> +       [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
> +def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
> +             TRUNC_to1_asm<".b64">.s,
> +             [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
> +def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
> +             TRUNC_to1_asm<".b32">.s,
> +             [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
> +def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
> +             TRUNC_to1_asm<".b16">.s,
> +             [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
> +def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
> +             TRUNC_to1_asm<".b16">.s,
> +             [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
> +
> +// Select instructions
> +def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
> +          (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
> +def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
> +          (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
> +            (TRUNC_32to1 Int32Regs:$pred))>;
> +def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
> +          (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
> +            (TRUNC_32to1 Int32Regs:$pred))>;
> +def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
> +          (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
> +            (TRUNC_32to1 Int32Regs:$pred))>;
> +def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
> +          (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
> +            (TRUNC_32to1 Int32Regs:$pred))>;
> +def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
> +          (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
> +            (TRUNC_32to1 Int32Regs:$pred))>;
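> +
> +// For illustration: an IR-level  select i1 %c, i32 %a, i32 %b  whose
> +// condition has been promoted into a 32-bit register is lowered by first
> +// truncating the register back to a predicate (TRUNC_32to1) and then
> +// selecting on that predicate.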
> +
> +class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
> +  NVPTXRegClass regclassOut> :
> +           NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
> +           !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
> +     [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
> +
> +def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
> +def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
> +def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
> +def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
> +
> +// pack a set of smaller int registers to a larger int register
> +def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
> +                          (ins Int8Regs:$s1, Int8Regs:$s2,
> +                               Int8Regs:$s3, Int8Regs:$s4),
> +                          !strconcat("{{\n\t.reg .b8\t%t<4>;",
> +                          !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
> +                          !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
> +                          !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
> +                          !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
> +                           "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
> +                          []>;
> +def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
> +                          (ins Int16Regs:$s1, Int16Regs:$s2,
> +                               Int16Regs:$s3, Int16Regs:$s4),
> +                          "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
> +                          []>;
> +def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
> +                          (ins Int8Regs:$s1, Int8Regs:$s2),
> +                          !strconcat("{{\n\t.reg .b8\t%t<2>;",
> +                          !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
> +                          !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
> +                                     "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
> +                          []>;
> +def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
> +                          (ins Int16Regs:$s1, Int16Regs:$s2),
> +                          "mov.b32\t$d, {{$s1, $s2}};",
> +                          []>;
> +def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
> +                          (ins Int32Regs:$s1, Int32Regs:$s2),
> +                          "mov.b64\t$d, {{$s1, $s2}};",
> +                          []>;
> +def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
> +                          (ins Float32Regs:$s1, Float32Regs:$s2),
> +                          "mov.b64\t$d, {{$s1, $s2}};",
> +                          []>;
> +
> +// unpack a larger int register to a set of smaller int registers
> +def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
> +                                Int8Regs:$d3, Int8Regs:$d4),
> +                          (ins Int32Regs:$s),
> +                          !strconcat("{{\n\t.reg .b8\t%t<4>;",
> +                          !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
> +                          !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
> +                          !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
> +                          !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
> +                                     "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
> +                          []>;
> +def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
> +                                 Int16Regs:$d3, Int16Regs:$d4),
> +                           (ins Int64Regs:$s),
> +                           "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
> +                          []>;
> +def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
> +                          (ins Int16Regs:$s),
> +                          !strconcat("{{\n\t.reg .b8\t%t<2>;",
> +                          !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
> +                          !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
> +                                     "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
> +                          []>;
> +def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
> +                           (ins Int32Regs:$s),
> +                           "mov.b32\t{{$d1, $d2}}, $s;",
> +                          []>;
> +def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
> +                           (ins Int64Regs:$s),
> +                           "mov.b64\t{{$d1, $d2}}, $s;",
> +                          []>;
> +def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
> +                           (ins Float64Regs:$s),
> +                           "mov.b64\t{{$d1, $d2}}, $s;",
> +                          []>;
> +
> +def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
> +            "cvt.rn.ftz.f32.f64 \t$d, $a;",
> +      [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
> +
> +def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
> +            "cvt.rn.f32.f64 \t$d, $a;",
> +      [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
> +
> +def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
> +            "cvt.ftz.f64.f32 \t$d, $a;",
> +      [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
> +
> +def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
> +            "cvt.f64.f32 \t$d, $a;",
> +      [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
> +
> +def retflag       : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
> +                           [SDNPHasChain, SDNPOptInGlue]>;
> +
> +//-----------------------------------
> +// Control-flow
> +//-----------------------------------
> +
> +let isTerminator=1 in {
> +   let isReturn=1, isBarrier=1 in
> +      def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
> +
> +   let isBranch=1 in
> +      def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
> +                          "@$a bra \t$target;",
> +                           [(brcond Int1Regs:$a, bb:$target)]>;
> +   let isBranch=1 in
> +      def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
> +                          "@!$a bra \t$target;",
> +                           []>;
> +
> +   let isBranch=1, isBarrier=1 in
> +      def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
> +                        "bra.uni \t$target;",
> +                  [(br bb:$target)]>;
> +}
> +
> +def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
> +    (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
> +
> +// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
> +// conditional branch if the target block is the next block, so that the code
> +// can fall through to the target block. The inversion is done by
> +// 'xor condition, 1', which will be translated to (setne condition, -1).
> +// Since PTX supports '@!pred bra target', we should use it.
> +def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
> +  (CBranchOther Int1Regs:$a, bb:$target)>;
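> +
> +// For illustration (hypothetical names): IR of the form
> +//   %nc = xor i1 %c, true
> +//   br i1 %nc, label %bb, ...
> +// thus selects to a single negated predicated branch such as
> +//   @!%p1 bra  LBB0_2;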
> +
> +// Call
> +def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
> +def SDT_NVPTXCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
> +                                        SDTCisVT<1, i32> ]>;
> +
> +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
> +                           [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
> +def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_NVPTXCallSeqEnd,
> +                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
> +                           SDNPSideEffect]>;
> +
> +def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
> +def call          : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
> +                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
> +def calltarget : Operand<i32>;
> +let isCall=1 in {
> +   def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
> +                  "call \t$dst, (1);", []>;
> +}
> +
> +def : Pat<(call tglobaladdr:$dst),
> +          (CALL tglobaladdr:$dst)>;
> +def : Pat<(call texternalsym:$dst),
> +          (CALL texternalsym:$dst)>;
> +
> +// Pseudo instructions.
> +class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
> +   : NVPTXInst<outs, ins, asmstr, pattern>;
> +
> +// @TODO: We use some tricks here to emit curly braces.  Can we clean this up
> +// a bit without TableGen modifications?
> +def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
> +  "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
> +                               [(callseq_start timm:$amt)]>;
> +def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
> +  "\n\t//{{\n\t}}// Callseq End $amt1",
> +                            [(callseq_end timm:$amt1, timm:$amt2)]>;
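> +
> +// For reference, Callseq_Start prints something like
> +//   // Callseq Start 0
> +//   {
> +//   .reg .b32 temp_param_reg;
> +// and the matching Callseq_End emits the closing brace.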
> +
> +// trap instruction
> +
> +def trapinst : NVPTXInst<(outs), (ins),
> +                         "trap;",
> +                         [(trap)]>;
> +
> +include "NVPTXVector.td"
> +
> +include "NVPTXIntrinsics.td"
> +
> +
> +//-----------------------------------
> +// Notes
> +//-----------------------------------
> +// BSWAP is currently expanded. The following would be more efficient:
> +// - for < sm_20, use vector/scalar mov, as Tesla supports native 16-bit
> +//   registers
> +// - for sm_20, use prmt (with vector/scalar mov to do the pack and unpack);
> +//   sm_20 supports native 32-bit registers, but not native 16-bit registers.
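> +// For sm_20, a byte-swap sketch using prmt (assuming the usual selector
> +// encoding, in which selector 0x0123 reverses the four bytes of the first
> +// source operand):
> +//   prmt.b32  %r1, %r0, 0, 0x0123;  // %r1 = bswap(%r0)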
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td Fri May  4 15:18:50 2012
> @@ -0,0 +1,1675 @@
> +//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +def immFloat0 : PatLeaf<(fpimm), [{
> +    float f = (float)N->getValueAPF().convertToFloat();
> +    return (f==0.0f);
> +}]>;
> +
> +def immFloat1 : PatLeaf<(fpimm), [{
> +    float f = (float)N->getValueAPF().convertToFloat();
> +    return (f==1.0f);
> +}]>;
> +
> +def immDouble0 : PatLeaf<(fpimm), [{
> +    double d = (double)N->getValueAPF().convertToDouble();
> +    return (d==0.0);
> +}]>;
> +
> +def immDouble1 : PatLeaf<(fpimm), [{
> +    double d = (double)N->getValueAPF().convertToDouble();
> +    return (d==1.0);
> +}]>;
> +
> +
> +
> +//-----------------------------------
> +// Synchronization Functions
> +//-----------------------------------
> +def INT_CUDA_SYNCTHREADS : NVPTXInst<(outs), (ins),
> +                  "bar.sync \t0;",
> +      [(int_cuda_syncthreads)]>;
> +def INT_BARRIER0 : NVPTXInst<(outs), (ins),
> +                  "bar.sync \t0;",
> +      [(int_nvvm_barrier0)]>;
> +def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
> +  !strconcat("{{ \n\t",
> +      !strconcat(".reg .pred \t%p1; \n\t",
> +      !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
> +      !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
> +        !strconcat("}}", ""))))),
> +      [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
> +def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
> +  !strconcat("{{ \n\t",
> +      !strconcat(".reg .pred \t%p1; \n\t",
> +      !strconcat(".reg .pred \t%p2; \n\t",
> +      !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
> +      !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
> +      !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
> +        !strconcat("}}", ""))))))),
> +      [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
> +def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
> +  !strconcat("{{ \n\t",
> +      !strconcat(".reg .pred \t%p1; \n\t",
> +      !strconcat(".reg .pred \t%p2; \n\t",
> +      !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
> +      !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
> +      !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
> +        !strconcat("}}", ""))))))),
> +      [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
> +
> +
> +//-----------------------------------
> +// Explicit Memory Fence Functions
> +//-----------------------------------
> +class MEMBAR<string StrOp, Intrinsic IntOP> :
> +              NVPTXInst<(outs), (ins),
> +            StrOp, [(IntOP)]>;
> +
> +def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
> +def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
> +def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
> +
> +
> +//-----------------------------------
> +// Math Functions
> +//-----------------------------------
> +
> +// Map min(1.0, max(0.0, x)) to sat(x)
> +multiclass SAT<NVPTXRegClass regclass, Operand fimm, Intrinsic IntMinOp,
> +  Intrinsic IntMaxOp, PatLeaf f0, PatLeaf f1, string OpStr> {
> +
> +   // fmin(1.0, fmax(0.0, x)) => sat(x)
> +   def SAT11 : NVPTXInst<(outs regclass:$dst),
> +     (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
> +           OpStr,
> +     [(set regclass:$dst, (IntMinOp f1:$srcf0 ,
> +       (IntMaxOp f0:$srcf1, regclass:$src)))]>;
> +
> +   // fmin(1.0, fmax(x, 0.0)) => sat(x)
> +   def SAT12 : NVPTXInst<(outs regclass:$dst),
> +     (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
> +           OpStr,
> +     [(set regclass:$dst, (IntMinOp f1:$srcf0 ,
> +       (IntMaxOp regclass:$src, f0:$srcf1)))]>;
> +
> +   // fmin(fmax(0.0, x), 1.0) => sat(x)
> +   def SAT13 : NVPTXInst<(outs regclass:$dst),
> +     (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
> +           OpStr,
> +     [(set regclass:$dst, (IntMinOp
> +       (IntMaxOp f0:$srcf0, regclass:$src), f1:$srcf1))]>;
> +
> +   // fmin(fmax(x, 0.0), 1.0) => sat(x)
> +   def SAT14 : NVPTXInst<(outs regclass:$dst),
> +     (ins fimm:$srcf0, fimm:$srcf1, regclass:$src),
> +         OpStr,
> +     [(set regclass:$dst, (IntMinOp
> +       (IntMaxOp regclass:$src, f0:$srcf0), f1:$srcf1))]>;
> +
> +}
> +// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x), because when x
> +// is NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
> +// The same story holds for fmax/fmin.
> +
> +defm SAT_fmin_fmax_f : SAT<Float32Regs, f32imm, int_nvvm_fmin_f,
> +  int_nvvm_fmax_f, immFloat0, immFloat1,
> +           "cvt.sat.f32.f32 \t$dst, $src; \n">;
> +defm SAT_fmin_fmax_d : SAT<Float64Regs, f64imm, int_nvvm_fmin_d,
> +  int_nvvm_fmax_d, immDouble0, immDouble1,
> +           "cvt.sat.f64.f64 \t$dst, $src; \n">;
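> +
> +// For example, an intrinsic chain like
> +//   int_nvvm_fmin_f(1.0f, int_nvvm_fmax_f(0.0f, x))
> +// matches SAT_fmin_fmax_fSAT11 above and emits a single
> +// "cvt.sat.f32.f32" instruction.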
> +
> +
> +// We need a full string for OpcStr here because we need to deal with cases
> +// like INT_PTX_RECIP.
> +class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
> +  NVPTXRegClass src_regclass, Intrinsic IntOP>
> +            : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
> +            OpcStr,
> +        [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
> +
> +// We need a full string for OpcStr here because we need to deal with cases
> +// like INT_PTX_NATIVE_POWR_F.
> +class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
> +  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
> +            : NVPTXInst<(outs t_regclass:$dst),
> +              (ins s0_regclass:$src0, s1_regclass:$src1),
> +            OpcStr,
> +        [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
> +
> +class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
> +  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
> +  NVPTXRegClass s2_regclass, Intrinsic IntOP>
> +            : NVPTXInst<(outs t_regclass:$dst),
> +              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
> +            OpcStr,
> +        [(set t_regclass:$dst,
> +          (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
> +
> +//
> +// MISC
> +//
> +
> +def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
> +  int_nvvm_clz_i>;
> +def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
> +  int_nvvm_clz_ll>;
> +
> +def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
> +  int_nvvm_popc_i>;
> +def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
> +  int_nvvm_popc_ll>;
> +
> +def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
> +  Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
> +
> +//
> +// Min Max
> +//
> +
> +def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs,
> +  Int32Regs, Int32Regs, int_nvvm_min_i>;
> +def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs,
> +  Int32Regs, Int32Regs, int_nvvm_min_ui>;
> +
> +def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs,
> +  Int64Regs, Int64Regs, int_nvvm_min_ll>;
> +def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs,
> +  Int64Regs, Int64Regs, int_nvvm_min_ull>;
> +
> +def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs,
> +  Int32Regs, Int32Regs, int_nvvm_max_i>;
> +def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs,
> +  Int32Regs, Int32Regs, int_nvvm_max_ui>;
> +
> +def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs,
> +  Int64Regs, Int64Regs, int_nvvm_max_ll>;
> +def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs,
> +  Int64Regs, Int64Regs, int_nvvm_max_ull>;
> +
> +def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
> +  Float32Regs, Float32Regs, int_nvvm_fmin_f>;
> +def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
> +
> +def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
> +  Float32Regs, Float32Regs, int_nvvm_fmax_f>;
> +def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
> +
> +def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
> +  Float64Regs, Float64Regs, int_nvvm_fmin_d>;
> +def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
> +  Float64Regs, Float64Regs, int_nvvm_fmax_d>;
> +
> +//
> +// Multiplication
> +//
> +
> +def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
> +  Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
> +def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
> +  Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
> +
> +def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
> +  Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
> +def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
> +  Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
> +
> +def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
> +def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
> +def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
> +def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
> +def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
> +def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
> +def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
> +def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
> +
> +def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
> +def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
> +def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
> +def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
> +
> +def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
> +  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
> +def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
> +  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
> +
> +//
> +// Div
> +//
> +
> +def INT_NVVM_DIV_APPROX_FTZ_F
> +  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
> +    Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
> +def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
> +
> +def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
> +def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
> +def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
> +def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
> +def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
> +def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
> +def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
> +def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
> +
> +def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
> +def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
> +def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
> +def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
> +
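
The _rn/_rz/_rm/_rp suffixes throughout are the IEEE rounding modes (to
nearest-even, toward zero, toward -inf, toward +inf), and .ftz flushes
subnormals to zero. A sketch of how the f32 division flavors surface in CUDA
(the __fdividef mapping is my recollection, not something this patch states):

  __global__ void div_demo(float x, float y, float *out) {
    out[0] = __fdiv_rn(x, y);   // div.rn.f32: correctly rounded, nearest-even
    out[1] = __fdiv_rz(x, y);   // div.rz.f32: round toward zero
    out[2] = __fdividef(x, y);  // fast path; I believe this is div.approx.f32
  }
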
> +//
> +// Brev
> +//
> +
> +def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
> +  int_nvvm_brev32>;
> +def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
> +  int_nvvm_brev64>;
> +
> +//
> +// Sad
> +//
> +
> +def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
> +  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
> +def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
> +  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
> +
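
brev reverses the bit order of its operand and sad computes |a - b| + c.
CUDA exposes both directly:

  __global__ void bits_demo(unsigned x, unsigned long long y,
                            int a, int b, unsigned c, unsigned *out) {
    out[0] = __brev(x);              // brev.b32: reverse the 32 bits
    out[1] = (unsigned)__brevll(y);  // brev.b64 (truncated only for this demo)
    out[2] = __sad(a, b, c);         // sad.s32: |a - b| + c
    out[3] = __usad(x, c, 0u);       // sad.u32: unsigned variant
  }
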
> +//
> +// Floor  Ceil
> +//
> +
> +def INT_NVVM_FLOOR_FTZ_F : F_MATH_1<"cvt.rmi.ftz.f32.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_floor_ftz_f>;
> +def INT_NVVM_FLOOR_F : F_MATH_1<"cvt.rmi.f32.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_floor_f>;
> +def INT_NVVM_FLOOR_D : F_MATH_1<"cvt.rmi.f64.f64 \t$dst, $src0;",
> +  Float64Regs, Float64Regs, int_nvvm_floor_d>;
> +
> +def INT_NVVM_CEIL_FTZ_F : F_MATH_1<"cvt.rpi.ftz.f32.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_ceil_ftz_f>;
> +def INT_NVVM_CEIL_F : F_MATH_1<"cvt.rpi.f32.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_ceil_f>;
> +def INT_NVVM_CEIL_D : F_MATH_1<"cvt.rpi.f64.f64 \t$dst, $src0;",
> +  Float64Regs, Float64Regs, int_nvvm_ceil_d>;
> +
> +//
> +// Abs
> +//
> +
> +def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
> +  int_nvvm_abs_i>;
> +def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
> +  int_nvvm_abs_ll>;
> +
> +def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
> +  Float32Regs, int_nvvm_fabs_ftz_f>;
> +def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
> +  Float32Regs, int_nvvm_fabs_f>;
> +
> +def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_fabs_d>;
> +
> +//
> +// Round
> +//
> +
> +def INT_NVVM_ROUND_FTZ_F : F_MATH_1<"cvt.rni.ftz.f32.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_round_ftz_f>;
> +def INT_NVVM_ROUND_F : F_MATH_1<"cvt.rni.f32.f32 \t$dst, $src0;", Float32Regs,
> +  Float32Regs, int_nvvm_round_f>;
> +
> +def INT_NVVM_ROUND_D : F_MATH_1<"cvt.rni.f64.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_round_d>;
> +
> +//
> +// Trunc
> +//
> +
> +def INT_NVVM_TRUNC_FTZ_F : F_MATH_1<"cvt.rzi.ftz.f32.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_trunc_ftz_f>;
> +def INT_NVVM_TRUNC_F : F_MATH_1<"cvt.rzi.f32.f32 \t$dst, $src0;", Float32Regs,
> +  Float32Regs, int_nvvm_trunc_f>;
> +
> +def INT_NVVM_TRUNC_D : F_MATH_1<"cvt.rzi.f64.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_trunc_d>;
> +
> +//
> +// Saturate
> +//
> +
> +def INT_NVVM_SATURATE_FTZ_F : F_MATH_1<"cvt.sat.ftz.f32.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_saturate_ftz_f>;
> +def INT_NVVM_SATURATE_F : F_MATH_1<"cvt.sat.f32.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_saturate_f>;
> +
> +def INT_NVVM_SATURATE_D : F_MATH_1<"cvt.sat.f64.f64 \t$dst, $src0;",
> +  Float64Regs, Float64Regs, int_nvvm_saturate_d>;
> +
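
Floor, ceil, round, trunc, and saturate are all the same cvt.f32.f32 (or
.f64.f64) instruction with different modifiers: rmi rounds toward -inf, rpi
toward +inf, rni to nearest-even, rzi toward zero, and .sat clamps to
[0.0, 1.0]. In CUDA terms, roughly:

  __global__ void round_demo(float x, float *out) {
    out[0] = floorf(x);       // cvt.rmi.f32.f32
    out[1] = ceilf(x);        // cvt.rpi.f32.f32
    out[2] = rintf(x);        // cvt.rni.f32.f32 -- nearest-even, unlike roundf
    out[3] = truncf(x);       // cvt.rzi.f32.f32
    out[4] = __saturatef(x);  // cvt.sat.f32.f32: clamp to [0.0, 1.0]
  }

So int_nvvm_round_* is rint-style rounding, not C round(); that might deserve
a comment in the .td.
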
> +//
> +// Exp2  Log2
> +//
> +
> +def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
> +def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
> +def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
> +  Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
> +
> +def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
> +def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
> +def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
> +  Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
> +
> +//
> +// Sin  Cos
> +//
> +
> +def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
> +def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
> +
> +def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
> +def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
> +
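
These are the SFU approximations, so accuracy is well below the correctly
rounded half-ulp; they're what the fast-math flavors of the CUDA math library
use. Sketch (the __expf lowering is an assumption on my part):

  __global__ void fast_math_demo(float x, float *out) {
    out[0] = __sinf(x);   // sin.approx.f32
    out[1] = __cosf(x);   // cos.approx.f32
    out[2] = __log2f(x);  // lg2.approx.f32
    out[3] = __expf(x);   // presumably x*log2(e) followed by ex2.approx.f32
  }
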
> +//
> +// Fma
> +//
> +
> +def INT_NVVM_FMA_RN_FTZ_F
> +  : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
> +    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
> +def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
> +  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
> +def INT_NVVM_FMA_RZ_FTZ_F
> +  : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
> +    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
> +def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
> +  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
> +def INT_NVVM_FMA_RM_FTZ_F
> +  : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
> +    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
> +def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
> +  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
> +def INT_NVVM_FMA_RP_FTZ_F
> +  : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
> +    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
> +def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
> +  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
> +
> +def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
> +  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
> +def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
> +  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
> +def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
> +  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
> +def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
> +  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
> +
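
fma here is the fused multiply-add: a*b + c with a single rounding at the
end, in each of the four rounding modes. CUDA-side:

  __global__ void fma_demo(float a, float b, float c, double *out) {
    float f = __fmaf_rn(a, b, c);                   // fma.rn.f32: one rounding
    float g = __fmaf_rz(a, b, c);                   // fma.rz.f32
    out[0]  = __fma_rn((double)f, 2.0, (double)g);  // fma.rn.f64
  }
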
> +//
> +// Rcp
> +//
> +
> +def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
> +def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
> +def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
> +def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
> +def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
> +def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
> +def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
> +def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
> +
> +def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_rcp_rn_d>;
> +def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_rcp_rz_d>;
> +def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_rcp_rm_d>;
> +def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_rcp_rp_d>;
> +
> +def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
> +  Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
> +
> +//
> +// Sqrt
> +//
> +
> +def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
> +def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
> +  Float32Regs, int_nvvm_sqrt_rn_f>;
> +def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
> +def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
> +  Float32Regs, int_nvvm_sqrt_rz_f>;
> +def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
> +def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
> +  Float32Regs, int_nvvm_sqrt_rm_f>;
> +def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
> +def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
> +  Float32Regs, int_nvvm_sqrt_rp_f>;
> +def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
> +def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
> +
> +def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_sqrt_rn_d>;
> +def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_sqrt_rz_d>;
> +def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_sqrt_rm_d>;
> +def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
> +  Float64Regs, int_nvvm_sqrt_rp_d>;
> +
> +//
> +// Rsqrt
> +//
> +
> +def INT_NVVM_RSQRT_APPROX_FTZ_F
> +  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
> +    int_nvvm_rsqrt_approx_ftz_f>;
> +def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
> +  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
> +def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
> +  Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
> +
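
Same pattern for reciprocal, square root, and reciprocal square root: the
rn/rz/rm/rp forms are correctly rounded (sm_20 features), while the approx
forms trade accuracy for speed. For instance:

  __global__ void recip_demo(float x, float *out) {
    out[0] = __frcp_rn(x);    // rcp.rn.f32: correctly rounded 1/x
    out[1] = __fsqrt_rn(x);   // sqrt.rn.f32: IEEE square root
    out[2] = __fsqrt_ru(x);   // sqrt.ru.f32: round toward +inf
    out[3] = rsqrtf(x);       // rsqrt.approx.f32, as far as I know
  }
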
> +//
> +// Add
> +//
> +
> +def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
> +def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
> +def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
> +def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
> +def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
> +def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
> +def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
> +def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
> +  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
> +
> +def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
> +def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
> +def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
> +def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
> +  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
> +
> +//
> +// Convert
> +//
> +
> +def INT_NVVM_D2F_RN_FTZ : F_MATH_1<"cvt.rn.ftz.f32.f64 \t$dst, $src0;",
> +  Float32Regs, Float64Regs, int_nvvm_d2f_rn_ftz>;
> +def INT_NVVM_D2F_RN : F_MATH_1<"cvt.rn.f32.f64 \t$dst, $src0;",
> +  Float32Regs, Float64Regs, int_nvvm_d2f_rn>;
> +def INT_NVVM_D2F_RZ_FTZ : F_MATH_1<"cvt.rz.ftz.f32.f64 \t$dst, $src0;",
> +  Float32Regs, Float64Regs, int_nvvm_d2f_rz_ftz>;
> +def INT_NVVM_D2F_RZ : F_MATH_1<"cvt.rz.f32.f64 \t$dst, $src0;",
> +  Float32Regs, Float64Regs, int_nvvm_d2f_rz>;
> +def INT_NVVM_D2F_RM_FTZ : F_MATH_1<"cvt.rm.ftz.f32.f64 \t$dst, $src0;",
> +  Float32Regs, Float64Regs, int_nvvm_d2f_rm_ftz>;
> +def INT_NVVM_D2F_RM : F_MATH_1<"cvt.rm.f32.f64 \t$dst, $src0;",
> +  Float32Regs, Float64Regs, int_nvvm_d2f_rm>;
> +def INT_NVVM_D2F_RP_FTZ : F_MATH_1<"cvt.rp.ftz.f32.f64 \t$dst, $src0;",
> +  Float32Regs, Float64Regs, int_nvvm_d2f_rp_ftz>;
> +def INT_NVVM_D2F_RP : F_MATH_1<"cvt.rp.f32.f64 \t$dst, $src0;",
> +  Float32Regs, Float64Regs, int_nvvm_d2f_rp>;
> +
> +def INT_NVVM_D2I_RN : F_MATH_1<"cvt.rni.s32.f64 \t$dst, $src0;",
> +  Int32Regs, Float64Regs, int_nvvm_d2i_rn>;
> +def INT_NVVM_D2I_RZ : F_MATH_1<"cvt.rzi.s32.f64 \t$dst, $src0;",
> +  Int32Regs, Float64Regs, int_nvvm_d2i_rz>;
> +def INT_NVVM_D2I_RM : F_MATH_1<"cvt.rmi.s32.f64 \t$dst, $src0;",
> +  Int32Regs, Float64Regs, int_nvvm_d2i_rm>;
> +def INT_NVVM_D2I_RP : F_MATH_1<"cvt.rpi.s32.f64 \t$dst, $src0;",
> +  Int32Regs, Float64Regs, int_nvvm_d2i_rp>;
> +
> +def INT_NVVM_D2UI_RN : F_MATH_1<"cvt.rni.u32.f64 \t$dst, $src0;",
> +  Int32Regs, Float64Regs, int_nvvm_d2ui_rn>;
> +def INT_NVVM_D2UI_RZ : F_MATH_1<"cvt.rzi.u32.f64 \t$dst, $src0;",
> +  Int32Regs, Float64Regs, int_nvvm_d2ui_rz>;
> +def INT_NVVM_D2UI_RM : F_MATH_1<"cvt.rmi.u32.f64 \t$dst, $src0;",
> +  Int32Regs, Float64Regs, int_nvvm_d2ui_rm>;
> +def INT_NVVM_D2UI_RP : F_MATH_1<"cvt.rpi.u32.f64 \t$dst, $src0;",
> +  Int32Regs, Float64Regs, int_nvvm_d2ui_rp>;
> +
> +def INT_NVVM_I2D_RN : F_MATH_1<"cvt.rn.f64.s32 \t$dst, $src0;",
> +  Float64Regs, Int32Regs, int_nvvm_i2d_rn>;
> +def INT_NVVM_I2D_RZ : F_MATH_1<"cvt.rz.f64.s32 \t$dst, $src0;",
> +  Float64Regs, Int32Regs, int_nvvm_i2d_rz>;
> +def INT_NVVM_I2D_RM : F_MATH_1<"cvt.rm.f64.s32 \t$dst, $src0;",
> +  Float64Regs, Int32Regs, int_nvvm_i2d_rm>;
> +def INT_NVVM_I2D_RP : F_MATH_1<"cvt.rp.f64.s32 \t$dst, $src0;",
> +  Float64Regs, Int32Regs, int_nvvm_i2d_rp>;
> +
> +def INT_NVVM_UI2D_RN : F_MATH_1<"cvt.rn.f64.u32 \t$dst, $src0;",
> +  Float64Regs, Int32Regs, int_nvvm_ui2d_rn>;
> +def INT_NVVM_UI2D_RZ : F_MATH_1<"cvt.rz.f64.u32 \t$dst, $src0;",
> +  Float64Regs, Int32Regs, int_nvvm_ui2d_rz>;
> +def INT_NVVM_UI2D_RM : F_MATH_1<"cvt.rm.f64.u32 \t$dst, $src0;",
> +  Float64Regs, Int32Regs, int_nvvm_ui2d_rm>;
> +def INT_NVVM_UI2D_RP : F_MATH_1<"cvt.rp.f64.u32 \t$dst, $src0;",
> +  Float64Regs, Int32Regs, int_nvvm_ui2d_rp>;
> +
> +def INT_NVVM_F2I_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s32.f32 \t$dst, $src0;",
> +  Int32Regs, Float32Regs, int_nvvm_f2i_rn_ftz>;
> +def INT_NVVM_F2I_RN : F_MATH_1<"cvt.rni.s32.f32 \t$dst, $src0;", Int32Regs,
> +  Float32Regs, int_nvvm_f2i_rn>;
> +def INT_NVVM_F2I_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s32.f32 \t$dst, $src0;",
> +  Int32Regs, Float32Regs, int_nvvm_f2i_rz_ftz>;
> +def INT_NVVM_F2I_RZ : F_MATH_1<"cvt.rzi.s32.f32 \t$dst, $src0;", Int32Regs,
> +  Float32Regs, int_nvvm_f2i_rz>;
> +def INT_NVVM_F2I_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s32.f32 \t$dst, $src0;",
> +  Int32Regs, Float32Regs, int_nvvm_f2i_rm_ftz>;
> +def INT_NVVM_F2I_RM : F_MATH_1<"cvt.rmi.s32.f32 \t$dst, $src0;", Int32Regs,
> +  Float32Regs, int_nvvm_f2i_rm>;
> +def INT_NVVM_F2I_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s32.f32 \t$dst, $src0;",
> +  Int32Regs, Float32Regs, int_nvvm_f2i_rp_ftz>;
> +def INT_NVVM_F2I_RP : F_MATH_1<"cvt.rpi.s32.f32 \t$dst, $src0;", Int32Regs,
> +  Float32Regs, int_nvvm_f2i_rp>;
> +
> +def INT_NVVM_F2UI_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u32.f32 \t$dst, $src0;",
> +  Int32Regs, Float32Regs, int_nvvm_f2ui_rn_ftz>;
> +def INT_NVVM_F2UI_RN : F_MATH_1<"cvt.rni.u32.f32 \t$dst, $src0;", Int32Regs,
> +  Float32Regs, int_nvvm_f2ui_rn>;
> +def INT_NVVM_F2UI_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u32.f32 \t$dst, $src0;",
> +  Int32Regs, Float32Regs, int_nvvm_f2ui_rz_ftz>;
> +def INT_NVVM_F2UI_RZ : F_MATH_1<"cvt.rzi.u32.f32 \t$dst, $src0;", Int32Regs,
> +  Float32Regs, int_nvvm_f2ui_rz>;
> +def INT_NVVM_F2UI_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u32.f32 \t$dst, $src0;",
> +  Int32Regs, Float32Regs, int_nvvm_f2ui_rm_ftz>;
> +def INT_NVVM_F2UI_RM : F_MATH_1<"cvt.rmi.u32.f32 \t$dst, $src0;", Int32Regs,
> +  Float32Regs, int_nvvm_f2ui_rm>;
> +def INT_NVVM_F2UI_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u32.f32 \t$dst, $src0;",
> +  Int32Regs, Float32Regs, int_nvvm_f2ui_rp_ftz>;
> +def INT_NVVM_F2UI_RP : F_MATH_1<"cvt.rpi.u32.f32 \t$dst, $src0;", Int32Regs,
> +  Float32Regs, int_nvvm_f2ui_rp>;
> +
> +def INT_NVVM_I2F_RN : F_MATH_1<"cvt.rn.f32.s32 \t$dst, $src0;", Float32Regs,
> +  Int32Regs, int_nvvm_i2f_rn>;
> +def INT_NVVM_I2F_RZ : F_MATH_1<"cvt.rz.f32.s32 \t$dst, $src0;", Float32Regs,
> +  Int32Regs, int_nvvm_i2f_rz>;
> +def INT_NVVM_I2F_RM : F_MATH_1<"cvt.rm.f32.s32 \t$dst, $src0;", Float32Regs,
> +  Int32Regs, int_nvvm_i2f_rm>;
> +def INT_NVVM_I2F_RP : F_MATH_1<"cvt.rp.f32.s32 \t$dst, $src0;", Float32Regs,
> +  Int32Regs, int_nvvm_i2f_rp>;
> +
> +def INT_NVVM_UI2F_RN : F_MATH_1<"cvt.rn.f32.u32 \t$dst, $src0;", Float32Regs,
> +  Int32Regs, int_nvvm_ui2f_rn>;
> +def INT_NVVM_UI2F_RZ : F_MATH_1<"cvt.rz.f32.u32 \t$dst, $src0;", Float32Regs,
> +  Int32Regs, int_nvvm_ui2f_rz>;
> +def INT_NVVM_UI2F_RM : F_MATH_1<"cvt.rm.f32.u32 \t$dst, $src0;", Float32Regs,
> +  Int32Regs, int_nvvm_ui2f_rm>;
> +def INT_NVVM_UI2F_RP : F_MATH_1<"cvt.rp.f32.u32 \t$dst, $src0;", Float32Regs,
> +  Int32Regs, int_nvvm_ui2f_rp>;
> +
> +def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
> +  Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
> +
> +def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t",
> +                       !strconcat(".reg .b32 %temp; \n\t",
> +             !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
> +               "}}"))),
> +             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
> +def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
> +                       !strconcat(".reg .b32 %temp; \n\t",
> +                         !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
> +                           "}}"))),
> +             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
> +
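
The two defs above use the mov.b64-with-vector-operand trick to split a
double's bit pattern into 32-bit halves (and LOHI_I2D reassembles one).
These back the CUDA helpers:

  __global__ void split_demo(double d, int *out) {
    int lo = __double2loint(d);           // low 32 bits of the bit pattern
    int hi = __double2hiint(d);           // high 32 bits
    double r = __hiloint2double(hi, lo);  // mov.b64 {lo, hi}; exact round-trip
    out[0] = lo; out[1] = hi; out[2] = (r == d);
  }
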
> +def INT_NVVM_F2LL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s64.f32 \t$dst, $src0;",
> +  Int64Regs, Float32Regs, int_nvvm_f2ll_rn_ftz>;
> +def INT_NVVM_F2LL_RN : F_MATH_1<"cvt.rni.s64.f32 \t$dst, $src0;", Int64Regs,
> +  Float32Regs, int_nvvm_f2ll_rn>;
> +def INT_NVVM_F2LL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s64.f32 \t$dst, $src0;",
> +  Int64Regs, Float32Regs, int_nvvm_f2ll_rz_ftz>;
> +def INT_NVVM_F2LL_RZ : F_MATH_1<"cvt.rzi.s64.f32 \t$dst, $src0;", Int64Regs,
> +  Float32Regs, int_nvvm_f2ll_rz>;
> +def INT_NVVM_F2LL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s64.f32 \t$dst, $src0;",
> +  Int64Regs, Float32Regs, int_nvvm_f2ll_rm_ftz>;
> +def INT_NVVM_F2LL_RM : F_MATH_1<"cvt.rmi.s64.f32 \t$dst, $src0;", Int64Regs,
> +  Float32Regs, int_nvvm_f2ll_rm>;
> +def INT_NVVM_F2LL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s64.f32 \t$dst, $src0;",
> +  Int64Regs, Float32Regs, int_nvvm_f2ll_rp_ftz>;
> +def INT_NVVM_F2LL_RP : F_MATH_1<"cvt.rpi.s64.f32 \t$dst, $src0;", Int64Regs,
> +  Float32Regs, int_nvvm_f2ll_rp>;
> +
> +def INT_NVVM_F2ULL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u64.f32 \t$dst, $src0;",
> +  Int64Regs, Float32Regs, int_nvvm_f2ull_rn_ftz>;
> +def INT_NVVM_F2ULL_RN : F_MATH_1<"cvt.rni.u64.f32 \t$dst, $src0;", Int64Regs,
> +  Float32Regs, int_nvvm_f2ull_rn>;
> +def INT_NVVM_F2ULL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u64.f32 \t$dst, $src0;",
> +  Int64Regs, Float32Regs, int_nvvm_f2ull_rz_ftz>;
> +def INT_NVVM_F2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f32 \t$dst, $src0;", Int64Regs,
> +  Float32Regs, int_nvvm_f2ull_rz>;
> +def INT_NVVM_F2ULL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u64.f32 \t$dst, $src0;",
> +  Int64Regs, Float32Regs, int_nvvm_f2ull_rm_ftz>;
> +def INT_NVVM_F2ULL_RM : F_MATH_1<"cvt.rmi.u64.f32 \t$dst, $src0;", Int64Regs,
> +  Float32Regs, int_nvvm_f2ull_rm>;
> +def INT_NVVM_F2ULL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u64.f32 \t$dst, $src0;",
> +  Int64Regs, Float32Regs, int_nvvm_f2ull_rp_ftz>;
> +def INT_NVVM_F2ULL_RP : F_MATH_1<"cvt.rpi.u64.f32 \t$dst, $src0;", Int64Regs,
> +  Float32Regs, int_nvvm_f2ull_rp>;
> +
> +def INT_NVVM_D2LL_RN : F_MATH_1<"cvt.rni.s64.f64 \t$dst, $src0;", Int64Regs,
> +  Float64Regs, int_nvvm_d2ll_rn>;
> +def INT_NVVM_D2LL_RZ : F_MATH_1<"cvt.rzi.s64.f64 \t$dst, $src0;", Int64Regs,
> +  Float64Regs, int_nvvm_d2ll_rz>;
> +def INT_NVVM_D2LL_RM : F_MATH_1<"cvt.rmi.s64.f64 \t$dst, $src0;", Int64Regs,
> +  Float64Regs, int_nvvm_d2ll_rm>;
> +def INT_NVVM_D2LL_RP : F_MATH_1<"cvt.rpi.s64.f64 \t$dst, $src0;", Int64Regs,
> +  Float64Regs, int_nvvm_d2ll_rp>;
> +
> +def INT_NVVM_D2ULL_RN : F_MATH_1<"cvt.rni.u64.f64 \t$dst, $src0;", Int64Regs,
> +  Float64Regs, int_nvvm_d2ull_rn>;
> +def INT_NVVM_D2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f64 \t$dst, $src0;", Int64Regs,
> +  Float64Regs, int_nvvm_d2ull_rz>;
> +def INT_NVVM_D2ULL_RM : F_MATH_1<"cvt.rmi.u64.f64 \t$dst, $src0;", Int64Regs,
> +  Float64Regs, int_nvvm_d2ull_rm>;
> +def INT_NVVM_D2ULL_RP : F_MATH_1<"cvt.rpi.u64.f64 \t$dst, $src0;", Int64Regs,
> +  Float64Regs, int_nvvm_d2ull_rp>;
> +
> +def INT_NVVM_LL2F_RN : F_MATH_1<"cvt.rn.f32.s64 \t$dst, $src0;", Float32Regs,
> +  Int64Regs, int_nvvm_ll2f_rn>;
> +def INT_NVVM_LL2F_RZ : F_MATH_1<"cvt.rz.f32.s64 \t$dst, $src0;", Float32Regs,
> +  Int64Regs, int_nvvm_ll2f_rz>;
> +def INT_NVVM_LL2F_RM : F_MATH_1<"cvt.rm.f32.s64 \t$dst, $src0;", Float32Regs,
> +  Int64Regs, int_nvvm_ll2f_rm>;
> +def INT_NVVM_LL2F_RP : F_MATH_1<"cvt.rp.f32.s64 \t$dst, $src0;", Float32Regs,
> +  Int64Regs, int_nvvm_ll2f_rp>;
> +def INT_NVVM_ULL2F_RN : F_MATH_1<"cvt.rn.f32.u64 \t$dst, $src0;", Float32Regs,
> +  Int64Regs, int_nvvm_ull2f_rn>;
> +def INT_NVVM_ULL2F_RZ : F_MATH_1<"cvt.rz.f32.u64 \t$dst, $src0;", Float32Regs,
> +  Int64Regs, int_nvvm_ull2f_rz>;
> +def INT_NVVM_ULL2F_RM : F_MATH_1<"cvt.rm.f32.u64 \t$dst, $src0;", Float32Regs,
> +  Int64Regs, int_nvvm_ull2f_rm>;
> +def INT_NVVM_ULL2F_RP : F_MATH_1<"cvt.rp.f32.u64 \t$dst, $src0;", Float32Regs,
> +  Int64Regs, int_nvvm_ull2f_rp>;
> +
> +def INT_NVVM_LL2D_RN : F_MATH_1<"cvt.rn.f64.s64 \t$dst, $src0;", Float64Regs,
> +  Int64Regs, int_nvvm_ll2d_rn>;
> +def INT_NVVM_LL2D_RZ : F_MATH_1<"cvt.rz.f64.s64 \t$dst, $src0;", Float64Regs,
> +  Int64Regs, int_nvvm_ll2d_rz>;
> +def INT_NVVM_LL2D_RM : F_MATH_1<"cvt.rm.f64.s64 \t$dst, $src0;", Float64Regs,
> +  Int64Regs, int_nvvm_ll2d_rm>;
> +def INT_NVVM_LL2D_RP : F_MATH_1<"cvt.rp.f64.s64 \t$dst, $src0;", Float64Regs,
> +  Int64Regs, int_nvvm_ll2d_rp>;
> +def INT_NVVM_ULL2D_RN : F_MATH_1<"cvt.rn.f64.u64 \t$dst, $src0;", Float64Regs,
> +  Int64Regs, int_nvvm_ull2d_rn>;
> +def INT_NVVM_ULL2D_RZ : F_MATH_1<"cvt.rz.f64.u64 \t$dst, $src0;", Float64Regs,
> +  Int64Regs, int_nvvm_ull2d_rz>;
> +def INT_NVVM_ULL2D_RM : F_MATH_1<"cvt.rm.f64.u64 \t$dst, $src0;", Float64Regs,
> +  Int64Regs, int_nvvm_ull2d_rm>;
> +def INT_NVVM_ULL2D_RP : F_MATH_1<"cvt.rp.f64.u64 \t$dst, $src0;", Float64Regs,
> +  Int64Regs, int_nvvm_ull2d_rp>;
> +
> +def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
> +                                   !strconcat(".reg .b16 %temp;\n\t",
> +           !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
> +           !strconcat("mov.b16 \t$dst, %temp;\n",
> +             "}}")))),
> +                                   Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
> +def INT_NVVM_F2H_RN : F_MATH_1<!strconcat("{{\n\t",
> +                                   !strconcat(".reg .b16 %temp;\n\t",
> +           !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
> +           !strconcat("mov.b16 \t$dst, %temp;\n",
> +             "}}")))),
> +           Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
> +
> +def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
> +                            !strconcat(".reg .b16 %temp;\n\t",
> +          !strconcat("mov.b16 \t%temp, $src0;\n\t",
> +          !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
> +            "}}")))),
> +          Float32Regs, Int16Regs, int_nvvm_h2f>;
> +
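
Since there's no f16 register class, the half value rides in an Int16Regs
register and the asm string stages it through a .b16 temp around the cvt.
In toolkits of this era that matches the storage-only half API:

  __global__ void half_demo(float x, float *out) {
    unsigned short h = __float2half_rn(x);  // cvt.rn.f16.f32; half as raw 16 bits
    out[0] = __half2float(h);               // cvt.f32.f16 on the way back
  }
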
> +//
> +// Bitcast
> +//
> +
> +def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
> +  Float32Regs, int_nvvm_bitcast_f2i>;
> +def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
> +  Int32Regs, int_nvvm_bitcast_i2f>;
> +
> +def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
> +  Int64Regs, int_nvvm_bitcast_ll2d>;
> +def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
> +  Float64Regs, int_nvvm_bitcast_d2ll>;
> +
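
The bitcasts are plain movs between register classes, i.e. reinterpret the
bits without any conversion:

  __global__ void bitcast_demo(float f, double d, int *iout, float *fout) {
    iout[0] = __float_as_int(f);               // mov.b32: f32 bits as i32
    fout[0] = __int_as_float(0x3f800000);      // 1.0f built from its bit pattern
    long long b = __double_as_longlong(d);     // mov.b64
    fout[1] = (float)__longlong_as_double(b);  // round-trips d before the cast
  }
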
> +//-----------------------------------
> +// Atomic Functions
> +//-----------------------------------
> +
> +class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
> + : PatFrag<ops, frag, [{
> +   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
> +}]>;
> +class ATOMIC_SHARED_CHK <dag ops, dag frag>
> + : PatFrag<ops, frag, [{
> +   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
> +}]>;
> +class ATOMIC_GENERIC_CHK <dag ops, dag frag>
> + : PatFrag<ops, frag, [{
> +   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
> +}]>;
> +
> +multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
> +  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
> +  Operand IMMType, SDNode IMM, Predicate Pred> {
> +  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
> +               !strconcat("atom",
> +         !strconcat(SpaceStr,
> +         !strconcat(OpcStr,
> +         !strconcat(TypeStr,
> +         !strconcat(" \t$dst, [$addr], $b;", ""))))),
> +         [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
> +  Requires<[Pred]>;
> +  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
> +               !strconcat("atom",
> +         !strconcat(SpaceStr,
> +         !strconcat(OpcStr,
> +         !strconcat(TypeStr,
> +         !strconcat(" \t$dst, [$addr], $b;", ""))))),
> +         [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
> +  Requires<[Pred]>;
> +}
> +multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
> +  string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
> +  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
> +    IntOp, IMMType, IMM, Pred>;
> +  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
> +    IntOp, IMMType, IMM, Pred>;
> +}
> +
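
So the ChkMemSDNodeAddressSpace predicates above are what route a single
IR-level atomic to atom.global, atom.shared, or the generic atom form, and
the _USE_G multiclasses presumably let generic atomics degrade to atom.global
on subtargets without generic support. From CUDA the selection is implicit in
the pointer's provenance:

  __global__ void atomic_spaces(int *gptr) {
    __shared__ int tile;               // shared state space
    if (threadIdx.x == 0) tile = 0;
    __syncthreads();
    atomicAdd(&tile, 1);  // eligible for atom.shared.add.u32
    atomicAdd(gptr, 1);   // atom.global.add.u32, or the generic form if the
                          // compiler can't prove the address space
  }
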
> +// Has 2 operands; negates the second operand before applying the atomic op.
> +multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
> +  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
> +  Operand IMMType, Predicate Pred> {
> +  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
> +    !strconcat("{{ \n\t",
> +         !strconcat(".reg \t.s",
> +         !strconcat(TypeStr,
> +         !strconcat(" temp; \n\t",
> +         !strconcat("neg.s",
> +         !strconcat(TypeStr,
> +         !strconcat(" \ttemp, $b; \n\t",
> +               !strconcat("atom",
> +         !strconcat(SpaceStr,
> +         !strconcat(OpcStr,
> +         !strconcat(".u",
> +         !strconcat(TypeStr,
> +         !strconcat(" \t$dst, [$addr], temp; \n\t",
> +           !strconcat("}}", "")))))))))))))),
> +         [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
> +  Requires<[Pred]>;
> +}
> +multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
> +  string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
> +  Predicate Pred> {
> + defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
> +   IntOp, IMMType, Pred> ;
> + defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
> +   IntOp, IMMType, Pred> ;
> +}
> +
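
Worth a comment in the .td, since it's easy to lose in the !strconcat chain:
PTX has no atom.sub, so this emits neg.sNN into a temp and then atom.add.uNN.
In other words:

  __global__ void sub_demo(int *p) {
    atomicSub(p, 5);  // lowered as: neg.s32 temp, 5; atom.add.u32 dst, [p], temp;
  }
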
> +// Has 3 operands.
> +multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
> +  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
> +  Operand IMMType, Predicate Pred> {
> +  def reg : NVPTXInst<(outs regclass:$dst),
> +    (ins ptrclass:$addr, regclass:$b, regclass:$c),
> +               !strconcat("atom",
> +         !strconcat(SpaceStr,
> +         !strconcat(OpcStr,
> +         !strconcat(TypeStr,
> +         !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
> +         [(set regclass:$dst,
> +           (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
> +         Requires<[Pred]>;
> +  def imm1 : NVPTXInst<(outs regclass:$dst),
> +    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
> +               !strconcat("atom",
> +         !strconcat(SpaceStr,
> +         !strconcat(OpcStr,
> +         !strconcat(TypeStr,
> +         !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
> +         [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
> +  Requires<[Pred]>;
> +  def imm2 : NVPTXInst<(outs regclass:$dst),
> +    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
> +               !strconcat("atom",
> +         !strconcat(SpaceStr,
> +         !strconcat(OpcStr,
> +         !strconcat(TypeStr,
> +         !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
> +         [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
> +  Requires<[Pred]>;
> +  def imm3 : NVPTXInst<(outs regclass:$dst),
> +    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
> +               !strconcat("atom",
> +         !strconcat(SpaceStr,
> +         !strconcat(OpcStr,
> +         !strconcat(TypeStr,
> +         !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
> +         [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
> +  Requires<[Pred]>;
> +}
> +multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
> +  string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
> +  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
> +    IntOp, IMMType, Pred>;
> +  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
> +    IntOp, IMMType, Pred>;
> +}
> +
> +// atom_add
> +
> +def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_add_32 node:$a, node:$b)>;
> +def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_add_32 node:$a, node:$b)>;
> +def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_add_32 node:$a, node:$b)>;
> +def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_add_64 node:$a, node:$b)>;
> +def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_add_64 node:$a, node:$b)>;
> +def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_add_64 node:$a, node:$b)>;
> +def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
> +def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
> +def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
> +
> +defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
> +  atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
> +  atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
> +  atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
> +  ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +
> +defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
> +  atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
> +defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
> +  atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
> +defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
> +  atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
> +defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
> +  ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;
> +
> +defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
> +  atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
> +defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
> +  atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
> +defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
> +  atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
> +
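
Note the f32 add is gated on hasAtomAddF32 rather than the per-space
hasAtomRed* predicates, which matches float atomicAdd being an sm_20 feature;
there's no f64 form in this patch. E.g.:

  __global__ void fadd_demo(float *sum, float v) {
    atomicAdd(sum, v);  // atom.add.f32 on sm_20+; doubles would need a CAS loop
  }
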
> +// atom_sub
> +
> +def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_sub_32 node:$a, node:$b)>;
> +def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_sub_32 node:$a, node:$b)>;
> +def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_sub_32 node:$a, node:$b)>;
> +def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_sub_64 node:$a, node:$b)>;
> +def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_sub_64 node:$a, node:$b)>;
> +def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_sub_64 node:$a, node:$b)>;
> +
> +defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
> +  atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
> +  atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
> +defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
> +  atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
> +  ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
> +defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
> +  atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
> +  atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
> +defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
> +  atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
> +defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
> +  ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
> +
> +// atom_swap
> +
> +def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_swap_32 node:$a, node:$b)>;
> +def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_swap_32 node:$a, node:$b)>;
> +def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_swap_32 node:$a, node:$b)>;
> +def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_swap_64 node:$a, node:$b)>;
> +def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_swap_64 node:$a, node:$b)>;
> +def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_swap_64 node:$a, node:$b)>;
> +
> +defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
> +  atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
> +  atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
> +  atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
> +  ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
> +  atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
> +defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
> +  atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
> +defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
> +  atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
> +defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
> +  ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
> +
> +// atom_max
> +
> +def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_max_32 node:$a, node:$b)>;
> +def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_max_32 node:$a, node:$b)>;
> +def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_max_32 node:$a, node:$b)>;
> +def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_umax_32 node:$a, node:$b)>;
> +def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_umax_32 node:$a, node:$b)>;
> +def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_umax_32 node:$a, node:$b)>;
> +
> +defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
> +  ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
> +  ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
> +  atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
> +  ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
> +  ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
> +  ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
> +  atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
> +  ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +
> +// atom_min
> +
> +def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_min_32 node:$a, node:$b)>;
> +def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_min_32 node:$a, node:$b)>;
> +def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_min_32 node:$a, node:$b)>;
> +def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_umin_32 node:$a, node:$b)>;
> +def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_umin_32 node:$a, node:$b)>;
> +def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_umin_32 node:$a, node:$b)>;
> +
> +defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
> +  ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
> +  ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
> +  atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
> +  ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
> +  ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
> +  ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
> +  atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
> +  ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +
> +// atom_inc  atom_dec
> +
> +def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
> +def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
> +def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
> +def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
> +def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
> +def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
> +
> +defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
> +  atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
> +  atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
> +  atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
> +  ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
> +  atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
> +  atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
> +  atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
> +  ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +
> +// atom_and
> +
> +def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_and_32 node:$a, node:$b)>;
> +def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_and_32 node:$a, node:$b)>;
> +def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_and_32 node:$a, node:$b)>;
> +
> +defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
> +  atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
> +  atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
> +  atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
> +  ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +
> +// atom_or
> +
> +def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_or_32 node:$a, node:$b)>;
> +def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_or_32 node:$a, node:$b)>;
> +def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_or_32 node:$a, node:$b)>;
> +
> +defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
> +  atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
> +  atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
> +  ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
> +  atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
> +
> +// atom_xor
> +
> +def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
> +  (atomic_load_xor_32 node:$a, node:$b)>;
> +def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
> +  (atomic_load_xor_32 node:$a, node:$b)>;
> +def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
> +  (atomic_load_xor_32 node:$a, node:$b)>;
> +
> +defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
> +  atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
> +  atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
> +  atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
> +  ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
> +
> +// atom_cas
> +
> +def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
> +  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
> +def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
> +  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
> +def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
> +  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
> +def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
> +  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
> +def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
> +  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
> +def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
> +  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
> +
> +defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
> +  atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
> +defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
> +  atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
> +defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
> +  atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
> +defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
> +  ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
> +defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
> +  atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
> +defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
> +  atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
> +defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
> +  atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
> +defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
> +  ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
> +
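
And the three-operand .cas patterns are the building block for atomics PTX
doesn't provide directly. A sketch of the usual CAS loop (atomicMaxFloat is
my name for the helper, not an API; it assumes non-NaN inputs):

  __device__ float atomicMaxFloat(float *addr, float val) {
    int *iaddr = (int *)addr;  // CAS over the raw bit pattern
    int old = *iaddr, assumed;
    do {
      assumed = old;
      if (__int_as_float(assumed) >= val)
        break;                 // already at least val; nothing to do
      old = atomicCAS(iaddr, assumed, __float_as_int(val));  // atom.cas.b32
    } while (old != assumed);
    return __int_as_float(old);
  }
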
> +
> +//-----------------------------------
> +// Read Special Registers
> +//-----------------------------------
> +class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
> +      NVPTXInst<(outs regclassOut:$dst), (ins),
> +               OpStr,
> +         [(set regclassOut:$dst, (IntOp))]>;
> +
> +def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_tid_x>;
> +def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_tid_y>;
> +def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_tid_z>;
> +
> +def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_ntid_x>;
> +def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_ntid_y>;
> +def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_ntid_z>;
> +
> +def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_ctaid_x>;
> +def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_ctaid_y>;
> +def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_ctaid_z>;
> +
> +def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_nctaid_x>;
> +def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_nctaid_y>;
> +def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_nctaid_z>;
> +
> +def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
> +  int_nvvm_read_ptx_sreg_warpsize>;
> +
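
These map the CUDA built-in variables: threadIdx/blockDim/blockIdx/gridDim
read %tid/%ntid/%ctaid/%nctaid, and warpSize reads WARP_SZ. The canonical
global-index computation exercises most of them:

  __global__ void index_demo(int *out) {
    int gid = blockIdx.x * blockDim.x + threadIdx.x;  // %ctaid.x * %ntid.x + %tid.x
    if (gid == 0) {
      out[0] = gridDim.x;  // %nctaid.x
      out[1] = warpSize;   // WARP_SZ
    }
  }
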
> +
> +//-----------------------------------
> +// Support for ldu on sm_20 or later
> +//-----------------------------------
> +
> +// Scalar
> +// @TODO: Revisit this; imemAny was changed to imem.
> +multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
> +  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
> +               !strconcat("ldu.global.", TyStr),
> +         [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
> +  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
> +               !strconcat("ldu.global.", TyStr),
> +         [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
> + def avar:  NVPTXInst<(outs regclass:$result), (ins imem:$src),
> +               !strconcat("ldu.global.", TyStr),
> +                [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
> +                Requires<[hasLDU]>;
> + def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
> +               !strconcat("ldu.global.", TyStr),
> +         [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
> + def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
> +               !strconcat("ldu.global.", TyStr),
> +         [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
> +}
> +
> +defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];",  Int8Regs,
> +int_nvvm_ldu_global_i>;
> +defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
> +int_nvvm_ldu_global_i>;
> +defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
> +int_nvvm_ldu_global_i>;
> +defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
> +int_nvvm_ldu_global_i>;
> +defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs,
> +int_nvvm_ldu_global_f>;
> +defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs,
> +int_nvvm_ldu_global_f>;
> +defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
> +int_nvvm_ldu_global_p>;
> +defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
> +int_nvvm_ldu_global_p>;
> +
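
For context, ldu.global is the sm_20 load that asserts the data is read-only
for the kernel's lifetime (and is most useful when the address is uniform
across the warp). My understanding is that the frontend reaches it for loads
through const __restrict__ pointers, e.g.:

  __global__ void ldu_demo(const float *__restrict__ in, float *out) {
    // Uniform, provably read-only load: a candidate for ldu.global.f32.
    out[threadIdx.x] = in[0] * 2.0f;
  }
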
> +// vector
> +
> +// Elementized vector ldu
> +multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
> + def _32:     NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
> +   (ins Int32Regs:$src),
> +                     !strconcat("ldu.global.", TyStr), []>;
> + def _64:     NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
> +   (ins Int64Regs:$src),
> +                     !strconcat("ldu.global.", TyStr), []>;
> +}
> +
> +multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
> + def _32:    NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
> +     regclass:$dst4), (ins Int32Regs:$src),
> +               !strconcat("ldu.global.", TyStr), []>;
> + def _64:    NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
> +     regclass:$dst4), (ins Int64Regs:$src),
> +               !strconcat("ldu.global.", TyStr), []>;
> +}
> +
> +defm INT_PTX_LDU_G_v2i8_ELE
> +  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int8Regs>;
> +defm INT_PTX_LDU_G_v2i16_ELE
> +  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
> +defm INT_PTX_LDU_G_v2i32_ELE
> +  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
> +defm INT_PTX_LDU_G_v2f32_ELE
> +  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
> +defm INT_PTX_LDU_G_v2i64_ELE
> +  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
> +defm INT_PTX_LDU_G_v2f64_ELE
> +  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
> +defm INT_PTX_LDU_G_v4i8_ELE
> +  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int8Regs>;
> +defm INT_PTX_LDU_G_v4i16_ELE
> +  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
> +    Int16Regs>;
> +defm INT_PTX_LDU_G_v4i32_ELE
> +  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
> +    Int32Regs>;
> +defm INT_PTX_LDU_G_v4f32_ELE
> +  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
> +    Float32Regs>;
> +
> +// Vector ldu
> +multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp,
> +  NVPTXInst eleInst, NVPTXInst eleInst64> {
> + def _32:    NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src),
> +               !strconcat("ldu.global.", TyStr),
> +         [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>,
> + Requires<[hasLDU]>;
> + def _64:    NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src),
> +               !strconcat("ldu.global.", TyStr),
> +         [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>,
> + Requires<[hasLDU]>;
> +}
> +
> +let VecInstType=isVecLD.Value in {
> +defm INT_PTX_LDU_G_v2i8  : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];",
> +  V2I8Regs,  int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32,
> +  INT_PTX_LDU_G_v2i8_ELE_64>;
> +defm INT_PTX_LDU_G_v4i8  : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];",
> +  V4I8Regs,  int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32,
> +  INT_PTX_LDU_G_v4i8_ELE_64>;
> +defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];",
> +  V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32,
> +  INT_PTX_LDU_G_v2i16_ELE_64>;
> +defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];",
> +  V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32,
> +  INT_PTX_LDU_G_v4i16_ELE_64>;
> +defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];",
> +  V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32,
> +  INT_PTX_LDU_G_v2i32_ELE_64>;
> +defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];",
> +  V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32,
> +  INT_PTX_LDU_G_v4i32_ELE_64>;
> +defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];",
> +  V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32,
> +  INT_PTX_LDU_G_v2f32_ELE_64>;
> +defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];",
> +  V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32,
> +  INT_PTX_LDU_G_v4f32_ELE_64>;
> +defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];",
> +  V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32,
> +  INT_PTX_LDU_G_v2i64_ELE_64>;
> +defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];",
> +  V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32,
> +  INT_PTX_LDU_G_v2f64_ELE_64>;
> +}
> +
> +
> +
> +multiclass NG_TO_G<string Str, Intrinsic Intrin> {
> +   def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
> +          !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
> +      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
> +   Requires<[hasGenericLdSt]>;
> +   def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
> +          !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
> +      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
> +   Requires<[hasGenericLdSt]>;
> +
> +// @TODO: Are these actually needed?  I believe global addresses will be copied
> +// to register values anyway.
> +   /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
> +          !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
> +      [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
> +      Requires<[hasGenericLdSt]>;
> +   def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
> +          !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
> +      [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
> +      Requires<[hasGenericLdSt]>;*/
> +
> +   def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
> +          "mov.u32 \t$result, $src;",
> +      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
> +   def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
> +          "mov.u64 \t$result, $src;",
> +      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
> +
> +// @TODO: Are these actually needed?  I believe global addresses will be copied
> +// to register values anyway.
> +   /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
> +          "mov.u32 \t$result, $src;",
> +      [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
> +   def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
> +          "mov.u64 \t$result, $src;",
> +      [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
> +}
> +
> +multiclass G_TO_NG<string Str, Intrinsic Intrin> {
> +   def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
> +          !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")),
> +      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
> +   Requires<[hasGenericLdSt]>;
> +   def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
> +          !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")),
> +      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
> +   Requires<[hasGenericLdSt]>;
> +   def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
> +          "mov.u32 \t$result, $src;",
> +      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
> +   def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
> +          "mov.u64 \t$result, $src;",
> +      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
> +}
> +
> +defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
> +defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
> +defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
> +
> +defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
> +defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
> +defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
> +
> +def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
> +               "mov.u32 \t$result, $src;",
> +     [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>;
> +def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
> +               "mov.u64 \t$result, $src;",
> +     [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>;
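
A minimal CUDA sketch (names hypothetical) of source that exercises these
conversions: letting a __shared__ address escape into a generic pointer is
the kind of code that produces a cvta.shared at the PTX level.

  __global__ void stage(int *out) {
    __shared__ int buf[64];
    int *p = &buf[threadIdx.x];   // shared -> generic: cvta.shared
    *p = (int)threadIdx.x;
    __syncthreads();
    out[threadIdx.x] = buf[(threadIdx.x + 1) % 64];
  }
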
> +
> +
> +
> +// @TODO: Revisit this.  There is a type
> +// contradiction between iPTRAny and iPTR for the def.
> +/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
> +               "mov.u32 \t$result, $src;",
> +     [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen
> +     (Wrapper tglobaladdr:$src)))]>;
> +def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
> +               "mov.u64 \t$result, $src;",
> +     [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen
> +     (Wrapper tglobaladdr:$src)))]>;*/
> +
> +
> +def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
> +            "mov.u32 \t$result, $src;",
> +     [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>;
> +def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
> +            "mov.u64 \t$result, $src;",
> +     [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>;
> +
> +
> +// nvvm.ptr.gen.to.param
> +def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
> +  (ins Int32Regs:$src),
> +                        "mov.u32 \t$result, $src;",
> +                              [(set Int32Regs:$result,
> +                                (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
> +def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
> +  (ins Int64Regs:$src),
> +                        "mov.u64 \t$result, $src;",
> +                              [(set Int64Regs:$result,
> +                                (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
> +
> +
> +// nvvm.move intrinsics

> +def nvvm_move_i8 : NVPTXInst<(outs Int8Regs:$r), (ins Int8Regs:$s),
> +                             "mov.b16 \t$r, $s;",
> +                             [(set Int8Regs:$r,
> +                               (int_nvvm_move_i8 Int8Regs:$s))]>;
> +def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
> +                             "mov.b16 \t$r, $s;",
> +                             [(set Int16Regs:$r,
> +                               (int_nvvm_move_i16 Int16Regs:$s))]>;
> +def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
> +                             "mov.b32 \t$r, $s;",
> +                             [(set Int32Regs:$r,
> +                               (int_nvvm_move_i32 Int32Regs:$s))]>;
> +def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
> +                             "mov.b64 \t$r, $s;",
> +                             [(set Int64Regs:$r,
> +                               (int_nvvm_move_i64 Int64Regs:$s))]>;
> +def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
> +                             "mov.f32 \t$r, $s;",
> +                             [(set Float32Regs:$r,
> +                               (int_nvvm_move_float Float32Regs:$s))]>;
> +def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
> +                             "mov.f64 \t$r, $s;",
> +                             [(set Float64Regs:$r,
> +                               (int_nvvm_move_double Float64Regs:$s))]>;
> +def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
> +                             "mov.u32 \t$r, $s;",
> +                             [(set Int32Regs:$r,
> +                               (int_nvvm_move_ptr Int32Regs:$s))]>;
> +def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
> +                             "mov.u64 \t$r, $s;",
> +                             [(set Int64Regs:$r,
> +                               (int_nvvm_move_ptr Int64Regs:$s))]>;
> +
> +// @TODO: Are these actually needed, or will we always just see symbols
> +// copied to registers first?
> +/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
> +                             "mov.u32 \t$r, $s;",
> +                             [(set Int32Regs:$r,
> +                             (int_nvvm_move_ptr texternalsym:$s))]>;
> +def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
> +                             "mov.u64 \t$r, $s;",
> +                             [(set Int64Regs:$r,
> +                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
> +
> +
> +// MoveParam        %r1, param
> +// ptr_local_to_gen %r2, %r1
> +// ptr_gen_to_local %r3, %r2
> +// ->
> +// mov %r1, param
> +
> +// @TODO: Revisit this.  There is a type
> +// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
> +// instructions are not currently defined. However, we can use the ptr
> +// variants and the asm printer will do the right thing.
> +def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
> +                (MoveParam texternalsym:$src)))),
> +               (nvvm_move_ptr64  texternalsym:$src)>;
> +def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
> +                (MoveParam texternalsym:$src)))),
> +               (nvvm_move_ptr32  texternalsym:$src)>;
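
A CUDA-level sketch (hypothetical names) of where this MoveParam round trip
presumably comes from: taking the address of a by-value kernel parameter
creates a param/local-space object whose pointer is converted to generic and
back, which the two patterns above fold into a plain mov.

  struct Pair { int a; int b; };

  __global__ void readParam(Pair p, int *out) {
    int *q = &p.b;            // address of a param-space object
    out[threadIdx.x] = *q;    // folds to a mov of the param symbol
  }
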
> +
> +
> +//-----------------------------------
> +// Compiler Error/Warn intrinsics
> +// - Just ignore them in codegen
> +//-----------------------------------
> +
> +def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
> +                "// llvm.nvvm.compiler.warn()",
> +                [(int_nvvm_compiler_warn Int32Regs:$a)]>;
> +def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
> +                "// llvm.nvvm.compiler.warn()",
> +                [(int_nvvm_compiler_warn Int64Regs:$a)]>;
> +def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
> +                "// llvm.nvvm.compiler.error()",
> +                [(int_nvvm_compiler_error Int32Regs:$a)]>;
> +def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
> +                "// llvm.nvvm.compiler.error()",
> +                [(int_nvvm_compiler_error Int64Regs:$a)]>;
> +
> +
> +
> +//===-- Old PTX Back-end Intrinsics ---------------------------------------===//
> +
> +// These intrinsics are retained for compatibility with the old PTX back-end.
> +
> +// PTX Special Purpose Register Accessor Intrinsics
> +
> +class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
> +  : NVPTXInst<(outs Int64Regs:$d), (ins),
> +              !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
> +              [(set Int64Regs:$d, (intop))]>;
> +
> +class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
> +  : NVPTXInst<(outs Int32Regs:$d), (ins),
> +              !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
> +              [(set Int32Regs:$d, (intop))]>;
> +
> +// TODO Add read vector-version of special registers
> +
> +def PTX_READ_TID_X   : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
> +                                                     int_ptx_read_tid_x>;
> +def PTX_READ_TID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
> +                                                     int_ptx_read_tid_y>;
> +def PTX_READ_TID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
> +                                                     int_ptx_read_tid_z>;
> +def PTX_READ_TID_W   : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
> +                                                     int_ptx_read_tid_w>;
> +
> +def PTX_READ_NTID_X   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
> +                                                      int_ptx_read_ntid_x>;
> +def PTX_READ_NTID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
> +                                                      int_ptx_read_ntid_y>;
> +def PTX_READ_NTID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
> +                                                      int_ptx_read_ntid_z>;
> +def PTX_READ_NTID_W   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
> +                                                      int_ptx_read_ntid_w>;
> +
> +def PTX_READ_LANEID  : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
> +                                                     int_ptx_read_laneid>;
> +def PTX_READ_WARPID  : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
> +                                                     int_ptx_read_warpid>;
> +def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
> +                                                     int_ptx_read_nwarpid>;
> +
> +def PTX_READ_CTAID_X   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
> +                                                       int_ptx_read_ctaid_x>;
> +def PTX_READ_CTAID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
> +                                                       int_ptx_read_ctaid_y>;
> +def PTX_READ_CTAID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
> +                                                       int_ptx_read_ctaid_z>;
> +def PTX_READ_CTAID_W   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
> +                                                       int_ptx_read_ctaid_w>;
> +
> +def PTX_READ_NCTAID_X   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
> +                                                        int_ptx_read_nctaid_x>;
> +def PTX_READ_NCTAID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
> +                                                        int_ptx_read_nctaid_y>;
> +def PTX_READ_NCTAID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
> +                                                        int_ptx_read_nctaid_z>;
> +def PTX_READ_NCTAID_W   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
> +                                                        int_ptx_read_nctaid_w>;
> +
> +def PTX_READ_SMID  : PTX_READ_SPECIAL_REGISTER_R32<"smid",
> +                                                   int_ptx_read_smid>;
> +def PTX_READ_NSMID  : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
> +                                                    int_ptx_read_nsmid>;
> +def PTX_READ_GRIDID  : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
> +                                                     int_ptx_read_gridid>;
> +
> +def PTX_READ_LANEMASK_EQ
> +  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
> +def PTX_READ_LANEMASK_LE
> +  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
> +def PTX_READ_LANEMASK_LT
> +  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
> +def PTX_READ_LANEMASK_GE
> +  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
> +def PTX_READ_LANEMASK_GT
> +  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
> +
> +def PTX_READ_CLOCK
> +  : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
> +def PTX_READ_CLOCK64
> +  : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
> +
> +def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
> +def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
> +def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
> +def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
> +
> +// PTX Parallel Synchronization and Communication Intrinsics
> +
> +def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
> +                             [(int_ptx_bar_sync imm:$i)]>;
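
For readers mapping these back to CUDA (sketch, hypothetical names):
threadIdx reads %tid, blockDim reads %ntid, blockIdx reads %ctaid, gridDim
reads %nctaid, and __syncthreads() lowers to bar.sync 0.

  __global__ void fill(int *out, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;  // %ctaid.x, %ntid.x, %tid.x
    if (i < n)
      out[i] = i;
    __syncthreads();                                // bar.sync 0
  }
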
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,208 @@
> +//===- NVPTXLowerAggrCopies.cpp - Lower aggregate copies -------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +// Lower aggregate copies and the memset, memcpy, and memmove intrinsics
> +// into loops when the size is large or is not a compile-time constant.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/Function.h"
> +#include "llvm/Constants.h"
> +#include "llvm/Module.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/Intrinsics.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Support/InstIterator.h"
> +#include "llvm/Support/IRBuilder.h"
> +#include "NVPTXLowerAggrCopies.h"
> +#include "llvm/Target/TargetData.h"
> +#include "llvm/LLVMContext.h"
> +
> +using namespace llvm;
> +
> +namespace llvm {
> +FunctionPass *createLowerAggrCopies();
> +}
> +
> +char NVPTXLowerAggrCopies::ID = 0;
> +
> +// Lower MemTransferInst or load-store pair to loop
> +static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
> +                                  Value *dstAddr, Value *len,
> +                                  //unsigned numLoads,
> +                                  bool srcVolatile, bool dstVolatile,
> +                                  LLVMContext &Context, Function &F) {
> +  Type *indType = len->getType();
> +
> +  BasicBlock *origBB = splitAt->getParent();
> +  BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
> +  BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
> +
> +  origBB->getTerminator()->setSuccessor(0, loopBB);
> +  IRBuilder<> builder(origBB, origBB->getTerminator());
> +
> +  // srcAddr and dstAddr are expected to be pointer types, so use
> +  // cast<> (which asserts on mismatch) instead of an unchecked dyn_cast<>.
> +  unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace();
> +  unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
> +
> +  // Cast pointers to (char *)
> +  srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
> +  dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));
> +
> +  IRBuilder<> loop(loopBB);
> +  // The loop index (ind) is a phi node.
> +  PHINode *ind = loop.CreatePHI(indType, 0);
> +  // Incoming value for ind is 0
> +  ind->addIncoming(ConstantInt::get(indType, 0), origBB);
> +
> +  // load from srcAddr+ind
> +  Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
> +  // store at dstAddr+ind
> +  loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
> +
> +  // The value for ind coming from backedge is (ind + 1)
> +  Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
> +  ind->addIncoming(newind, loopBB);
> +
> +  loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
> +}
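
A C-level picture (a sketch, not the emitted IR itself) of the loop this
function builds: a bottom-tested do/while over i8 elements, so it runs at
least once and assumes len >= 1.

  __device__ void copyLoop(char *dst, const char *src, unsigned len) {
    unsigned ind = 0;          // the PHI node, 0 on entry from origBB
    do {
      dst[ind] = src[ind];     // load from srcAddr+ind, store to dstAddr+ind
      ++ind;                   // newind = ind + 1 on the backedge
    } while (ind < len);       // CondBr(ICmpULT(newind, len), loopBB, newBB)
  }
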
> +
> +// Lower MemSetInst to loop
> +static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
> +                                Value *len, Value *val, LLVMContext &Context,
> +                                Function &F) {
> +  BasicBlock *origBB = splitAt->getParent();
> +  BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
> +  BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
> +
> +  origBB->getTerminator()->setSuccessor(0, loopBB);
> +  IRBuilder<> builder(origBB, origBB->getTerminator());
> +
> +  unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
> +
> +  // Cast pointer to the type of value getting stored
> +  dstAddr = builder.CreateBitCast(dstAddr,
> +                                  PointerType::get(val->getType(), dstAS));
> +
> +  IRBuilder<> loop(loopBB);
> +  PHINode *ind = loop.CreatePHI(len->getType(), 0);
> +  ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
> +
> +  loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);
> +
> +  Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
> +  ind->addIncoming(newind, loopBB);
> +
> +  loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
> +}
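
The matching sketch for the memset case: the same bottom-tested loop, but
the destination is re-cast to the store value's type and the stores are
never volatile.

  __device__ void setLoop(unsigned char *dst, unsigned char val,
                          unsigned len) {
    unsigned ind = 0;
    do {
      dst[ind] = val;
    } while (++ind < len);
  }
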
> +
> +bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
> +  SmallVector<LoadInst *, 4> aggrLoads;
> +  SmallVector<MemTransferInst *, 4> aggrMemcpys;
> +  SmallVector<MemSetInst *, 4> aggrMemsets;
> +
> +  TargetData *TD = &getAnalysis<TargetData>();
> +  LLVMContext &Context = F.getParent()->getContext();
> +
> +  //
> +  // Collect all the aggrLoads, aggrMemcpys and aggrMemsets.
> +  //
> +  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
> +    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
> +        ++II) {
> +      if (LoadInst *load = dyn_cast<LoadInst>(II)) {
> +        if (!load->hasOneUse())
> +          continue;
> +
> +        if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
> +          continue;
> +
> +        User *use = *(load->use_begin());
> +        if (StoreInst *store = dyn_cast<StoreInst>(use)) {
> +          if (store->getValueOperand() != load)
> +            continue;
> +          aggrLoads.push_back(load);
> +        }
> +      } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
> +        Value *len = intr->getLength();
> +        // If the number of bytes being copied is at least
> +        // MaxAggrCopySize, lower the copy to a loop.
> +        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
> +          if (len_int->getZExtValue() >= MaxAggrCopySize) {
> +            aggrMemcpys.push_back(intr);
> +          }
> +        } else {
> +          // Turn variable-length memcpy/memmove into a loop.
> +          aggrMemcpys.push_back(intr);
> +        }
> +      } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
> +        Value *len = memsetintr->getLength();
> +        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
> +          if (len_int->getZExtValue() >= MaxAggrCopySize) {
> +            aggrMemsets.push_back(memsetintr);
> +          }
> +        } else {
> +          // Turn variable-length memset into a loop.
> +          aggrMemsets.push_back(memsetintr);
> +        }
> +      }
> +    }
> +  }
> +  if (aggrLoads.empty() && aggrMemcpys.empty() && aggrMemsets.empty())
> +    return false;
> +
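
As an illustration (hypothetical CUDA source) of what gets collected: a
256-byte struct assignment exceeds the 128-byte MaxAggrCopySize threshold as
a single-use load/store pair, so it lands in aggrLoads and is rewritten into
the byte loop sketched above.

  struct Big { int data[64]; };      // 256 bytes > MaxAggrCopySize
  __device__ Big gsrc, gdst;

  __global__ void copyBig() {
    gdst = gsrc;                     // lowered to an i8 copy loop
  }
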
> +  //
> +  // Do the transformation of an aggr load/copy/set to a loop
> +  //
> +  for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
> +    LoadInst *load = aggrLoads[i];
> +    StoreInst *store = dyn_cast<StoreInst>(*load->use_begin());
> +    Value *srcAddr = load->getOperand(0);
> +    Value *dstAddr = store->getOperand(1);
> +    unsigned numLoads = TD->getTypeStoreSize(load->getType());
> +    Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
> +
> +    convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
> +                          store->isVolatile(), Context, F);
> +
> +    store->eraseFromParent();
> +    load->eraseFromParent();
> +  }
> +
> +  for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
> +    MemTransferInst *cpy = aggrMemcpys[i];
> +    Value *len = cpy->getLength();
> +    // The LLVM 2.7 form of the memcpy intrinsic does not carry a
> +    // volatile operand yet, so optimistically treat every copy as
> +    // non-volatile; this avoids emitting unnecessary st.volatile in
> +    // the PTX output.
> +    convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
> +                          false, Context, F);
> +    cpy->eraseFromParent();
> +  }
> +
> +  for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
> +    MemSetInst *memsetinst = aggrMemsets[i];
> +    Value *len = memsetinst->getLength();
> +    Value *val = memsetinst->getValue();
> +    convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
> +                        F);
> +    memsetinst->eraseFromParent();
> +  }
> +
> +  return true;
> +}
> +
> +FunctionPass *llvm::createLowerAggrCopies() {
> +  return new NVPTXLowerAggrCopies();
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,47 @@
> +//===-- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h ------------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the declaration of the NVIDIA specific lowering of
> +// aggregate copies
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTX_LOWER_AGGR_COPIES_H
> +#define NVPTX_LOWER_AGGR_COPIES_H
> +
> +#include "llvm/Pass.h"
> +#include "llvm/CodeGen/MachineFunctionAnalysis.h"
> +#include "llvm/Target/TargetData.h"
> +
> +namespace llvm {
> +
> +// The lowering pass itself, implemented as a FunctionPass.
> +struct NVPTXLowerAggrCopies : public FunctionPass {
> +  static char ID;
> +
> +  NVPTXLowerAggrCopies() : FunctionPass(ID) {}
> +
> +  void getAnalysisUsage(AnalysisUsage &AU) const {
> +    AU.addRequired<TargetData>();
> +    AU.addPreserved<MachineFunctionAnalysis>();
> +  }
> +
> +  virtual bool runOnFunction(Function &F);
> +
> +  static const unsigned MaxAggrCopySize = 128;
> +
> +  virtual const char *getPassName() const {
> +    return "Lower aggregate copies/intrinsics into loops";
> +  }
> +};
> +
> +extern FunctionPass *createLowerAggrCopies();
> +}
> +
> +#endif
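
A minimal sketch of how a driver could schedule this pass before instruction
selection; the helper name here is hypothetical (the real hook presumably
lives in NVPTXTargetMachine):

  #include "NVPTXLowerAggrCopies.h"
  #include "llvm/PassManager.h"

  static void addNVPTXPreISelPasses(llvm::PassManager &PM) {
    PM.add(llvm::createLowerAggrCopies());  // declared in the header above
  }
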
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXNumRegisters.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXNumRegisters.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXNumRegisters.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXNumRegisters.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,20 @@
> +//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTX_NUM_REGISTERS_H
> +#define NVPTX_NUM_REGISTERS_H
> +
> +namespace llvm {
> +
> +const unsigned NVPTXNumRegisters = 396;
> +
> +}
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,332 @@
> +//===- NVPTXRegisterInfo.cpp - NVPTX Register Information -----------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the NVPTX implementation of the TargetRegisterInfo class.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#define DEBUG_TYPE "nvptx-reg-info"
> +
> +#include "NVPTX.h"
> +#include "NVPTXRegisterInfo.h"
> +#include "NVPTXSubtarget.h"
> +#include "llvm/ADT/BitVector.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> +#include "llvm/MC/MachineLocation.h"
> +#include "llvm/Target/TargetInstrInfo.h"
> +
> +
> +using namespace llvm;
> +
> +namespace llvm
> +{
> +std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
> +  if (RC == &NVPTX::Float32RegsRegClass) {
> +    return ".f32";
> +  }
> +  else if (RC == &NVPTX::Float64RegsRegClass) {
> +    return ".f64";
> +  }
> +  else if (RC == &NVPTX::Int64RegsRegClass) {
> +    return ".s64";
> +  }
> +  else if (RC == &NVPTX::Int32RegsRegClass) {
> +    return ".s32";
> +  }
> +  else if (RC == &NVPTX::Int16RegsRegClass) {
> +    return ".s16";
> +  }
> +  // Int8Regs become 16-bit registers in PTX
> +  else if (RC == &NVPTX::Int8RegsRegClass) {
> +    return ".s16";
> +  }
> +  else if (RC == &NVPTX::Int1RegsRegClass) {
> +    return ".pred";
> +  }
> +  else if (RC == &NVPTX::SpecialRegsRegClass) {
> +    return "!Special!";
> +  }
> +  else if (RC == &NVPTX::V2F32RegsRegClass) {
> +    return ".v2.f32";
> +  }
> +  else if (RC == &NVPTX::V4F32RegsRegClass) {
> +    return ".v4.f32";
> +  }
> +  else if (RC == &NVPTX::V2I32RegsRegClass) {
> +    return ".v2.s32";
> +  }
> +  else if (RC == &NVPTX::V4I32RegsRegClass) {
> +    return ".v4.s32";
> +  }
> +  else if (RC == &NVPTX::V2F64RegsRegClass) {
> +    return ".v2.f64";
> +  }
> +  else if (RC == &NVPTX::V2I64RegsRegClass) {
> +    return ".v2.s64";
> +  }
> +  else if (RC == &NVPTX::V2I16RegsRegClass) {
> +    return ".v2.s16";
> +  }
> +  else if (RC == &NVPTX::V4I16RegsRegClass) {
> +    return ".v4.s16";
> +  }
> +  else if (RC == &NVPTX::V2I8RegsRegClass) {
> +    return ".v2.s16";
> +  }
> +  else if (RC == &NVPTX::V4I8RegsRegClass) {
> +    return ".v4.s16";
> +  }
> +  else {
> +    return "INTERNAL";
> +  }
> +  return "";
> +}
> +
> +std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
> +  if (RC == &NVPTX::Float32RegsRegClass) {
> +    return "%f";
> +  }
> +  else if (RC == &NVPTX::Float64RegsRegClass) {
> +    return "%fd";
> +  }
> +  else if (RC == &NVPTX::Int64RegsRegClass) {
> +    return "%rd";
> +  }
> +  else if (RC == &NVPTX::Int32RegsRegClass) {
> +    return "%r";
> +  }
> +  else if (RC == &NVPTX::Int16RegsRegClass) {
> +    return "%rs";
> +  }
> +  else if (RC == &NVPTX::Int8RegsRegClass) {
> +    return "%rc";
> +  }
> +  else if (RC == &NVPTX::Int1RegsRegClass) {
> +    return "%p";
> +  }
> +  else if (RC == &NVPTX::SpecialRegsRegClass) {
> +    return "!Special!";
> +  }
> +  else if (RC == &NVPTX::V2F32RegsRegClass) {
> +    return "%v2f";
> +  }
> +  else if (RC == &NVPTX::V4F32RegsRegClass) {
> +    return "%v4f";
> +  }
> +  else if (RC == &NVPTX::V2I32RegsRegClass) {
> +    return "%v2r";
> +  }
> +  else if (RC == &NVPTX::V4I32RegsRegClass) {
> +    return "%v4r";
> +  }
> +  else if (RC == &NVPTX::V2F64RegsRegClass) {
> +    return "%v2fd";
> +  }
> +  else if (RC == &NVPTX::V2I64RegsRegClass) {
> +    return "%v2rd";
> +  }
> +  else if (RC == &NVPTX::V2I16RegsRegClass) {
> +    return "%v2s";
> +  }
> +  else if (RC == &NVPTX::V4I16RegsRegClass) {
> +    return "%v4rs";
> +  }
> +  else if (RC == &NVPTX::V2I8RegsRegClass) {
> +    return "%v2rc";
> +  }
> +  else if (RC == &NVPTX::V4I8RegsRegClass) {
> +    return "%v4rc";
> +  }
> +  else {
> +    return "INTERNAL";
> +  }
> +  return "";
> +}
> +
> +bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
> +  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
> +    return true;
> +  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
> +    return true;
> +  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
> +    return true;
> +  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
> +    return true;
> +  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
> +    return true;
> +  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
> +    return true;
> +  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
> +    return true;
> +  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
> +    return true;
> +  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
> +    return true;
> +  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
> +    return true;
> +  return false;
> +}
> +
> +std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
> +  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
> +    return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
> +  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
> +    return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
> +  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
> +    return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
> +  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
> +    return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
> +  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
> +    return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
> +  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
> +    return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
> +  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
> +    return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
> +  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
> +    return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
> +  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
> +    return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
> +  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
> +    return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
> +  assert(0 && "Not a vector register class");
> +  return "Unsupported";
> +}
> +
> +const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
> +  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
> +    return (&NVPTX::Float32RegsRegClass);
> +  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
> +    return (&NVPTX::Float64RegsRegClass);
> +  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
> +    return (&NVPTX::Int16RegsRegClass);
> +  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
> +    return (&NVPTX::Int32RegsRegClass);
> +  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
> +    return (&NVPTX::Int64RegsRegClass);
> +  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
> +    return (&NVPTX::Int8RegsRegClass);
> +  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
> +    return (&NVPTX::Float32RegsRegClass);
> +  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
> +    return (&NVPTX::Int16RegsRegClass);
> +  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
> +    return (&NVPTX::Int32RegsRegClass);
> +  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
> +    return (&NVPTX::Int8RegsRegClass);
> +  assert(0 && "Not a vector register class");
> +  return 0;
> +}
> +
> +int getNVPTXVectorSize(TargetRegisterClass const *RC) {
> +  if (RC->getID() == NVPTX::V2F32RegsRegClassID)
> +    return 2;
> +  if (RC->getID() == NVPTX::V2F64RegsRegClassID)
> +    return 2;
> +  if (RC->getID() == NVPTX::V2I16RegsRegClassID)
> +    return 2;
> +  if (RC->getID() == NVPTX::V2I32RegsRegClassID)
> +    return 2;
> +  if (RC->getID() == NVPTX::V2I64RegsRegClassID)
> +    return 2;
> +  if (RC->getID() == NVPTX::V2I8RegsRegClassID)
> +    return 2;
> +  if (RC->getID() == NVPTX::V4F32RegsRegClassID)
> +    return 4;
> +  if (RC->getID() == NVPTX::V4I16RegsRegClassID)
> +    return 4;
> +  if (RC->getID() == NVPTX::V4I32RegsRegClassID)
> +    return 4;
> +  if (RC->getID() == NVPTX::V4I8RegsRegClassID)
> +    return 4;
> +  assert(0 && "Not a vector register class");
> +  return -1;
> +}
> +}
> +
> +NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
> +                                     const NVPTXSubtarget &st)
> +: NVPTXGenRegisterInfo(0),
> +  TII(tii),
> +  ST(st) {
> +  Is64Bit = st.is64Bit();
> +}
> +
> +
> +#define GET_REGINFO_TARGET_DESC
> +#include "NVPTXGenRegisterInfo.inc"
> +
> +/// NVPTX Callee Saved Registers
> +const uint16_t* NVPTXRegisterInfo::
> +getCalleeSavedRegs(const MachineFunction *MF) const {
> +  static const uint16_t CalleeSavedRegs[] = { 0 };
> +  return CalleeSavedRegs;
> +}
> +
> +// NVPTX Callee Saved Reg Classes
> +const TargetRegisterClass* const*
> +NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
> +  static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
> +  return CalleeSavedRegClasses;
> +}
> +
> +BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> +  BitVector Reserved(getNumRegs());
> +  return Reserved;
> +}
> +
> +void NVPTXRegisterInfo::
> +eliminateFrameIndex(MachineBasicBlock::iterator II,
> +                    int SPAdj,
> +                    RegScavenger *RS) const {
> +  assert(SPAdj == 0 && "Unexpected");
> +
> +  unsigned i = 0;
> +  MachineInstr &MI = *II;
> +  while (!MI.getOperand(i).isFI()) {
> +    ++i;
> +    assert(i < MI.getNumOperands() &&
> +           "Instr doesn't have FrameIndex operand!");
> +  }
> +
> +  int FrameIndex = MI.getOperand(i).getIndex();
> +
> +  MachineFunction &MF = *MI.getParent()->getParent();
> +  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
> +      MI.getOperand(i+1).getImm();
> +
> +  // Use VRFrame (the virtual frame pointer, printed as %SP) as the base.
> +  MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false);
> +  MI.getOperand(i+1).ChangeToImmediate(Offset);
> +}
> +
> +
> +int NVPTXRegisterInfo::
> +getDwarfRegNum(unsigned RegNum, bool isEH) const {
> +  return 0;
> +}
> +
> +unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
> +  return NVPTX::VRFrame;
> +}
> +
> +unsigned NVPTXRegisterInfo::getRARegister() const {
> +  return 0;
> +}
> +
> +// This function eliminates ADJCALLSTACKDOWN,
> +// ADJCALLSTACKUP pseudo instructions
> +void NVPTXRegisterInfo::
> +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
> +                              MachineBasicBlock::iterator I) const {
> +  // Simply discard ADJCALLSTACKDOWN,
> +  // ADJCALLSTACKUP instructions.
> +  MBB.erase(I);
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,94 @@
> +//===- NVPTXRegisterInfo.h - NVPTX Register Information Impl ----*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the NVPTX implementation of the TargetRegisterInfo class.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTXREGISTERINFO_H
> +#define NVPTXREGISTERINFO_H
> +
> +#include "ManagedStringPool.h"
> +#include "llvm/Target/TargetRegisterInfo.h"
> +
> +
> +#define GET_REGINFO_HEADER
> +#include "NVPTXGenRegisterInfo.inc"
> +#include "llvm/Target/TargetRegisterInfo.h"
> +#include <sstream>
> +
> +namespace llvm {
> +
> +// Forward Declarations.
> +class TargetInstrInfo;
> +class NVPTXSubtarget;
> +
> +class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
> +private:
> +  const TargetInstrInfo &TII;
> +  const NVPTXSubtarget &ST;
> +  bool Is64Bit;
> +  // Holds strings that can be freed all together with the NVPTXRegisterInfo.
> +  ManagedStringPool     ManagedStrPool;
> +
> +public:
> +  NVPTXRegisterInfo(const TargetInstrInfo &tii,
> +                    const NVPTXSubtarget &st);
> +
> +
> +  //------------------------------------------------------
> +  // Pure virtual functions from TargetRegisterInfo
> +  //------------------------------------------------------
> +
> +  // NVPTX callee saved registers
> +  virtual const uint16_t*
> +  getCalleeSavedRegs(const MachineFunction *MF = 0) const;
> +
> +  // NVPTX callee saved register classes
> +  virtual const TargetRegisterClass* const *
> +  getCalleeSavedRegClasses(const MachineFunction *MF) const;
> +
> +  virtual BitVector getReservedRegs(const MachineFunction &MF) const;
> +
> +  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
> +                                   int SPAdj,
> +                                   RegScavenger *RS=NULL) const;
> +
> +  void eliminateCallFramePseudoInstr(MachineFunction &MF,
> +                                     MachineBasicBlock &MBB,
> +                                     MachineBasicBlock::iterator I) const;
> +
> +  virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
> +  virtual unsigned getFrameRegister(const MachineFunction &MF) const;
> +  virtual unsigned getRARegister() const;
> +
> +  ManagedStringPool *getStrPool() const {
> +    return const_cast<ManagedStringPool *>(&ManagedStrPool);
> +  }
> +
> +  const char *getName(unsigned RegNo) const {
> +    std::stringstream O;
> +    O << "reg" << RegNo;
> +    return getStrPool()->getManagedString(O.str().c_str())->c_str();
> +  }
> +
> +};
> +
> +
> +std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
> +std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
> +bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
> +std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
> +int getNVPTXVectorSize (const TargetRegisterClass *RC);
> +const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);
> +
> +} // end namespace llvm
> +
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.td?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.td (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXRegisterInfo.td Fri May  4 15:18:50 2012
> @@ -0,0 +1,7235 @@
> +//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +//===----------------------------------------------------------------------===//
> +//  Declarations that describe the PTX register file
> +//===----------------------------------------------------------------------===//
> +
> +class NVPTXReg<string n> : Register<n> {
> +  let Namespace = "NVPTX";
> +}
> +
> +class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
> +     : RegisterClass <"NVPTX", regTypes, alignment, regList>;
> +
> +//===----------------------------------------------------------------------===//
> +//  Registers
> +//===----------------------------------------------------------------------===//
> +
> +// Special Registers used as stack pointer
> +def VRFrame         : NVPTXReg<"%SP">;
> +def VRFrameLocal    : NVPTXReg<"%SPL">;
> +
> +// Special Registers used as the stack
> +def VRDepot  : NVPTXReg<"%Depot">;
> +
> +//===--- Predicate --------------------------------------------------------===//
> +def P0 : NVPTXReg<"%p0">;
> +def P1 : NVPTXReg<"%p1">;
> +def P2 : NVPTXReg<"%p2">;
> +def P3 : NVPTXReg<"%p3">;
> +def P4 : NVPTXReg<"%p4">;
> +def P5 : NVPTXReg<"%p5">;
> +def P6 : NVPTXReg<"%p6">;
> +def P7 : NVPTXReg<"%p7">;
> +def P8 : NVPTXReg<"%p8">;
> +def P9 : NVPTXReg<"%p9">;
> +def P10 : NVPTXReg<"%p10">;
> +def P11 : NVPTXReg<"%p11">;
> +def P12 : NVPTXReg<"%p12">;
> +def P13 : NVPTXReg<"%p13">;
> +def P14 : NVPTXReg<"%p14">;
> +def P15 : NVPTXReg<"%p15">;
> +def P16 : NVPTXReg<"%p16">;
> +def P17 : NVPTXReg<"%p17">;
> +def P18 : NVPTXReg<"%p18">;
> +def P19 : NVPTXReg<"%p19">;
> +def P20 : NVPTXReg<"%p20">;
> +def P21 : NVPTXReg<"%p21">;
> +def P22 : NVPTXReg<"%p22">;
> +def P23 : NVPTXReg<"%p23">;
> +def P24 : NVPTXReg<"%p24">;
> +def P25 : NVPTXReg<"%p25">;
> +def P26 : NVPTXReg<"%p26">;
> +def P27 : NVPTXReg<"%p27">;
> +def P28 : NVPTXReg<"%p28">;
> +def P29 : NVPTXReg<"%p29">;
> +def P30 : NVPTXReg<"%p30">;
> +def P31 : NVPTXReg<"%p31">;
> +def P32 : NVPTXReg<"%p32">;
> +def P33 : NVPTXReg<"%p33">;
> +def P34 : NVPTXReg<"%p34">;
> +def P35 : NVPTXReg<"%p35">;
> +def P36 : NVPTXReg<"%p36">;
> +def P37 : NVPTXReg<"%p37">;
> +def P38 : NVPTXReg<"%p38">;
> +def P39 : NVPTXReg<"%p39">;
> +def P40 : NVPTXReg<"%p40">;
> +def P41 : NVPTXReg<"%p41">;
> +def P42 : NVPTXReg<"%p42">;
> +def P43 : NVPTXReg<"%p43">;
> +def P44 : NVPTXReg<"%p44">;
> +def P45 : NVPTXReg<"%p45">;
> +def P46 : NVPTXReg<"%p46">;
> +def P47 : NVPTXReg<"%p47">;
> +def P48 : NVPTXReg<"%p48">;
> +def P49 : NVPTXReg<"%p49">;
> +def P50 : NVPTXReg<"%p50">;
> +def P51 : NVPTXReg<"%p51">;
> +def P52 : NVPTXReg<"%p52">;
> +def P53 : NVPTXReg<"%p53">;
> +def P54 : NVPTXReg<"%p54">;
> +def P55 : NVPTXReg<"%p55">;
> +def P56 : NVPTXReg<"%p56">;
> +def P57 : NVPTXReg<"%p57">;
> +def P58 : NVPTXReg<"%p58">;
> +def P59 : NVPTXReg<"%p59">;
> +def P60 : NVPTXReg<"%p60">;
> +def P61 : NVPTXReg<"%p61">;
> +def P62 : NVPTXReg<"%p62">;
> +def P63 : NVPTXReg<"%p63">;
> +def P64 : NVPTXReg<"%p64">;
> +def P65 : NVPTXReg<"%p65">;
> +def P66 : NVPTXReg<"%p66">;
> +def P67 : NVPTXReg<"%p67">;
> +def P68 : NVPTXReg<"%p68">;
> +def P69 : NVPTXReg<"%p69">;
> +def P70 : NVPTXReg<"%p70">;
> +def P71 : NVPTXReg<"%p71">;
> +def P72 : NVPTXReg<"%p72">;
> +def P73 : NVPTXReg<"%p73">;
> +def P74 : NVPTXReg<"%p74">;
> +def P75 : NVPTXReg<"%p75">;
> +def P76 : NVPTXReg<"%p76">;
> +def P77 : NVPTXReg<"%p77">;
> +def P78 : NVPTXReg<"%p78">;
> +def P79 : NVPTXReg<"%p79">;
> +def P80 : NVPTXReg<"%p80">;
> +def P81 : NVPTXReg<"%p81">;
> +def P82 : NVPTXReg<"%p82">;
> +def P83 : NVPTXReg<"%p83">;
> +def P84 : NVPTXReg<"%p84">;
> +def P85 : NVPTXReg<"%p85">;
> +def P86 : NVPTXReg<"%p86">;
> +def P87 : NVPTXReg<"%p87">;
> +def P88 : NVPTXReg<"%p88">;
> +def P89 : NVPTXReg<"%p89">;
> +def P90 : NVPTXReg<"%p90">;
> +def P91 : NVPTXReg<"%p91">;
> +def P92 : NVPTXReg<"%p92">;
> +def P93 : NVPTXReg<"%p93">;
> +def P94 : NVPTXReg<"%p94">;
> +def P95 : NVPTXReg<"%p95">;
> +def P96 : NVPTXReg<"%p96">;
> +def P97 : NVPTXReg<"%p97">;
> +def P98 : NVPTXReg<"%p98">;
> +def P99 : NVPTXReg<"%p99">;
> +def P100 : NVPTXReg<"%p100">;
> +def P101 : NVPTXReg<"%p101">;
> +def P102 : NVPTXReg<"%p102">;
> +def P103 : NVPTXReg<"%p103">;
> +def P104 : NVPTXReg<"%p104">;
> +def P105 : NVPTXReg<"%p105">;
> +def P106 : NVPTXReg<"%p106">;
> +def P107 : NVPTXReg<"%p107">;
> +def P108 : NVPTXReg<"%p108">;
> +def P109 : NVPTXReg<"%p109">;
> +def P110 : NVPTXReg<"%p110">;
> +def P111 : NVPTXReg<"%p111">;
> +def P112 : NVPTXReg<"%p112">;
> +def P113 : NVPTXReg<"%p113">;
> +def P114 : NVPTXReg<"%p114">;
> +def P115 : NVPTXReg<"%p115">;
> +def P116 : NVPTXReg<"%p116">;
> +def P117 : NVPTXReg<"%p117">;
> +def P118 : NVPTXReg<"%p118">;
> +def P119 : NVPTXReg<"%p119">;
> +def P120 : NVPTXReg<"%p120">;
> +def P121 : NVPTXReg<"%p121">;
> +def P122 : NVPTXReg<"%p122">;
> +def P123 : NVPTXReg<"%p123">;
> +def P124 : NVPTXReg<"%p124">;
> +def P125 : NVPTXReg<"%p125">;
> +def P126 : NVPTXReg<"%p126">;
> +def P127 : NVPTXReg<"%p127">;
> +def P128 : NVPTXReg<"%p128">;
> +def P129 : NVPTXReg<"%p129">;
> +def P130 : NVPTXReg<"%p130">;
> +def P131 : NVPTXReg<"%p131">;
> +def P132 : NVPTXReg<"%p132">;
> +def P133 : NVPTXReg<"%p133">;
> +def P134 : NVPTXReg<"%p134">;
> +def P135 : NVPTXReg<"%p135">;
> +def P136 : NVPTXReg<"%p136">;
> +def P137 : NVPTXReg<"%p137">;
> +def P138 : NVPTXReg<"%p138">;
> +def P139 : NVPTXReg<"%p139">;
> +def P140 : NVPTXReg<"%p140">;
> +def P141 : NVPTXReg<"%p141">;
> +def P142 : NVPTXReg<"%p142">;
> +def P143 : NVPTXReg<"%p143">;
> +def P144 : NVPTXReg<"%p144">;
> +def P145 : NVPTXReg<"%p145">;
> +def P146 : NVPTXReg<"%p146">;
> +def P147 : NVPTXReg<"%p147">;
> +def P148 : NVPTXReg<"%p148">;
> +def P149 : NVPTXReg<"%p149">;
> +def P150 : NVPTXReg<"%p150">;
> +def P151 : NVPTXReg<"%p151">;
> +def P152 : NVPTXReg<"%p152">;
> +def P153 : NVPTXReg<"%p153">;
> +def P154 : NVPTXReg<"%p154">;
> +def P155 : NVPTXReg<"%p155">;
> +def P156 : NVPTXReg<"%p156">;
> +def P157 : NVPTXReg<"%p157">;
> +def P158 : NVPTXReg<"%p158">;
> +def P159 : NVPTXReg<"%p159">;
> +def P160 : NVPTXReg<"%p160">;
> +def P161 : NVPTXReg<"%p161">;
> +def P162 : NVPTXReg<"%p162">;
> +def P163 : NVPTXReg<"%p163">;
> +def P164 : NVPTXReg<"%p164">;
> +def P165 : NVPTXReg<"%p165">;
> +def P166 : NVPTXReg<"%p166">;
> +def P167 : NVPTXReg<"%p167">;
> +def P168 : NVPTXReg<"%p168">;
> +def P169 : NVPTXReg<"%p169">;
> +def P170 : NVPTXReg<"%p170">;
> +def P171 : NVPTXReg<"%p171">;
> +def P172 : NVPTXReg<"%p172">;
> +def P173 : NVPTXReg<"%p173">;
> +def P174 : NVPTXReg<"%p174">;
> +def P175 : NVPTXReg<"%p175">;
> +def P176 : NVPTXReg<"%p176">;
> +def P177 : NVPTXReg<"%p177">;
> +def P178 : NVPTXReg<"%p178">;
> +def P179 : NVPTXReg<"%p179">;
> +def P180 : NVPTXReg<"%p180">;
> +def P181 : NVPTXReg<"%p181">;
> +def P182 : NVPTXReg<"%p182">;
> +def P183 : NVPTXReg<"%p183">;
> +def P184 : NVPTXReg<"%p184">;
> +def P185 : NVPTXReg<"%p185">;
> +def P186 : NVPTXReg<"%p186">;
> +def P187 : NVPTXReg<"%p187">;
> +def P188 : NVPTXReg<"%p188">;
> +def P189 : NVPTXReg<"%p189">;
> +def P190 : NVPTXReg<"%p190">;
> +def P191 : NVPTXReg<"%p191">;
> +def P192 : NVPTXReg<"%p192">;
> +def P193 : NVPTXReg<"%p193">;
> +def P194 : NVPTXReg<"%p194">;
> +def P195 : NVPTXReg<"%p195">;
> +def P196 : NVPTXReg<"%p196">;
> +def P197 : NVPTXReg<"%p197">;
> +def P198 : NVPTXReg<"%p198">;
> +def P199 : NVPTXReg<"%p199">;
> +def P200 : NVPTXReg<"%p200">;
> +def P201 : NVPTXReg<"%p201">;
> +def P202 : NVPTXReg<"%p202">;
> +def P203 : NVPTXReg<"%p203">;
> +def P204 : NVPTXReg<"%p204">;
> +def P205 : NVPTXReg<"%p205">;
> +def P206 : NVPTXReg<"%p206">;
> +def P207 : NVPTXReg<"%p207">;
> +def P208 : NVPTXReg<"%p208">;
> +def P209 : NVPTXReg<"%p209">;
> +def P210 : NVPTXReg<"%p210">;
> +def P211 : NVPTXReg<"%p211">;
> +def P212 : NVPTXReg<"%p212">;
> +def P213 : NVPTXReg<"%p213">;
> +def P214 : NVPTXReg<"%p214">;
> +def P215 : NVPTXReg<"%p215">;
> +def P216 : NVPTXReg<"%p216">;
> +def P217 : NVPTXReg<"%p217">;
> +def P218 : NVPTXReg<"%p218">;
> +def P219 : NVPTXReg<"%p219">;
> +def P220 : NVPTXReg<"%p220">;
> +def P221 : NVPTXReg<"%p221">;
> +def P222 : NVPTXReg<"%p222">;
> +def P223 : NVPTXReg<"%p223">;
> +def P224 : NVPTXReg<"%p224">;
> +def P225 : NVPTXReg<"%p225">;
> +def P226 : NVPTXReg<"%p226">;
> +def P227 : NVPTXReg<"%p227">;
> +def P228 : NVPTXReg<"%p228">;
> +def P229 : NVPTXReg<"%p229">;
> +def P230 : NVPTXReg<"%p230">;
> +def P231 : NVPTXReg<"%p231">;
> +def P232 : NVPTXReg<"%p232">;
> +def P233 : NVPTXReg<"%p233">;
> +def P234 : NVPTXReg<"%p234">;
> +def P235 : NVPTXReg<"%p235">;
> +def P236 : NVPTXReg<"%p236">;
> +def P237 : NVPTXReg<"%p237">;
> +def P238 : NVPTXReg<"%p238">;
> +def P239 : NVPTXReg<"%p239">;
> +def P240 : NVPTXReg<"%p240">;
> +def P241 : NVPTXReg<"%p241">;
> +def P242 : NVPTXReg<"%p242">;
> +def P243 : NVPTXReg<"%p243">;
> +def P244 : NVPTXReg<"%p244">;
> +def P245 : NVPTXReg<"%p245">;
> +def P246 : NVPTXReg<"%p246">;
> +def P247 : NVPTXReg<"%p247">;
> +def P248 : NVPTXReg<"%p248">;
> +def P249 : NVPTXReg<"%p249">;
> +def P250 : NVPTXReg<"%p250">;
> +def P251 : NVPTXReg<"%p251">;
> +def P252 : NVPTXReg<"%p252">;
> +def P253 : NVPTXReg<"%p253">;
> +def P254 : NVPTXReg<"%p254">;
> +def P255 : NVPTXReg<"%p255">;
> +def P256 : NVPTXReg<"%p256">;
> +def P257 : NVPTXReg<"%p257">;
> +def P258 : NVPTXReg<"%p258">;
> +def P259 : NVPTXReg<"%p259">;
> +def P260 : NVPTXReg<"%p260">;
> +def P261 : NVPTXReg<"%p261">;
> +def P262 : NVPTXReg<"%p262">;
> +def P263 : NVPTXReg<"%p263">;
> +def P264 : NVPTXReg<"%p264">;
> +def P265 : NVPTXReg<"%p265">;
> +def P266 : NVPTXReg<"%p266">;
> +def P267 : NVPTXReg<"%p267">;
> +def P268 : NVPTXReg<"%p268">;
> +def P269 : NVPTXReg<"%p269">;
> +def P270 : NVPTXReg<"%p270">;
> +def P271 : NVPTXReg<"%p271">;
> +def P272 : NVPTXReg<"%p272">;
> +def P273 : NVPTXReg<"%p273">;
> +def P274 : NVPTXReg<"%p274">;
> +def P275 : NVPTXReg<"%p275">;
> +def P276 : NVPTXReg<"%p276">;
> +def P277 : NVPTXReg<"%p277">;
> +def P278 : NVPTXReg<"%p278">;
> +def P279 : NVPTXReg<"%p279">;
> +def P280 : NVPTXReg<"%p280">;
> +def P281 : NVPTXReg<"%p281">;
> +def P282 : NVPTXReg<"%p282">;
> +def P283 : NVPTXReg<"%p283">;
> +def P284 : NVPTXReg<"%p284">;
> +def P285 : NVPTXReg<"%p285">;
> +def P286 : NVPTXReg<"%p286">;
> +def P287 : NVPTXReg<"%p287">;
> +def P288 : NVPTXReg<"%p288">;
> +def P289 : NVPTXReg<"%p289">;
> +def P290 : NVPTXReg<"%p290">;
> +def P291 : NVPTXReg<"%p291">;
> +def P292 : NVPTXReg<"%p292">;
> +def P293 : NVPTXReg<"%p293">;
> +def P294 : NVPTXReg<"%p294">;
> +def P295 : NVPTXReg<"%p295">;
> +def P296 : NVPTXReg<"%p296">;
> +def P297 : NVPTXReg<"%p297">;
> +def P298 : NVPTXReg<"%p298">;
> +def P299 : NVPTXReg<"%p299">;
> +def P300 : NVPTXReg<"%p300">;
> +def P301 : NVPTXReg<"%p301">;
> +def P302 : NVPTXReg<"%p302">;
> +def P303 : NVPTXReg<"%p303">;
> +def P304 : NVPTXReg<"%p304">;
> +def P305 : NVPTXReg<"%p305">;
> +def P306 : NVPTXReg<"%p306">;
> +def P307 : NVPTXReg<"%p307">;
> +def P308 : NVPTXReg<"%p308">;
> +def P309 : NVPTXReg<"%p309">;
> +def P310 : NVPTXReg<"%p310">;
> +def P311 : NVPTXReg<"%p311">;
> +def P312 : NVPTXReg<"%p312">;
> +def P313 : NVPTXReg<"%p313">;
> +def P314 : NVPTXReg<"%p314">;
> +def P315 : NVPTXReg<"%p315">;
> +def P316 : NVPTXReg<"%p316">;
> +def P317 : NVPTXReg<"%p317">;
> +def P318 : NVPTXReg<"%p318">;
> +def P319 : NVPTXReg<"%p319">;
> +def P320 : NVPTXReg<"%p320">;
> +def P321 : NVPTXReg<"%p321">;
> +def P322 : NVPTXReg<"%p322">;
> +def P323 : NVPTXReg<"%p323">;
> +def P324 : NVPTXReg<"%p324">;
> +def P325 : NVPTXReg<"%p325">;
> +def P326 : NVPTXReg<"%p326">;
> +def P327 : NVPTXReg<"%p327">;
> +def P328 : NVPTXReg<"%p328">;
> +def P329 : NVPTXReg<"%p329">;
> +def P330 : NVPTXReg<"%p330">;
> +def P331 : NVPTXReg<"%p331">;
> +def P332 : NVPTXReg<"%p332">;
> +def P333 : NVPTXReg<"%p333">;
> +def P334 : NVPTXReg<"%p334">;
> +def P335 : NVPTXReg<"%p335">;
> +def P336 : NVPTXReg<"%p336">;
> +def P337 : NVPTXReg<"%p337">;
> +def P338 : NVPTXReg<"%p338">;
> +def P339 : NVPTXReg<"%p339">;
> +def P340 : NVPTXReg<"%p340">;
> +def P341 : NVPTXReg<"%p341">;
> +def P342 : NVPTXReg<"%p342">;
> +def P343 : NVPTXReg<"%p343">;
> +def P344 : NVPTXReg<"%p344">;
> +def P345 : NVPTXReg<"%p345">;
> +def P346 : NVPTXReg<"%p346">;
> +def P347 : NVPTXReg<"%p347">;
> +def P348 : NVPTXReg<"%p348">;
> +def P349 : NVPTXReg<"%p349">;
> +def P350 : NVPTXReg<"%p350">;
> +def P351 : NVPTXReg<"%p351">;
> +def P352 : NVPTXReg<"%p352">;
> +def P353 : NVPTXReg<"%p353">;
> +def P354 : NVPTXReg<"%p354">;
> +def P355 : NVPTXReg<"%p355">;
> +def P356 : NVPTXReg<"%p356">;
> +def P357 : NVPTXReg<"%p357">;
> +def P358 : NVPTXReg<"%p358">;
> +def P359 : NVPTXReg<"%p359">;
> +def P360 : NVPTXReg<"%p360">;
> +def P361 : NVPTXReg<"%p361">;
> +def P362 : NVPTXReg<"%p362">;
> +def P363 : NVPTXReg<"%p363">;
> +def P364 : NVPTXReg<"%p364">;
> +def P365 : NVPTXReg<"%p365">;
> +def P366 : NVPTXReg<"%p366">;
> +def P367 : NVPTXReg<"%p367">;
> +def P368 : NVPTXReg<"%p368">;
> +def P369 : NVPTXReg<"%p369">;
> +def P370 : NVPTXReg<"%p370">;
> +def P371 : NVPTXReg<"%p371">;
> +def P372 : NVPTXReg<"%p372">;
> +def P373 : NVPTXReg<"%p373">;
> +def P374 : NVPTXReg<"%p374">;
> +def P375 : NVPTXReg<"%p375">;
> +def P376 : NVPTXReg<"%p376">;
> +def P377 : NVPTXReg<"%p377">;
> +def P378 : NVPTXReg<"%p378">;
> +def P379 : NVPTXReg<"%p379">;
> +def P380 : NVPTXReg<"%p380">;
> +def P381 : NVPTXReg<"%p381">;
> +def P382 : NVPTXReg<"%p382">;
> +def P383 : NVPTXReg<"%p383">;
> +def P384 : NVPTXReg<"%p384">;
> +def P385 : NVPTXReg<"%p385">;
> +def P386 : NVPTXReg<"%p386">;
> +def P387 : NVPTXReg<"%p387">;
> +def P388 : NVPTXReg<"%p388">;
> +def P389 : NVPTXReg<"%p389">;
> +def P390 : NVPTXReg<"%p390">;
> +def P391 : NVPTXReg<"%p391">;
> +def P392 : NVPTXReg<"%p392">;
> +def P393 : NVPTXReg<"%p393">;
> +def P394 : NVPTXReg<"%p394">;
> +def P395 : NVPTXReg<"%p395">;
> +
> +//===--- 8-bit ------------------------------------------------------------===//
> +foreach i = 0-395 in {
> +  def RC#i : NVPTXReg<"%rc"#i>;
> +}
> +
> +//===--- 16-bit -----------------------------------------------------------===//
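> +// Fixed pool of 16-bit integer registers, %rs0 through %rs395.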
> +foreach i = 0-395 in {
> +  def RS#i : NVPTXReg<"%rs"#i>;
> +}
> +
> +//===--- 32-bit -----------------------------------------------------------===//
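> +// Fixed pool of 32-bit integer registers, %r0 through %r395.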
> +foreach i = 0-395 in {
> +  def R#i : NVPTXReg<"%r"#i>;
> +}
> +
> +//===--- 64-bit -----------------------------------------------------------===//
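> +// Fixed pool of 64-bit integer registers, %rl0 through %rl395.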
> +foreach i = 0-395 in {
> +  def RL#i : NVPTXReg<"%rl"#i>;
> +}
> +
> +//===--- 32-bit float -----------------------------------------------------===//
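> +// Fixed pool of 32-bit floating-point registers (%f0, %f1, ...).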
> +def F0 : NVPTXReg<"%f0">;
> +def F1 : NVPTXReg<"%f1">;
> +def F2 : NVPTXReg<"%f2">;
> +def F3 : NVPTXReg<"%f3">;
> +def F4 : NVPTXReg<"%f4">;
> +def F5 : NVPTXReg<"%f5">;
> +def F6 : NVPTXReg<"%f6">;
> +def F7 : NVPTXReg<"%f7">;
> +def F8 : NVPTXReg<"%f8">;
> +def F9 : NVPTXReg<"%f9">;
> +def F10 : NVPTXReg<"%f10">;
> +def F11 : NVPTXReg<"%f11">;
> +def F12 : NVPTXReg<"%f12">;
> +def F13 : NVPTXReg<"%f13">;
> +def F14 : NVPTXReg<"%f14">;
> +def F15 : NVPTXReg<"%f15">;
> +def F16 : NVPTXReg<"%f16">;
> +def F17 : NVPTXReg<"%f17">;
> +def F18 : NVPTXReg<"%f18">;
> +def F19 : NVPTXReg<"%f19">;
> +def F20 : NVPTXReg<"%f20">;
> +def F21 : NVPTXReg<"%f21">;
> +def F22 : NVPTXReg<"%f22">;
> +def F23 : NVPTXReg<"%f23">;
> +def F24 : NVPTXReg<"%f24">;
> +def F25 : NVPTXReg<"%f25">;
> +def F26 : NVPTXReg<"%f26">;
> +def F27 : NVPTXReg<"%f27">;
> +def F28 : NVPTXReg<"%f28">;
> +def F29 : NVPTXReg<"%f29">;
> +def F30 : NVPTXReg<"%f30">;
> +def F31 : NVPTXReg<"%f31">;
> +def F32 : NVPTXReg<"%f32">;
> +def F33 : NVPTXReg<"%f33">;
> +def F34 : NVPTXReg<"%f34">;
> +def F35 : NVPTXReg<"%f35">;
> +def F36 : NVPTXReg<"%f36">;
> +def F37 : NVPTXReg<"%f37">;
> +def F38 : NVPTXReg<"%f38">;
> +def F39 : NVPTXReg<"%f39">;
> +def F40 : NVPTXReg<"%f40">;
> +def F41 : NVPTXReg<"%f41">;
> +def F42 : NVPTXReg<"%f42">;
> +def F43 : NVPTXReg<"%f43">;
> +def F44 : NVPTXReg<"%f44">;
> +def F45 : NVPTXReg<"%f45">;
> +def F46 : NVPTXReg<"%f46">;
> +def F47 : NVPTXReg<"%f47">;
> +def F48 : NVPTXReg<"%f48">;
> +def F49 : NVPTXReg<"%f49">;
> +def F50 : NVPTXReg<"%f50">;
> +def F51 : NVPTXReg<"%f51">;
> +def F52 : NVPTXReg<"%f52">;
> +def F53 : NVPTXReg<"%f53">;
> +def F54 : NVPTXReg<"%f54">;
> +def F55 : NVPTXReg<"%f55">;
> +def F56 : NVPTXReg<"%f56">;
> +def F57 : NVPTXReg<"%f57">;
> +def F58 : NVPTXReg<"%f58">;
> +def F59 : NVPTXReg<"%f59">;
> +def F60 : NVPTXReg<"%f60">;
> +def F61 : NVPTXReg<"%f61">;
> +def F62 : NVPTXReg<"%f62">;
> +def F63 : NVPTXReg<"%f63">;
> +def F64 : NVPTXReg<"%f64">;
> +def F65 : NVPTXReg<"%f65">;
> +def F66 : NVPTXReg<"%f66">;
> +def F67 : NVPTXReg<"%f67">;
> +def F68 : NVPTXReg<"%f68">;
> +def F69 : NVPTXReg<"%f69">;
> +def F70 : NVPTXReg<"%f70">;
> +def F71 : NVPTXReg<"%f71">;
> +def F72 : NVPTXReg<"%f72">;
> +def F73 : NVPTXReg<"%f73">;
> +def F74 : NVPTXReg<"%f74">;
> +def F75 : NVPTXReg<"%f75">;
> +def F76 : NVPTXReg<"%f76">;
> +def F77 : NVPTXReg<"%f77">;
> +def F78 : NVPTXReg<"%f78">;
> +def F79 : NVPTXReg<"%f79">;
> +def F80 : NVPTXReg<"%f80">;
> +def F81 : NVPTXReg<"%f81">;
> +def F82 : NVPTXReg<"%f82">;
> +def F83 : NVPTXReg<"%f83">;
> +def F84 : NVPTXReg<"%f84">;
> +def F85 : NVPTXReg<"%f85">;
> +def F86 : NVPTXReg<"%f86">;
> +def F87 : NVPTXReg<"%f87">;
> +def F88 : NVPTXReg<"%f88">;
> +def F89 : NVPTXReg<"%f89">;
> +def F90 : NVPTXReg<"%f90">;
> +def F91 : NVPTXReg<"%f91">;
> +def F92 : NVPTXReg<"%f92">;
> +def F93 : NVPTXReg<"%f93">;
> +def F94 : NVPTXReg<"%f94">;
> +def F95 : NVPTXReg<"%f95">;
> +def F96 : NVPTXReg<"%f96">;
> +def F97 : NVPTXReg<"%f97">;
> +def F98 : NVPTXReg<"%f98">;
> +def F99 : NVPTXReg<"%f99">;
> +def F100 : NVPTXReg<"%f100">;
> +def F101 : NVPTXReg<"%f101">;
> +def F102 : NVPTXReg<"%f102">;
> +def F103 : NVPTXReg<"%f103">;
> +def F104 : NVPTXReg<"%f104">;
> +def F105 : NVPTXReg<"%f105">;
> +def F106 : NVPTXReg<"%f106">;
> +def F107 : NVPTXReg<"%f107">;
> +def F108 : NVPTXReg<"%f108">;
> +def F109 : NVPTXReg<"%f109">;
> +def F110 : NVPTXReg<"%f110">;
> +def F111 : NVPTXReg<"%f111">;
> +def F112 : NVPTXReg<"%f112">;
> +def F113 : NVPTXReg<"%f113">;
> +def F114 : NVPTXReg<"%f114">;
> +def F115 : NVPTXReg<"%f115">;
> +def F116 : NVPTXReg<"%f116">;
> +def F117 : NVPTXReg<"%f117">;
> +def F118 : NVPTXReg<"%f118">;
> +def F119 : NVPTXReg<"%f119">;
> +def F120 : NVPTXReg<"%f120">;
> +def F121 : NVPTXReg<"%f121">;
> +def F122 : NVPTXReg<"%f122">;
> +def F123 : NVPTXReg<"%f123">;
> +def F124 : NVPTXReg<"%f124">;
> +def F125 : NVPTXReg<"%f125">;
> +def F126 : NVPTXReg<"%f126">;
> +def F127 : NVPTXReg<"%f127">;
> +def F128 : NVPTXReg<"%f128">;
> +def F129 : NVPTXReg<"%f129">;
> +def F130 : NVPTXReg<"%f130">;
> +def F131 : NVPTXReg<"%f131">;
> +def F132 : NVPTXReg<"%f132">;
> +def F133 : NVPTXReg<"%f133">;
> +def F134 : NVPTXReg<"%f134">;
> +def F135 : NVPTXReg<"%f135">;
> +def F136 : NVPTXReg<"%f136">;
> +def F137 : NVPTXReg<"%f137">;
> +def F138 : NVPTXReg<"%f138">;
> +def F139 : NVPTXReg<"%f139">;
> +def F140 : NVPTXReg<"%f140">;
> +def F141 : NVPTXReg<"%f141">;
> +def F142 : NVPTXReg<"%f142">;
> +def F143 : NVPTXReg<"%f143">;
> +def F144 : NVPTXReg<"%f144">;
> +def F145 : NVPTXReg<"%f145">;
> +def F146 : NVPTXReg<"%f146">;
> +def F147 : NVPTXReg<"%f147">;
> +def F148 : NVPTXReg<"%f148">;
> +def F149 : NVPTXReg<"%f149">;
> +def F150 : NVPTXReg<"%f150">;
> +def F151 : NVPTXReg<"%f151">;
> +def F152 : NVPTXReg<"%f152">;
> +def F153 : NVPTXReg<"%f153">;
> +def F154 : NVPTXReg<"%f154">;
> +def F155 : NVPTXReg<"%f155">;
> +def F156 : NVPTXReg<"%f156">;
> +def F157 : NVPTXReg<"%f157">;
> +def F158 : NVPTXReg<"%f158">;
> +def F159 : NVPTXReg<"%f159">;
> +def F160 : NVPTXReg<"%f160">;
> +def F161 : NVPTXReg<"%f161">;
> +def F162 : NVPTXReg<"%f162">;
> +def F163 : NVPTXReg<"%f163">;
> +def F164 : NVPTXReg<"%f164">;
> +def F165 : NVPTXReg<"%f165">;
> +def F166 : NVPTXReg<"%f166">;
> +def F167 : NVPTXReg<"%f167">;
> +def F168 : NVPTXReg<"%f168">;
> +def F169 : NVPTXReg<"%f169">;
> +def F170 : NVPTXReg<"%f170">;
> +def F171 : NVPTXReg<"%f171">;
> +def F172 : NVPTXReg<"%f172">;
> +def F173 : NVPTXReg<"%f173">;
> +def F174 : NVPTXReg<"%f174">;
> +def F175 : NVPTXReg<"%f175">;
> +def F176 : NVPTXReg<"%f176">;
> +def F177 : NVPTXReg<"%f177">;
> +def F178 : NVPTXReg<"%f178">;
> +def F179 : NVPTXReg<"%f179">;
> +def F180 : NVPTXReg<"%f180">;
> +def F181 : NVPTXReg<"%f181">;
> +def F182 : NVPTXReg<"%f182">;
> +def F183 : NVPTXReg<"%f183">;
> +def F184 : NVPTXReg<"%f184">;
> +def F185 : NVPTXReg<"%f185">;
> +def F186 : NVPTXReg<"%f186">;
> +def F187 : NVPTXReg<"%f187">;
> +def F188 : NVPTXReg<"%f188">;
> +def F189 : NVPTXReg<"%f189">;
> +def F190 : NVPTXReg<"%f190">;
> +def F191 : NVPTXReg<"%f191">;
> +def F192 : NVPTXReg<"%f192">;
> +def F193 : NVPTXReg<"%f193">;
> +def F194 : NVPTXReg<"%f194">;
> +def F195 : NVPTXReg<"%f195">;
> +def F196 : NVPTXReg<"%f196">;
> +def F197 : NVPTXReg<"%f197">;
> +def F198 : NVPTXReg<"%f198">;
> +def F199 : NVPTXReg<"%f199">;
> +def F200 : NVPTXReg<"%f200">;
> +def F201 : NVPTXReg<"%f201">;
> +def F202 : NVPTXReg<"%f202">;
> +def F203 : NVPTXReg<"%f203">;
> +def F204 : NVPTXReg<"%f204">;
> +def F205 : NVPTXReg<"%f205">;
> +def F206 : NVPTXReg<"%f206">;
> +def F207 : NVPTXReg<"%f207">;
> +def F208 : NVPTXReg<"%f208">;
> +def F209 : NVPTXReg<"%f209">;
> +def F210 : NVPTXReg<"%f210">;
> +def F211 : NVPTXReg<"%f211">;
> +def F212 : NVPTXReg<"%f212">;
> +def F213 : NVPTXReg<"%f213">;
> +def F214 : NVPTXReg<"%f214">;
> +def F215 : NVPTXReg<"%f215">;
> +def F216 : NVPTXReg<"%f216">;
> +def F217 : NVPTXReg<"%f217">;
> +def F218 : NVPTXReg<"%f218">;
> +def F219 : NVPTXReg<"%f219">;
> +def F220 : NVPTXReg<"%f220">;
> +def F221 : NVPTXReg<"%f221">;
> +def F222 : NVPTXReg<"%f222">;
> +def F223 : NVPTXReg<"%f223">;
> +def F224 : NVPTXReg<"%f224">;
> +def F225 : NVPTXReg<"%f225">;
> +def F226 : NVPTXReg<"%f226">;
> +def F227 : NVPTXReg<"%f227">;
> +def F228 : NVPTXReg<"%f228">;
> +def F229 : NVPTXReg<"%f229">;
> +def F230 : NVPTXReg<"%f230">;
> +def F231 : NVPTXReg<"%f231">;
> +def F232 : NVPTXReg<"%f232">;
> +def F233 : NVPTXReg<"%f233">;
> +def F234 : NVPTXReg<"%f234">;
> +def F235 : NVPTXReg<"%f235">;
> +def F236 : NVPTXReg<"%f236">;
> +def F237 : NVPTXReg<"%f237">;
> +def F238 : NVPTXReg<"%f238">;
> +def F239 : NVPTXReg<"%f239">;
> +def F240 : NVPTXReg<"%f240">;
> +def F241 : NVPTXReg<"%f241">;
> +def F242 : NVPTXReg<"%f242">;
> +def F243 : NVPTXReg<"%f243">;
> +def F244 : NVPTXReg<"%f244">;
> +def F245 : NVPTXReg<"%f245">;
> +def F246 : NVPTXReg<"%f246">;
> +def F247 : NVPTXReg<"%f247">;
> +def F248 : NVPTXReg<"%f248">;
> +def F249 : NVPTXReg<"%f249">;
> +def F250 : NVPTXReg<"%f250">;
> +def F251 : NVPTXReg<"%f251">;
> +def F252 : NVPTXReg<"%f252">;
> +def F253 : NVPTXReg<"%f253">;
> +def F254 : NVPTXReg<"%f254">;
> +def F255 : NVPTXReg<"%f255">;
> +def F256 : NVPTXReg<"%f256">;
> +def F257 : NVPTXReg<"%f257">;
> +def F258 : NVPTXReg<"%f258">;
> +def F259 : NVPTXReg<"%f259">;
> +def F260 : NVPTXReg<"%f260">;
> +def F261 : NVPTXReg<"%f261">;
> +def F262 : NVPTXReg<"%f262">;
> +def F263 : NVPTXReg<"%f263">;
> +def F264 : NVPTXReg<"%f264">;
> +def F265 : NVPTXReg<"%f265">;
> +def F266 : NVPTXReg<"%f266">;
> +def F267 : NVPTXReg<"%f267">;
> +def F268 : NVPTXReg<"%f268">;
> +def F269 : NVPTXReg<"%f269">;
> +def F270 : NVPTXReg<"%f270">;
> +def F271 : NVPTXReg<"%f271">;
> +def F272 : NVPTXReg<"%f272">;
> +def F273 : NVPTXReg<"%f273">;
> +def F274 : NVPTXReg<"%f274">;
> +def F275 : NVPTXReg<"%f275">;
> +def F276 : NVPTXReg<"%f276">;
> +def F277 : NVPTXReg<"%f277">;
> +def F278 : NVPTXReg<"%f278">;
> +def F279 : NVPTXReg<"%f279">;
> +def F280 : NVPTXReg<"%f280">;
> +def F281 : NVPTXReg<"%f281">;
> +def F282 : NVPTXReg<"%f282">;
> +def F283 : NVPTXReg<"%f283">;
> +def F284 : NVPTXReg<"%f284">;
> +def F285 : NVPTXReg<"%f285">;
> +def F286 : NVPTXReg<"%f286">;
> +def F287 : NVPTXReg<"%f287">;
> +def F288 : NVPTXReg<"%f288">;
> +def F289 : NVPTXReg<"%f289">;
> +def F290 : NVPTXReg<"%f290">;
> +def F291 : NVPTXReg<"%f291">;
> +def F292 : NVPTXReg<"%f292">;
> +def F293 : NVPTXReg<"%f293">;
> +def F294 : NVPTXReg<"%f294">;
> +def F295 : NVPTXReg<"%f295">;
> +def F296 : NVPTXReg<"%f296">;
> +def F297 : NVPTXReg<"%f297">;
> +def F298 : NVPTXReg<"%f298">;
> +def F299 : NVPTXReg<"%f299">;
> +def F300 : NVPTXReg<"%f300">;
> +def F301 : NVPTXReg<"%f301">;
> +def F302 : NVPTXReg<"%f302">;
> +def F303 : NVPTXReg<"%f303">;
> +def F304 : NVPTXReg<"%f304">;
> +def F305 : NVPTXReg<"%f305">;
> +def F306 : NVPTXReg<"%f306">;
> +def F307 : NVPTXReg<"%f307">;
> +def F308 : NVPTXReg<"%f308">;
> +def F309 : NVPTXReg<"%f309">;
> +def F310 : NVPTXReg<"%f310">;
> +def F311 : NVPTXReg<"%f311">;
> +def F312 : NVPTXReg<"%f312">;
> +def F313 : NVPTXReg<"%f313">;
> +def F314 : NVPTXReg<"%f314">;
> +def F315 : NVPTXReg<"%f315">;
> +def F316 : NVPTXReg<"%f316">;
> +def F317 : NVPTXReg<"%f317">;
> +def F318 : NVPTXReg<"%f318">;
> +def F319 : NVPTXReg<"%f319">;
> +def F320 : NVPTXReg<"%f320">;
> +def F321 : NVPTXReg<"%f321">;
> +def F322 : NVPTXReg<"%f322">;
> +def F323 : NVPTXReg<"%f323">;
> +def F324 : NVPTXReg<"%f324">;
> +def F325 : NVPTXReg<"%f325">;
> +def F326 : NVPTXReg<"%f326">;
> +def F327 : NVPTXReg<"%f327">;
> +def F328 : NVPTXReg<"%f328">;
> +def F329 : NVPTXReg<"%f329">;
> +def F330 : NVPTXReg<"%f330">;
> +def F331 : NVPTXReg<"%f331">;
> +def F332 : NVPTXReg<"%f332">;
> +def F333 : NVPTXReg<"%f333">;
> +def F334 : NVPTXReg<"%f334">;
> +def F335 : NVPTXReg<"%f335">;
> +def F336 : NVPTXReg<"%f336">;
> +def F337 : NVPTXReg<"%f337">;
> +def F338 : NVPTXReg<"%f338">;
> +def F339 : NVPTXReg<"%f339">;
> +def F340 : NVPTXReg<"%f340">;
> +def F341 : NVPTXReg<"%f341">;
> +def F342 : NVPTXReg<"%f342">;
> +def F343 : NVPTXReg<"%f343">;
> +def F344 : NVPTXReg<"%f344">;
> +def F345 : NVPTXReg<"%f345">;
> +def F346 : NVPTXReg<"%f346">;
> +def F347 : NVPTXReg<"%f347">;
> +def F348 : NVPTXReg<"%f348">;
> +def F349 : NVPTXReg<"%f349">;
> +def F350 : NVPTXReg<"%f350">;
> +def F351 : NVPTXReg<"%f351">;
> +def F352 : NVPTXReg<"%f352">;
> +def F353 : NVPTXReg<"%f353">;
> +def F354 : NVPTXReg<"%f354">;
> +def F355 : NVPTXReg<"%f355">;
> +def F356 : NVPTXReg<"%f356">;
> +def F357 : NVPTXReg<"%f357">;
> +def F358 : NVPTXReg<"%f358">;
> +def F359 : NVPTXReg<"%f359">;
> +def F360 : NVPTXReg<"%f360">;
> +def F361 : NVPTXReg<"%f361">;
> +def F362 : NVPTXReg<"%f362">;
> +def F363 : NVPTXReg<"%f363">;
> +def F364 : NVPTXReg<"%f364">;
> +def F365 : NVPTXReg<"%f365">;
> +def F366 : NVPTXReg<"%f366">;
> +def F367 : NVPTXReg<"%f367">;
> +def F368 : NVPTXReg<"%f368">;
> +def F369 : NVPTXReg<"%f369">;
> +def F370 : NVPTXReg<"%f370">;
> +def F371 : NVPTXReg<"%f371">;
> +def F372 : NVPTXReg<"%f372">;
> +def F373 : NVPTXReg<"%f373">;
> +def F374 : NVPTXReg<"%f374">;
> +def F375 : NVPTXReg<"%f375">;
> +def F376 : NVPTXReg<"%f376">;
> +def F377 : NVPTXReg<"%f377">;
> +def F378 : NVPTXReg<"%f378">;
> +def F379 : NVPTXReg<"%f379">;
> +def F380 : NVPTXReg<"%f380">;
> +def F381 : NVPTXReg<"%f381">;
> +def F382 : NVPTXReg<"%f382">;
> +def F383 : NVPTXReg<"%f383">;
> +def F384 : NVPTXReg<"%f384">;
> +def F385 : NVPTXReg<"%f385">;
> +def F386 : NVPTXReg<"%f386">;
> +def F387 : NVPTXReg<"%f387">;
> +def F388 : NVPTXReg<"%f388">;
> +def F389 : NVPTXReg<"%f389">;
> +def F390 : NVPTXReg<"%f390">;
> +def F391 : NVPTXReg<"%f391">;
> +def F392 : NVPTXReg<"%f392">;
> +def F393 : NVPTXReg<"%f393">;
> +def F394 : NVPTXReg<"%f394">;
> +def F395 : NVPTXReg<"%f395">;
> +
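
Incidentally, these ~400-entry register runs look mechanical enough that they
could probably be generated in the .td file itself rather than spelled out by
hand. A minimal sketch, assuming tblgen's 'foreach' loop and '#' paste
operator handle this case (untested):

  foreach i = 0-395 in {
    // 32-bit float registers %f0..%f395
    def F#i : NVPTXReg<"%f"#i>;
  }

That would also make the bank size (396 registers) show up in exactly one
place instead of having to count defs.
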
> +//===--- 64-bit float -----------------------------------------------------===//
> +def FL0 : NVPTXReg<"%fl0">;
> +def FL1 : NVPTXReg<"%fl1">;
> +def FL2 : NVPTXReg<"%fl2">;
> +def FL3 : NVPTXReg<"%fl3">;
> +def FL4 : NVPTXReg<"%fl4">;
> +def FL5 : NVPTXReg<"%fl5">;
> +def FL6 : NVPTXReg<"%fl6">;
> +def FL7 : NVPTXReg<"%fl7">;
> +def FL8 : NVPTXReg<"%fl8">;
> +def FL9 : NVPTXReg<"%fl9">;
> +def FL10 : NVPTXReg<"%fl10">;
> +def FL11 : NVPTXReg<"%fl11">;
> +def FL12 : NVPTXReg<"%fl12">;
> +def FL13 : NVPTXReg<"%fl13">;
> +def FL14 : NVPTXReg<"%fl14">;
> +def FL15 : NVPTXReg<"%fl15">;
> +def FL16 : NVPTXReg<"%fl16">;
> +def FL17 : NVPTXReg<"%fl17">;
> +def FL18 : NVPTXReg<"%fl18">;
> +def FL19 : NVPTXReg<"%fl19">;
> +def FL20 : NVPTXReg<"%fl20">;
> +def FL21 : NVPTXReg<"%fl21">;
> +def FL22 : NVPTXReg<"%fl22">;
> +def FL23 : NVPTXReg<"%fl23">;
> +def FL24 : NVPTXReg<"%fl24">;
> +def FL25 : NVPTXReg<"%fl25">;
> +def FL26 : NVPTXReg<"%fl26">;
> +def FL27 : NVPTXReg<"%fl27">;
> +def FL28 : NVPTXReg<"%fl28">;
> +def FL29 : NVPTXReg<"%fl29">;
> +def FL30 : NVPTXReg<"%fl30">;
> +def FL31 : NVPTXReg<"%fl31">;
> +def FL32 : NVPTXReg<"%fl32">;
> +def FL33 : NVPTXReg<"%fl33">;
> +def FL34 : NVPTXReg<"%fl34">;
> +def FL35 : NVPTXReg<"%fl35">;
> +def FL36 : NVPTXReg<"%fl36">;
> +def FL37 : NVPTXReg<"%fl37">;
> +def FL38 : NVPTXReg<"%fl38">;
> +def FL39 : NVPTXReg<"%fl39">;
> +def FL40 : NVPTXReg<"%fl40">;
> +def FL41 : NVPTXReg<"%fl41">;
> +def FL42 : NVPTXReg<"%fl42">;
> +def FL43 : NVPTXReg<"%fl43">;
> +def FL44 : NVPTXReg<"%fl44">;
> +def FL45 : NVPTXReg<"%fl45">;
> +def FL46 : NVPTXReg<"%fl46">;
> +def FL47 : NVPTXReg<"%fl47">;
> +def FL48 : NVPTXReg<"%fl48">;
> +def FL49 : NVPTXReg<"%fl49">;
> +def FL50 : NVPTXReg<"%fl50">;
> +def FL51 : NVPTXReg<"%fl51">;
> +def FL52 : NVPTXReg<"%fl52">;
> +def FL53 : NVPTXReg<"%fl53">;
> +def FL54 : NVPTXReg<"%fl54">;
> +def FL55 : NVPTXReg<"%fl55">;
> +def FL56 : NVPTXReg<"%fl56">;
> +def FL57 : NVPTXReg<"%fl57">;
> +def FL58 : NVPTXReg<"%fl58">;
> +def FL59 : NVPTXReg<"%fl59">;
> +def FL60 : NVPTXReg<"%fl60">;
> +def FL61 : NVPTXReg<"%fl61">;
> +def FL62 : NVPTXReg<"%fl62">;
> +def FL63 : NVPTXReg<"%fl63">;
> +def FL64 : NVPTXReg<"%fl64">;
> +def FL65 : NVPTXReg<"%fl65">;
> +def FL66 : NVPTXReg<"%fl66">;
> +def FL67 : NVPTXReg<"%fl67">;
> +def FL68 : NVPTXReg<"%fl68">;
> +def FL69 : NVPTXReg<"%fl69">;
> +def FL70 : NVPTXReg<"%fl70">;
> +def FL71 : NVPTXReg<"%fl71">;
> +def FL72 : NVPTXReg<"%fl72">;
> +def FL73 : NVPTXReg<"%fl73">;
> +def FL74 : NVPTXReg<"%fl74">;
> +def FL75 : NVPTXReg<"%fl75">;
> +def FL76 : NVPTXReg<"%fl76">;
> +def FL77 : NVPTXReg<"%fl77">;
> +def FL78 : NVPTXReg<"%fl78">;
> +def FL79 : NVPTXReg<"%fl79">;
> +def FL80 : NVPTXReg<"%fl80">;
> +def FL81 : NVPTXReg<"%fl81">;
> +def FL82 : NVPTXReg<"%fl82">;
> +def FL83 : NVPTXReg<"%fl83">;
> +def FL84 : NVPTXReg<"%fl84">;
> +def FL85 : NVPTXReg<"%fl85">;
> +def FL86 : NVPTXReg<"%fl86">;
> +def FL87 : NVPTXReg<"%fl87">;
> +def FL88 : NVPTXReg<"%fl88">;
> +def FL89 : NVPTXReg<"%fl89">;
> +def FL90 : NVPTXReg<"%fl90">;
> +def FL91 : NVPTXReg<"%fl91">;
> +def FL92 : NVPTXReg<"%fl92">;
> +def FL93 : NVPTXReg<"%fl93">;
> +def FL94 : NVPTXReg<"%fl94">;
> +def FL95 : NVPTXReg<"%fl95">;
> +def FL96 : NVPTXReg<"%fl96">;
> +def FL97 : NVPTXReg<"%fl97">;
> +def FL98 : NVPTXReg<"%fl98">;
> +def FL99 : NVPTXReg<"%fl99">;
> +def FL100 : NVPTXReg<"%fl100">;
> +def FL101 : NVPTXReg<"%fl101">;
> +def FL102 : NVPTXReg<"%fl102">;
> +def FL103 : NVPTXReg<"%fl103">;
> +def FL104 : NVPTXReg<"%fl104">;
> +def FL105 : NVPTXReg<"%fl105">;
> +def FL106 : NVPTXReg<"%fl106">;
> +def FL107 : NVPTXReg<"%fl107">;
> +def FL108 : NVPTXReg<"%fl108">;
> +def FL109 : NVPTXReg<"%fl109">;
> +def FL110 : NVPTXReg<"%fl110">;
> +def FL111 : NVPTXReg<"%fl111">;
> +def FL112 : NVPTXReg<"%fl112">;
> +def FL113 : NVPTXReg<"%fl113">;
> +def FL114 : NVPTXReg<"%fl114">;
> +def FL115 : NVPTXReg<"%fl115">;
> +def FL116 : NVPTXReg<"%fl116">;
> +def FL117 : NVPTXReg<"%fl117">;
> +def FL118 : NVPTXReg<"%fl118">;
> +def FL119 : NVPTXReg<"%fl119">;
> +def FL120 : NVPTXReg<"%fl120">;
> +def FL121 : NVPTXReg<"%fl121">;
> +def FL122 : NVPTXReg<"%fl122">;
> +def FL123 : NVPTXReg<"%fl123">;
> +def FL124 : NVPTXReg<"%fl124">;
> +def FL125 : NVPTXReg<"%fl125">;
> +def FL126 : NVPTXReg<"%fl126">;
> +def FL127 : NVPTXReg<"%fl127">;
> +def FL128 : NVPTXReg<"%fl128">;
> +def FL129 : NVPTXReg<"%fl129">;
> +def FL130 : NVPTXReg<"%fl130">;
> +def FL131 : NVPTXReg<"%fl131">;
> +def FL132 : NVPTXReg<"%fl132">;
> +def FL133 : NVPTXReg<"%fl133">;
> +def FL134 : NVPTXReg<"%fl134">;
> +def FL135 : NVPTXReg<"%fl135">;
> +def FL136 : NVPTXReg<"%fl136">;
> +def FL137 : NVPTXReg<"%fl137">;
> +def FL138 : NVPTXReg<"%fl138">;
> +def FL139 : NVPTXReg<"%fl139">;
> +def FL140 : NVPTXReg<"%fl140">;
> +def FL141 : NVPTXReg<"%fl141">;
> +def FL142 : NVPTXReg<"%fl142">;
> +def FL143 : NVPTXReg<"%fl143">;
> +def FL144 : NVPTXReg<"%fl144">;
> +def FL145 : NVPTXReg<"%fl145">;
> +def FL146 : NVPTXReg<"%fl146">;
> +def FL147 : NVPTXReg<"%fl147">;
> +def FL148 : NVPTXReg<"%fl148">;
> +def FL149 : NVPTXReg<"%fl149">;
> +def FL150 : NVPTXReg<"%fl150">;
> +def FL151 : NVPTXReg<"%fl151">;
> +def FL152 : NVPTXReg<"%fl152">;
> +def FL153 : NVPTXReg<"%fl153">;
> +def FL154 : NVPTXReg<"%fl154">;
> +def FL155 : NVPTXReg<"%fl155">;
> +def FL156 : NVPTXReg<"%fl156">;
> +def FL157 : NVPTXReg<"%fl157">;
> +def FL158 : NVPTXReg<"%fl158">;
> +def FL159 : NVPTXReg<"%fl159">;
> +def FL160 : NVPTXReg<"%fl160">;
> +def FL161 : NVPTXReg<"%fl161">;
> +def FL162 : NVPTXReg<"%fl162">;
> +def FL163 : NVPTXReg<"%fl163">;
> +def FL164 : NVPTXReg<"%fl164">;
> +def FL165 : NVPTXReg<"%fl165">;
> +def FL166 : NVPTXReg<"%fl166">;
> +def FL167 : NVPTXReg<"%fl167">;
> +def FL168 : NVPTXReg<"%fl168">;
> +def FL169 : NVPTXReg<"%fl169">;
> +def FL170 : NVPTXReg<"%fl170">;
> +def FL171 : NVPTXReg<"%fl171">;
> +def FL172 : NVPTXReg<"%fl172">;
> +def FL173 : NVPTXReg<"%fl173">;
> +def FL174 : NVPTXReg<"%fl174">;
> +def FL175 : NVPTXReg<"%fl175">;
> +def FL176 : NVPTXReg<"%fl176">;
> +def FL177 : NVPTXReg<"%fl177">;
> +def FL178 : NVPTXReg<"%fl178">;
> +def FL179 : NVPTXReg<"%fl179">;
> +def FL180 : NVPTXReg<"%fl180">;
> +def FL181 : NVPTXReg<"%fl181">;
> +def FL182 : NVPTXReg<"%fl182">;
> +def FL183 : NVPTXReg<"%fl183">;
> +def FL184 : NVPTXReg<"%fl184">;
> +def FL185 : NVPTXReg<"%fl185">;
> +def FL186 : NVPTXReg<"%fl186">;
> +def FL187 : NVPTXReg<"%fl187">;
> +def FL188 : NVPTXReg<"%fl188">;
> +def FL189 : NVPTXReg<"%fl189">;
> +def FL190 : NVPTXReg<"%fl190">;
> +def FL191 : NVPTXReg<"%fl191">;
> +def FL192 : NVPTXReg<"%fl192">;
> +def FL193 : NVPTXReg<"%fl193">;
> +def FL194 : NVPTXReg<"%fl194">;
> +def FL195 : NVPTXReg<"%fl195">;
> +def FL196 : NVPTXReg<"%fl196">;
> +def FL197 : NVPTXReg<"%fl197">;
> +def FL198 : NVPTXReg<"%fl198">;
> +def FL199 : NVPTXReg<"%fl199">;
> +def FL200 : NVPTXReg<"%fl200">;
> +def FL201 : NVPTXReg<"%fl201">;
> +def FL202 : NVPTXReg<"%fl202">;
> +def FL203 : NVPTXReg<"%fl203">;
> +def FL204 : NVPTXReg<"%fl204">;
> +def FL205 : NVPTXReg<"%fl205">;
> +def FL206 : NVPTXReg<"%fl206">;
> +def FL207 : NVPTXReg<"%fl207">;
> +def FL208 : NVPTXReg<"%fl208">;
> +def FL209 : NVPTXReg<"%fl209">;
> +def FL210 : NVPTXReg<"%fl210">;
> +def FL211 : NVPTXReg<"%fl211">;
> +def FL212 : NVPTXReg<"%fl212">;
> +def FL213 : NVPTXReg<"%fl213">;
> +def FL214 : NVPTXReg<"%fl214">;
> +def FL215 : NVPTXReg<"%fl215">;
> +def FL216 : NVPTXReg<"%fl216">;
> +def FL217 : NVPTXReg<"%fl217">;
> +def FL218 : NVPTXReg<"%fl218">;
> +def FL219 : NVPTXReg<"%fl219">;
> +def FL220 : NVPTXReg<"%fl220">;
> +def FL221 : NVPTXReg<"%fl221">;
> +def FL222 : NVPTXReg<"%fl222">;
> +def FL223 : NVPTXReg<"%fl223">;
> +def FL224 : NVPTXReg<"%fl224">;
> +def FL225 : NVPTXReg<"%fl225">;
> +def FL226 : NVPTXReg<"%fl226">;
> +def FL227 : NVPTXReg<"%fl227">;
> +def FL228 : NVPTXReg<"%fl228">;
> +def FL229 : NVPTXReg<"%fl229">;
> +def FL230 : NVPTXReg<"%fl230">;
> +def FL231 : NVPTXReg<"%fl231">;
> +def FL232 : NVPTXReg<"%fl232">;
> +def FL233 : NVPTXReg<"%fl233">;
> +def FL234 : NVPTXReg<"%fl234">;
> +def FL235 : NVPTXReg<"%fl235">;
> +def FL236 : NVPTXReg<"%fl236">;
> +def FL237 : NVPTXReg<"%fl237">;
> +def FL238 : NVPTXReg<"%fl238">;
> +def FL239 : NVPTXReg<"%fl239">;
> +def FL240 : NVPTXReg<"%fl240">;
> +def FL241 : NVPTXReg<"%fl241">;
> +def FL242 : NVPTXReg<"%fl242">;
> +def FL243 : NVPTXReg<"%fl243">;
> +def FL244 : NVPTXReg<"%fl244">;
> +def FL245 : NVPTXReg<"%fl245">;
> +def FL246 : NVPTXReg<"%fl246">;
> +def FL247 : NVPTXReg<"%fl247">;
> +def FL248 : NVPTXReg<"%fl248">;
> +def FL249 : NVPTXReg<"%fl249">;
> +def FL250 : NVPTXReg<"%fl250">;
> +def FL251 : NVPTXReg<"%fl251">;
> +def FL252 : NVPTXReg<"%fl252">;
> +def FL253 : NVPTXReg<"%fl253">;
> +def FL254 : NVPTXReg<"%fl254">;
> +def FL255 : NVPTXReg<"%fl255">;
> +def FL256 : NVPTXReg<"%fl256">;
> +def FL257 : NVPTXReg<"%fl257">;
> +def FL258 : NVPTXReg<"%fl258">;
> +def FL259 : NVPTXReg<"%fl259">;
> +def FL260 : NVPTXReg<"%fl260">;
> +def FL261 : NVPTXReg<"%fl261">;
> +def FL262 : NVPTXReg<"%fl262">;
> +def FL263 : NVPTXReg<"%fl263">;
> +def FL264 : NVPTXReg<"%fl264">;
> +def FL265 : NVPTXReg<"%fl265">;
> +def FL266 : NVPTXReg<"%fl266">;
> +def FL267 : NVPTXReg<"%fl267">;
> +def FL268 : NVPTXReg<"%fl268">;
> +def FL269 : NVPTXReg<"%fl269">;
> +def FL270 : NVPTXReg<"%fl270">;
> +def FL271 : NVPTXReg<"%fl271">;
> +def FL272 : NVPTXReg<"%fl272">;
> +def FL273 : NVPTXReg<"%fl273">;
> +def FL274 : NVPTXReg<"%fl274">;
> +def FL275 : NVPTXReg<"%fl275">;
> +def FL276 : NVPTXReg<"%fl276">;
> +def FL277 : NVPTXReg<"%fl277">;
> +def FL278 : NVPTXReg<"%fl278">;
> +def FL279 : NVPTXReg<"%fl279">;
> +def FL280 : NVPTXReg<"%fl280">;
> +def FL281 : NVPTXReg<"%fl281">;
> +def FL282 : NVPTXReg<"%fl282">;
> +def FL283 : NVPTXReg<"%fl283">;
> +def FL284 : NVPTXReg<"%fl284">;
> +def FL285 : NVPTXReg<"%fl285">;
> +def FL286 : NVPTXReg<"%fl286">;
> +def FL287 : NVPTXReg<"%fl287">;
> +def FL288 : NVPTXReg<"%fl288">;
> +def FL289 : NVPTXReg<"%fl289">;
> +def FL290 : NVPTXReg<"%fl290">;
> +def FL291 : NVPTXReg<"%fl291">;
> +def FL292 : NVPTXReg<"%fl292">;
> +def FL293 : NVPTXReg<"%fl293">;
> +def FL294 : NVPTXReg<"%fl294">;
> +def FL295 : NVPTXReg<"%fl295">;
> +def FL296 : NVPTXReg<"%fl296">;
> +def FL297 : NVPTXReg<"%fl297">;
> +def FL298 : NVPTXReg<"%fl298">;
> +def FL299 : NVPTXReg<"%fl299">;
> +def FL300 : NVPTXReg<"%fl300">;
> +def FL301 : NVPTXReg<"%fl301">;
> +def FL302 : NVPTXReg<"%fl302">;
> +def FL303 : NVPTXReg<"%fl303">;
> +def FL304 : NVPTXReg<"%fl304">;
> +def FL305 : NVPTXReg<"%fl305">;
> +def FL306 : NVPTXReg<"%fl306">;
> +def FL307 : NVPTXReg<"%fl307">;
> +def FL308 : NVPTXReg<"%fl308">;
> +def FL309 : NVPTXReg<"%fl309">;
> +def FL310 : NVPTXReg<"%fl310">;
> +def FL311 : NVPTXReg<"%fl311">;
> +def FL312 : NVPTXReg<"%fl312">;
> +def FL313 : NVPTXReg<"%fl313">;
> +def FL314 : NVPTXReg<"%fl314">;
> +def FL315 : NVPTXReg<"%fl315">;
> +def FL316 : NVPTXReg<"%fl316">;
> +def FL317 : NVPTXReg<"%fl317">;
> +def FL318 : NVPTXReg<"%fl318">;
> +def FL319 : NVPTXReg<"%fl319">;
> +def FL320 : NVPTXReg<"%fl320">;
> +def FL321 : NVPTXReg<"%fl321">;
> +def FL322 : NVPTXReg<"%fl322">;
> +def FL323 : NVPTXReg<"%fl323">;
> +def FL324 : NVPTXReg<"%fl324">;
> +def FL325 : NVPTXReg<"%fl325">;
> +def FL326 : NVPTXReg<"%fl326">;
> +def FL327 : NVPTXReg<"%fl327">;
> +def FL328 : NVPTXReg<"%fl328">;
> +def FL329 : NVPTXReg<"%fl329">;
> +def FL330 : NVPTXReg<"%fl330">;
> +def FL331 : NVPTXReg<"%fl331">;
> +def FL332 : NVPTXReg<"%fl332">;
> +def FL333 : NVPTXReg<"%fl333">;
> +def FL334 : NVPTXReg<"%fl334">;
> +def FL335 : NVPTXReg<"%fl335">;
> +def FL336 : NVPTXReg<"%fl336">;
> +def FL337 : NVPTXReg<"%fl337">;
> +def FL338 : NVPTXReg<"%fl338">;
> +def FL339 : NVPTXReg<"%fl339">;
> +def FL340 : NVPTXReg<"%fl340">;
> +def FL341 : NVPTXReg<"%fl341">;
> +def FL342 : NVPTXReg<"%fl342">;
> +def FL343 : NVPTXReg<"%fl343">;
> +def FL344 : NVPTXReg<"%fl344">;
> +def FL345 : NVPTXReg<"%fl345">;
> +def FL346 : NVPTXReg<"%fl346">;
> +def FL347 : NVPTXReg<"%fl347">;
> +def FL348 : NVPTXReg<"%fl348">;
> +def FL349 : NVPTXReg<"%fl349">;
> +def FL350 : NVPTXReg<"%fl350">;
> +def FL351 : NVPTXReg<"%fl351">;
> +def FL352 : NVPTXReg<"%fl352">;
> +def FL353 : NVPTXReg<"%fl353">;
> +def FL354 : NVPTXReg<"%fl354">;
> +def FL355 : NVPTXReg<"%fl355">;
> +def FL356 : NVPTXReg<"%fl356">;
> +def FL357 : NVPTXReg<"%fl357">;
> +def FL358 : NVPTXReg<"%fl358">;
> +def FL359 : NVPTXReg<"%fl359">;
> +def FL360 : NVPTXReg<"%fl360">;
> +def FL361 : NVPTXReg<"%fl361">;
> +def FL362 : NVPTXReg<"%fl362">;
> +def FL363 : NVPTXReg<"%fl363">;
> +def FL364 : NVPTXReg<"%fl364">;
> +def FL365 : NVPTXReg<"%fl365">;
> +def FL366 : NVPTXReg<"%fl366">;
> +def FL367 : NVPTXReg<"%fl367">;
> +def FL368 : NVPTXReg<"%fl368">;
> +def FL369 : NVPTXReg<"%fl369">;
> +def FL370 : NVPTXReg<"%fl370">;
> +def FL371 : NVPTXReg<"%fl371">;
> +def FL372 : NVPTXReg<"%fl372">;
> +def FL373 : NVPTXReg<"%fl373">;
> +def FL374 : NVPTXReg<"%fl374">;
> +def FL375 : NVPTXReg<"%fl375">;
> +def FL376 : NVPTXReg<"%fl376">;
> +def FL377 : NVPTXReg<"%fl377">;
> +def FL378 : NVPTXReg<"%fl378">;
> +def FL379 : NVPTXReg<"%fl379">;
> +def FL380 : NVPTXReg<"%fl380">;
> +def FL381 : NVPTXReg<"%fl381">;
> +def FL382 : NVPTXReg<"%fl382">;
> +def FL383 : NVPTXReg<"%fl383">;
> +def FL384 : NVPTXReg<"%fl384">;
> +def FL385 : NVPTXReg<"%fl385">;
> +def FL386 : NVPTXReg<"%fl386">;
> +def FL387 : NVPTXReg<"%fl387">;
> +def FL388 : NVPTXReg<"%fl388">;
> +def FL389 : NVPTXReg<"%fl389">;
> +def FL390 : NVPTXReg<"%fl390">;
> +def FL391 : NVPTXReg<"%fl391">;
> +def FL392 : NVPTXReg<"%fl392">;
> +def FL393 : NVPTXReg<"%fl393">;
> +def FL394 : NVPTXReg<"%fl394">;
> +def FL395 : NVPTXReg<"%fl395">;
> +
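
Same observation applies to the %fl bank above and the v2b8/v2b16/v2b32
vector banks below; the foreach sketch earlier should extend to them, e.g.
'def FL#i : NVPTXReg<"%fl"#i>;' inside the same loop, if the paste operator
accepts these names (again, just a suggestion, untested).
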
> +//===--- Vector -----------------------------------------------------------===//
> +def v2b8_0 : NVPTXReg<"%v2b8_0">;
> +def v2b8_1 : NVPTXReg<"%v2b8_1">;
> +def v2b8_2 : NVPTXReg<"%v2b8_2">;
> +def v2b8_3 : NVPTXReg<"%v2b8_3">;
> +def v2b8_4 : NVPTXReg<"%v2b8_4">;
> +def v2b8_5 : NVPTXReg<"%v2b8_5">;
> +def v2b8_6 : NVPTXReg<"%v2b8_6">;
> +def v2b8_7 : NVPTXReg<"%v2b8_7">;
> +def v2b8_8 : NVPTXReg<"%v2b8_8">;
> +def v2b8_9 : NVPTXReg<"%v2b8_9">;
> +def v2b8_10 : NVPTXReg<"%v2b8_10">;
> +def v2b8_11 : NVPTXReg<"%v2b8_11">;
> +def v2b8_12 : NVPTXReg<"%v2b8_12">;
> +def v2b8_13 : NVPTXReg<"%v2b8_13">;
> +def v2b8_14 : NVPTXReg<"%v2b8_14">;
> +def v2b8_15 : NVPTXReg<"%v2b8_15">;
> +def v2b8_16 : NVPTXReg<"%v2b8_16">;
> +def v2b8_17 : NVPTXReg<"%v2b8_17">;
> +def v2b8_18 : NVPTXReg<"%v2b8_18">;
> +def v2b8_19 : NVPTXReg<"%v2b8_19">;
> +def v2b8_20 : NVPTXReg<"%v2b8_20">;
> +def v2b8_21 : NVPTXReg<"%v2b8_21">;
> +def v2b8_22 : NVPTXReg<"%v2b8_22">;
> +def v2b8_23 : NVPTXReg<"%v2b8_23">;
> +def v2b8_24 : NVPTXReg<"%v2b8_24">;
> +def v2b8_25 : NVPTXReg<"%v2b8_25">;
> +def v2b8_26 : NVPTXReg<"%v2b8_26">;
> +def v2b8_27 : NVPTXReg<"%v2b8_27">;
> +def v2b8_28 : NVPTXReg<"%v2b8_28">;
> +def v2b8_29 : NVPTXReg<"%v2b8_29">;
> +def v2b8_30 : NVPTXReg<"%v2b8_30">;
> +def v2b8_31 : NVPTXReg<"%v2b8_31">;
> +def v2b8_32 : NVPTXReg<"%v2b8_32">;
> +def v2b8_33 : NVPTXReg<"%v2b8_33">;
> +def v2b8_34 : NVPTXReg<"%v2b8_34">;
> +def v2b8_35 : NVPTXReg<"%v2b8_35">;
> +def v2b8_36 : NVPTXReg<"%v2b8_36">;
> +def v2b8_37 : NVPTXReg<"%v2b8_37">;
> +def v2b8_38 : NVPTXReg<"%v2b8_38">;
> +def v2b8_39 : NVPTXReg<"%v2b8_39">;
> +def v2b8_40 : NVPTXReg<"%v2b8_40">;
> +def v2b8_41 : NVPTXReg<"%v2b8_41">;
> +def v2b8_42 : NVPTXReg<"%v2b8_42">;
> +def v2b8_43 : NVPTXReg<"%v2b8_43">;
> +def v2b8_44 : NVPTXReg<"%v2b8_44">;
> +def v2b8_45 : NVPTXReg<"%v2b8_45">;
> +def v2b8_46 : NVPTXReg<"%v2b8_46">;
> +def v2b8_47 : NVPTXReg<"%v2b8_47">;
> +def v2b8_48 : NVPTXReg<"%v2b8_48">;
> +def v2b8_49 : NVPTXReg<"%v2b8_49">;
> +def v2b8_50 : NVPTXReg<"%v2b8_50">;
> +def v2b8_51 : NVPTXReg<"%v2b8_51">;
> +def v2b8_52 : NVPTXReg<"%v2b8_52">;
> +def v2b8_53 : NVPTXReg<"%v2b8_53">;
> +def v2b8_54 : NVPTXReg<"%v2b8_54">;
> +def v2b8_55 : NVPTXReg<"%v2b8_55">;
> +def v2b8_56 : NVPTXReg<"%v2b8_56">;
> +def v2b8_57 : NVPTXReg<"%v2b8_57">;
> +def v2b8_58 : NVPTXReg<"%v2b8_58">;
> +def v2b8_59 : NVPTXReg<"%v2b8_59">;
> +def v2b8_60 : NVPTXReg<"%v2b8_60">;
> +def v2b8_61 : NVPTXReg<"%v2b8_61">;
> +def v2b8_62 : NVPTXReg<"%v2b8_62">;
> +def v2b8_63 : NVPTXReg<"%v2b8_63">;
> +def v2b8_64 : NVPTXReg<"%v2b8_64">;
> +def v2b8_65 : NVPTXReg<"%v2b8_65">;
> +def v2b8_66 : NVPTXReg<"%v2b8_66">;
> +def v2b8_67 : NVPTXReg<"%v2b8_67">;
> +def v2b8_68 : NVPTXReg<"%v2b8_68">;
> +def v2b8_69 : NVPTXReg<"%v2b8_69">;
> +def v2b8_70 : NVPTXReg<"%v2b8_70">;
> +def v2b8_71 : NVPTXReg<"%v2b8_71">;
> +def v2b8_72 : NVPTXReg<"%v2b8_72">;
> +def v2b8_73 : NVPTXReg<"%v2b8_73">;
> +def v2b8_74 : NVPTXReg<"%v2b8_74">;
> +def v2b8_75 : NVPTXReg<"%v2b8_75">;
> +def v2b8_76 : NVPTXReg<"%v2b8_76">;
> +def v2b8_77 : NVPTXReg<"%v2b8_77">;
> +def v2b8_78 : NVPTXReg<"%v2b8_78">;
> +def v2b8_79 : NVPTXReg<"%v2b8_79">;
> +def v2b8_80 : NVPTXReg<"%v2b8_80">;
> +def v2b8_81 : NVPTXReg<"%v2b8_81">;
> +def v2b8_82 : NVPTXReg<"%v2b8_82">;
> +def v2b8_83 : NVPTXReg<"%v2b8_83">;
> +def v2b8_84 : NVPTXReg<"%v2b8_84">;
> +def v2b8_85 : NVPTXReg<"%v2b8_85">;
> +def v2b8_86 : NVPTXReg<"%v2b8_86">;
> +def v2b8_87 : NVPTXReg<"%v2b8_87">;
> +def v2b8_88 : NVPTXReg<"%v2b8_88">;
> +def v2b8_89 : NVPTXReg<"%v2b8_89">;
> +def v2b8_90 : NVPTXReg<"%v2b8_90">;
> +def v2b8_91 : NVPTXReg<"%v2b8_91">;
> +def v2b8_92 : NVPTXReg<"%v2b8_92">;
> +def v2b8_93 : NVPTXReg<"%v2b8_93">;
> +def v2b8_94 : NVPTXReg<"%v2b8_94">;
> +def v2b8_95 : NVPTXReg<"%v2b8_95">;
> +def v2b8_96 : NVPTXReg<"%v2b8_96">;
> +def v2b8_97 : NVPTXReg<"%v2b8_97">;
> +def v2b8_98 : NVPTXReg<"%v2b8_98">;
> +def v2b8_99 : NVPTXReg<"%v2b8_99">;
> +def v2b8_100 : NVPTXReg<"%v2b8_100">;
> +def v2b8_101 : NVPTXReg<"%v2b8_101">;
> +def v2b8_102 : NVPTXReg<"%v2b8_102">;
> +def v2b8_103 : NVPTXReg<"%v2b8_103">;
> +def v2b8_104 : NVPTXReg<"%v2b8_104">;
> +def v2b8_105 : NVPTXReg<"%v2b8_105">;
> +def v2b8_106 : NVPTXReg<"%v2b8_106">;
> +def v2b8_107 : NVPTXReg<"%v2b8_107">;
> +def v2b8_108 : NVPTXReg<"%v2b8_108">;
> +def v2b8_109 : NVPTXReg<"%v2b8_109">;
> +def v2b8_110 : NVPTXReg<"%v2b8_110">;
> +def v2b8_111 : NVPTXReg<"%v2b8_111">;
> +def v2b8_112 : NVPTXReg<"%v2b8_112">;
> +def v2b8_113 : NVPTXReg<"%v2b8_113">;
> +def v2b8_114 : NVPTXReg<"%v2b8_114">;
> +def v2b8_115 : NVPTXReg<"%v2b8_115">;
> +def v2b8_116 : NVPTXReg<"%v2b8_116">;
> +def v2b8_117 : NVPTXReg<"%v2b8_117">;
> +def v2b8_118 : NVPTXReg<"%v2b8_118">;
> +def v2b8_119 : NVPTXReg<"%v2b8_119">;
> +def v2b8_120 : NVPTXReg<"%v2b8_120">;
> +def v2b8_121 : NVPTXReg<"%v2b8_121">;
> +def v2b8_122 : NVPTXReg<"%v2b8_122">;
> +def v2b8_123 : NVPTXReg<"%v2b8_123">;
> +def v2b8_124 : NVPTXReg<"%v2b8_124">;
> +def v2b8_125 : NVPTXReg<"%v2b8_125">;
> +def v2b8_126 : NVPTXReg<"%v2b8_126">;
> +def v2b8_127 : NVPTXReg<"%v2b8_127">;
> +def v2b8_128 : NVPTXReg<"%v2b8_128">;
> +def v2b8_129 : NVPTXReg<"%v2b8_129">;
> +def v2b8_130 : NVPTXReg<"%v2b8_130">;
> +def v2b8_131 : NVPTXReg<"%v2b8_131">;
> +def v2b8_132 : NVPTXReg<"%v2b8_132">;
> +def v2b8_133 : NVPTXReg<"%v2b8_133">;
> +def v2b8_134 : NVPTXReg<"%v2b8_134">;
> +def v2b8_135 : NVPTXReg<"%v2b8_135">;
> +def v2b8_136 : NVPTXReg<"%v2b8_136">;
> +def v2b8_137 : NVPTXReg<"%v2b8_137">;
> +def v2b8_138 : NVPTXReg<"%v2b8_138">;
> +def v2b8_139 : NVPTXReg<"%v2b8_139">;
> +def v2b8_140 : NVPTXReg<"%v2b8_140">;
> +def v2b8_141 : NVPTXReg<"%v2b8_141">;
> +def v2b8_142 : NVPTXReg<"%v2b8_142">;
> +def v2b8_143 : NVPTXReg<"%v2b8_143">;
> +def v2b8_144 : NVPTXReg<"%v2b8_144">;
> +def v2b8_145 : NVPTXReg<"%v2b8_145">;
> +def v2b8_146 : NVPTXReg<"%v2b8_146">;
> +def v2b8_147 : NVPTXReg<"%v2b8_147">;
> +def v2b8_148 : NVPTXReg<"%v2b8_148">;
> +def v2b8_149 : NVPTXReg<"%v2b8_149">;
> +def v2b8_150 : NVPTXReg<"%v2b8_150">;
> +def v2b8_151 : NVPTXReg<"%v2b8_151">;
> +def v2b8_152 : NVPTXReg<"%v2b8_152">;
> +def v2b8_153 : NVPTXReg<"%v2b8_153">;
> +def v2b8_154 : NVPTXReg<"%v2b8_154">;
> +def v2b8_155 : NVPTXReg<"%v2b8_155">;
> +def v2b8_156 : NVPTXReg<"%v2b8_156">;
> +def v2b8_157 : NVPTXReg<"%v2b8_157">;
> +def v2b8_158 : NVPTXReg<"%v2b8_158">;
> +def v2b8_159 : NVPTXReg<"%v2b8_159">;
> +def v2b8_160 : NVPTXReg<"%v2b8_160">;
> +def v2b8_161 : NVPTXReg<"%v2b8_161">;
> +def v2b8_162 : NVPTXReg<"%v2b8_162">;
> +def v2b8_163 : NVPTXReg<"%v2b8_163">;
> +def v2b8_164 : NVPTXReg<"%v2b8_164">;
> +def v2b8_165 : NVPTXReg<"%v2b8_165">;
> +def v2b8_166 : NVPTXReg<"%v2b8_166">;
> +def v2b8_167 : NVPTXReg<"%v2b8_167">;
> +def v2b8_168 : NVPTXReg<"%v2b8_168">;
> +def v2b8_169 : NVPTXReg<"%v2b8_169">;
> +def v2b8_170 : NVPTXReg<"%v2b8_170">;
> +def v2b8_171 : NVPTXReg<"%v2b8_171">;
> +def v2b8_172 : NVPTXReg<"%v2b8_172">;
> +def v2b8_173 : NVPTXReg<"%v2b8_173">;
> +def v2b8_174 : NVPTXReg<"%v2b8_174">;
> +def v2b8_175 : NVPTXReg<"%v2b8_175">;
> +def v2b8_176 : NVPTXReg<"%v2b8_176">;
> +def v2b8_177 : NVPTXReg<"%v2b8_177">;
> +def v2b8_178 : NVPTXReg<"%v2b8_178">;
> +def v2b8_179 : NVPTXReg<"%v2b8_179">;
> +def v2b8_180 : NVPTXReg<"%v2b8_180">;
> +def v2b8_181 : NVPTXReg<"%v2b8_181">;
> +def v2b8_182 : NVPTXReg<"%v2b8_182">;
> +def v2b8_183 : NVPTXReg<"%v2b8_183">;
> +def v2b8_184 : NVPTXReg<"%v2b8_184">;
> +def v2b8_185 : NVPTXReg<"%v2b8_185">;
> +def v2b8_186 : NVPTXReg<"%v2b8_186">;
> +def v2b8_187 : NVPTXReg<"%v2b8_187">;
> +def v2b8_188 : NVPTXReg<"%v2b8_188">;
> +def v2b8_189 : NVPTXReg<"%v2b8_189">;
> +def v2b8_190 : NVPTXReg<"%v2b8_190">;
> +def v2b8_191 : NVPTXReg<"%v2b8_191">;
> +def v2b8_192 : NVPTXReg<"%v2b8_192">;
> +def v2b8_193 : NVPTXReg<"%v2b8_193">;
> +def v2b8_194 : NVPTXReg<"%v2b8_194">;
> +def v2b8_195 : NVPTXReg<"%v2b8_195">;
> +def v2b8_196 : NVPTXReg<"%v2b8_196">;
> +def v2b8_197 : NVPTXReg<"%v2b8_197">;
> +def v2b8_198 : NVPTXReg<"%v2b8_198">;
> +def v2b8_199 : NVPTXReg<"%v2b8_199">;
> +def v2b8_200 : NVPTXReg<"%v2b8_200">;
> +def v2b8_201 : NVPTXReg<"%v2b8_201">;
> +def v2b8_202 : NVPTXReg<"%v2b8_202">;
> +def v2b8_203 : NVPTXReg<"%v2b8_203">;
> +def v2b8_204 : NVPTXReg<"%v2b8_204">;
> +def v2b8_205 : NVPTXReg<"%v2b8_205">;
> +def v2b8_206 : NVPTXReg<"%v2b8_206">;
> +def v2b8_207 : NVPTXReg<"%v2b8_207">;
> +def v2b8_208 : NVPTXReg<"%v2b8_208">;
> +def v2b8_209 : NVPTXReg<"%v2b8_209">;
> +def v2b8_210 : NVPTXReg<"%v2b8_210">;
> +def v2b8_211 : NVPTXReg<"%v2b8_211">;
> +def v2b8_212 : NVPTXReg<"%v2b8_212">;
> +def v2b8_213 : NVPTXReg<"%v2b8_213">;
> +def v2b8_214 : NVPTXReg<"%v2b8_214">;
> +def v2b8_215 : NVPTXReg<"%v2b8_215">;
> +def v2b8_216 : NVPTXReg<"%v2b8_216">;
> +def v2b8_217 : NVPTXReg<"%v2b8_217">;
> +def v2b8_218 : NVPTXReg<"%v2b8_218">;
> +def v2b8_219 : NVPTXReg<"%v2b8_219">;
> +def v2b8_220 : NVPTXReg<"%v2b8_220">;
> +def v2b8_221 : NVPTXReg<"%v2b8_221">;
> +def v2b8_222 : NVPTXReg<"%v2b8_222">;
> +def v2b8_223 : NVPTXReg<"%v2b8_223">;
> +def v2b8_224 : NVPTXReg<"%v2b8_224">;
> +def v2b8_225 : NVPTXReg<"%v2b8_225">;
> +def v2b8_226 : NVPTXReg<"%v2b8_226">;
> +def v2b8_227 : NVPTXReg<"%v2b8_227">;
> +def v2b8_228 : NVPTXReg<"%v2b8_228">;
> +def v2b8_229 : NVPTXReg<"%v2b8_229">;
> +def v2b8_230 : NVPTXReg<"%v2b8_230">;
> +def v2b8_231 : NVPTXReg<"%v2b8_231">;
> +def v2b8_232 : NVPTXReg<"%v2b8_232">;
> +def v2b8_233 : NVPTXReg<"%v2b8_233">;
> +def v2b8_234 : NVPTXReg<"%v2b8_234">;
> +def v2b8_235 : NVPTXReg<"%v2b8_235">;
> +def v2b8_236 : NVPTXReg<"%v2b8_236">;
> +def v2b8_237 : NVPTXReg<"%v2b8_237">;
> +def v2b8_238 : NVPTXReg<"%v2b8_238">;
> +def v2b8_239 : NVPTXReg<"%v2b8_239">;
> +def v2b8_240 : NVPTXReg<"%v2b8_240">;
> +def v2b8_241 : NVPTXReg<"%v2b8_241">;
> +def v2b8_242 : NVPTXReg<"%v2b8_242">;
> +def v2b8_243 : NVPTXReg<"%v2b8_243">;
> +def v2b8_244 : NVPTXReg<"%v2b8_244">;
> +def v2b8_245 : NVPTXReg<"%v2b8_245">;
> +def v2b8_246 : NVPTXReg<"%v2b8_246">;
> +def v2b8_247 : NVPTXReg<"%v2b8_247">;
> +def v2b8_248 : NVPTXReg<"%v2b8_248">;
> +def v2b8_249 : NVPTXReg<"%v2b8_249">;
> +def v2b8_250 : NVPTXReg<"%v2b8_250">;
> +def v2b8_251 : NVPTXReg<"%v2b8_251">;
> +def v2b8_252 : NVPTXReg<"%v2b8_252">;
> +def v2b8_253 : NVPTXReg<"%v2b8_253">;
> +def v2b8_254 : NVPTXReg<"%v2b8_254">;
> +def v2b8_255 : NVPTXReg<"%v2b8_255">;
> +def v2b8_256 : NVPTXReg<"%v2b8_256">;
> +def v2b8_257 : NVPTXReg<"%v2b8_257">;
> +def v2b8_258 : NVPTXReg<"%v2b8_258">;
> +def v2b8_259 : NVPTXReg<"%v2b8_259">;
> +def v2b8_260 : NVPTXReg<"%v2b8_260">;
> +def v2b8_261 : NVPTXReg<"%v2b8_261">;
> +def v2b8_262 : NVPTXReg<"%v2b8_262">;
> +def v2b8_263 : NVPTXReg<"%v2b8_263">;
> +def v2b8_264 : NVPTXReg<"%v2b8_264">;
> +def v2b8_265 : NVPTXReg<"%v2b8_265">;
> +def v2b8_266 : NVPTXReg<"%v2b8_266">;
> +def v2b8_267 : NVPTXReg<"%v2b8_267">;
> +def v2b8_268 : NVPTXReg<"%v2b8_268">;
> +def v2b8_269 : NVPTXReg<"%v2b8_269">;
> +def v2b8_270 : NVPTXReg<"%v2b8_270">;
> +def v2b8_271 : NVPTXReg<"%v2b8_271">;
> +def v2b8_272 : NVPTXReg<"%v2b8_272">;
> +def v2b8_273 : NVPTXReg<"%v2b8_273">;
> +def v2b8_274 : NVPTXReg<"%v2b8_274">;
> +def v2b8_275 : NVPTXReg<"%v2b8_275">;
> +def v2b8_276 : NVPTXReg<"%v2b8_276">;
> +def v2b8_277 : NVPTXReg<"%v2b8_277">;
> +def v2b8_278 : NVPTXReg<"%v2b8_278">;
> +def v2b8_279 : NVPTXReg<"%v2b8_279">;
> +def v2b8_280 : NVPTXReg<"%v2b8_280">;
> +def v2b8_281 : NVPTXReg<"%v2b8_281">;
> +def v2b8_282 : NVPTXReg<"%v2b8_282">;
> +def v2b8_283 : NVPTXReg<"%v2b8_283">;
> +def v2b8_284 : NVPTXReg<"%v2b8_284">;
> +def v2b8_285 : NVPTXReg<"%v2b8_285">;
> +def v2b8_286 : NVPTXReg<"%v2b8_286">;
> +def v2b8_287 : NVPTXReg<"%v2b8_287">;
> +def v2b8_288 : NVPTXReg<"%v2b8_288">;
> +def v2b8_289 : NVPTXReg<"%v2b8_289">;
> +def v2b8_290 : NVPTXReg<"%v2b8_290">;
> +def v2b8_291 : NVPTXReg<"%v2b8_291">;
> +def v2b8_292 : NVPTXReg<"%v2b8_292">;
> +def v2b8_293 : NVPTXReg<"%v2b8_293">;
> +def v2b8_294 : NVPTXReg<"%v2b8_294">;
> +def v2b8_295 : NVPTXReg<"%v2b8_295">;
> +def v2b8_296 : NVPTXReg<"%v2b8_296">;
> +def v2b8_297 : NVPTXReg<"%v2b8_297">;
> +def v2b8_298 : NVPTXReg<"%v2b8_298">;
> +def v2b8_299 : NVPTXReg<"%v2b8_299">;
> +def v2b8_300 : NVPTXReg<"%v2b8_300">;
> +def v2b8_301 : NVPTXReg<"%v2b8_301">;
> +def v2b8_302 : NVPTXReg<"%v2b8_302">;
> +def v2b8_303 : NVPTXReg<"%v2b8_303">;
> +def v2b8_304 : NVPTXReg<"%v2b8_304">;
> +def v2b8_305 : NVPTXReg<"%v2b8_305">;
> +def v2b8_306 : NVPTXReg<"%v2b8_306">;
> +def v2b8_307 : NVPTXReg<"%v2b8_307">;
> +def v2b8_308 : NVPTXReg<"%v2b8_308">;
> +def v2b8_309 : NVPTXReg<"%v2b8_309">;
> +def v2b8_310 : NVPTXReg<"%v2b8_310">;
> +def v2b8_311 : NVPTXReg<"%v2b8_311">;
> +def v2b8_312 : NVPTXReg<"%v2b8_312">;
> +def v2b8_313 : NVPTXReg<"%v2b8_313">;
> +def v2b8_314 : NVPTXReg<"%v2b8_314">;
> +def v2b8_315 : NVPTXReg<"%v2b8_315">;
> +def v2b8_316 : NVPTXReg<"%v2b8_316">;
> +def v2b8_317 : NVPTXReg<"%v2b8_317">;
> +def v2b8_318 : NVPTXReg<"%v2b8_318">;
> +def v2b8_319 : NVPTXReg<"%v2b8_319">;
> +def v2b8_320 : NVPTXReg<"%v2b8_320">;
> +def v2b8_321 : NVPTXReg<"%v2b8_321">;
> +def v2b8_322 : NVPTXReg<"%v2b8_322">;
> +def v2b8_323 : NVPTXReg<"%v2b8_323">;
> +def v2b8_324 : NVPTXReg<"%v2b8_324">;
> +def v2b8_325 : NVPTXReg<"%v2b8_325">;
> +def v2b8_326 : NVPTXReg<"%v2b8_326">;
> +def v2b8_327 : NVPTXReg<"%v2b8_327">;
> +def v2b8_328 : NVPTXReg<"%v2b8_328">;
> +def v2b8_329 : NVPTXReg<"%v2b8_329">;
> +def v2b8_330 : NVPTXReg<"%v2b8_330">;
> +def v2b8_331 : NVPTXReg<"%v2b8_331">;
> +def v2b8_332 : NVPTXReg<"%v2b8_332">;
> +def v2b8_333 : NVPTXReg<"%v2b8_333">;
> +def v2b8_334 : NVPTXReg<"%v2b8_334">;
> +def v2b8_335 : NVPTXReg<"%v2b8_335">;
> +def v2b8_336 : NVPTXReg<"%v2b8_336">;
> +def v2b8_337 : NVPTXReg<"%v2b8_337">;
> +def v2b8_338 : NVPTXReg<"%v2b8_338">;
> +def v2b8_339 : NVPTXReg<"%v2b8_339">;
> +def v2b8_340 : NVPTXReg<"%v2b8_340">;
> +def v2b8_341 : NVPTXReg<"%v2b8_341">;
> +def v2b8_342 : NVPTXReg<"%v2b8_342">;
> +def v2b8_343 : NVPTXReg<"%v2b8_343">;
> +def v2b8_344 : NVPTXReg<"%v2b8_344">;
> +def v2b8_345 : NVPTXReg<"%v2b8_345">;
> +def v2b8_346 : NVPTXReg<"%v2b8_346">;
> +def v2b8_347 : NVPTXReg<"%v2b8_347">;
> +def v2b8_348 : NVPTXReg<"%v2b8_348">;
> +def v2b8_349 : NVPTXReg<"%v2b8_349">;
> +def v2b8_350 : NVPTXReg<"%v2b8_350">;
> +def v2b8_351 : NVPTXReg<"%v2b8_351">;
> +def v2b8_352 : NVPTXReg<"%v2b8_352">;
> +def v2b8_353 : NVPTXReg<"%v2b8_353">;
> +def v2b8_354 : NVPTXReg<"%v2b8_354">;
> +def v2b8_355 : NVPTXReg<"%v2b8_355">;
> +def v2b8_356 : NVPTXReg<"%v2b8_356">;
> +def v2b8_357 : NVPTXReg<"%v2b8_357">;
> +def v2b8_358 : NVPTXReg<"%v2b8_358">;
> +def v2b8_359 : NVPTXReg<"%v2b8_359">;
> +def v2b8_360 : NVPTXReg<"%v2b8_360">;
> +def v2b8_361 : NVPTXReg<"%v2b8_361">;
> +def v2b8_362 : NVPTXReg<"%v2b8_362">;
> +def v2b8_363 : NVPTXReg<"%v2b8_363">;
> +def v2b8_364 : NVPTXReg<"%v2b8_364">;
> +def v2b8_365 : NVPTXReg<"%v2b8_365">;
> +def v2b8_366 : NVPTXReg<"%v2b8_366">;
> +def v2b8_367 : NVPTXReg<"%v2b8_367">;
> +def v2b8_368 : NVPTXReg<"%v2b8_368">;
> +def v2b8_369 : NVPTXReg<"%v2b8_369">;
> +def v2b8_370 : NVPTXReg<"%v2b8_370">;
> +def v2b8_371 : NVPTXReg<"%v2b8_371">;
> +def v2b8_372 : NVPTXReg<"%v2b8_372">;
> +def v2b8_373 : NVPTXReg<"%v2b8_373">;
> +def v2b8_374 : NVPTXReg<"%v2b8_374">;
> +def v2b8_375 : NVPTXReg<"%v2b8_375">;
> +def v2b8_376 : NVPTXReg<"%v2b8_376">;
> +def v2b8_377 : NVPTXReg<"%v2b8_377">;
> +def v2b8_378 : NVPTXReg<"%v2b8_378">;
> +def v2b8_379 : NVPTXReg<"%v2b8_379">;
> +def v2b8_380 : NVPTXReg<"%v2b8_380">;
> +def v2b8_381 : NVPTXReg<"%v2b8_381">;
> +def v2b8_382 : NVPTXReg<"%v2b8_382">;
> +def v2b8_383 : NVPTXReg<"%v2b8_383">;
> +def v2b8_384 : NVPTXReg<"%v2b8_384">;
> +def v2b8_385 : NVPTXReg<"%v2b8_385">;
> +def v2b8_386 : NVPTXReg<"%v2b8_386">;
> +def v2b8_387 : NVPTXReg<"%v2b8_387">;
> +def v2b8_388 : NVPTXReg<"%v2b8_388">;
> +def v2b8_389 : NVPTXReg<"%v2b8_389">;
> +def v2b8_390 : NVPTXReg<"%v2b8_390">;
> +def v2b8_391 : NVPTXReg<"%v2b8_391">;
> +def v2b8_392 : NVPTXReg<"%v2b8_392">;
> +def v2b8_393 : NVPTXReg<"%v2b8_393">;
> +def v2b8_394 : NVPTXReg<"%v2b8_394">;
> +def v2b8_395 : NVPTXReg<"%v2b8_395">;
> +def v2b16_0 : NVPTXReg<"%v2b16_0">;
> +def v2b16_1 : NVPTXReg<"%v2b16_1">;
> +def v2b16_2 : NVPTXReg<"%v2b16_2">;
> +def v2b16_3 : NVPTXReg<"%v2b16_3">;
> +def v2b16_4 : NVPTXReg<"%v2b16_4">;
> +def v2b16_5 : NVPTXReg<"%v2b16_5">;
> +def v2b16_6 : NVPTXReg<"%v2b16_6">;
> +def v2b16_7 : NVPTXReg<"%v2b16_7">;
> +def v2b16_8 : NVPTXReg<"%v2b16_8">;
> +def v2b16_9 : NVPTXReg<"%v2b16_9">;
> +def v2b16_10 : NVPTXReg<"%v2b16_10">;
> +def v2b16_11 : NVPTXReg<"%v2b16_11">;
> +def v2b16_12 : NVPTXReg<"%v2b16_12">;
> +def v2b16_13 : NVPTXReg<"%v2b16_13">;
> +def v2b16_14 : NVPTXReg<"%v2b16_14">;
> +def v2b16_15 : NVPTXReg<"%v2b16_15">;
> +def v2b16_16 : NVPTXReg<"%v2b16_16">;
> +def v2b16_17 : NVPTXReg<"%v2b16_17">;
> +def v2b16_18 : NVPTXReg<"%v2b16_18">;
> +def v2b16_19 : NVPTXReg<"%v2b16_19">;
> +def v2b16_20 : NVPTXReg<"%v2b16_20">;
> +def v2b16_21 : NVPTXReg<"%v2b16_21">;
> +def v2b16_22 : NVPTXReg<"%v2b16_22">;
> +def v2b16_23 : NVPTXReg<"%v2b16_23">;
> +def v2b16_24 : NVPTXReg<"%v2b16_24">;
> +def v2b16_25 : NVPTXReg<"%v2b16_25">;
> +def v2b16_26 : NVPTXReg<"%v2b16_26">;
> +def v2b16_27 : NVPTXReg<"%v2b16_27">;
> +def v2b16_28 : NVPTXReg<"%v2b16_28">;
> +def v2b16_29 : NVPTXReg<"%v2b16_29">;
> +def v2b16_30 : NVPTXReg<"%v2b16_30">;
> +def v2b16_31 : NVPTXReg<"%v2b16_31">;
> +def v2b16_32 : NVPTXReg<"%v2b16_32">;
> +def v2b16_33 : NVPTXReg<"%v2b16_33">;
> +def v2b16_34 : NVPTXReg<"%v2b16_34">;
> +def v2b16_35 : NVPTXReg<"%v2b16_35">;
> +def v2b16_36 : NVPTXReg<"%v2b16_36">;
> +def v2b16_37 : NVPTXReg<"%v2b16_37">;
> +def v2b16_38 : NVPTXReg<"%v2b16_38">;
> +def v2b16_39 : NVPTXReg<"%v2b16_39">;
> +def v2b16_40 : NVPTXReg<"%v2b16_40">;
> +def v2b16_41 : NVPTXReg<"%v2b16_41">;
> +def v2b16_42 : NVPTXReg<"%v2b16_42">;
> +def v2b16_43 : NVPTXReg<"%v2b16_43">;
> +def v2b16_44 : NVPTXReg<"%v2b16_44">;
> +def v2b16_45 : NVPTXReg<"%v2b16_45">;
> +def v2b16_46 : NVPTXReg<"%v2b16_46">;
> +def v2b16_47 : NVPTXReg<"%v2b16_47">;
> +def v2b16_48 : NVPTXReg<"%v2b16_48">;
> +def v2b16_49 : NVPTXReg<"%v2b16_49">;
> +def v2b16_50 : NVPTXReg<"%v2b16_50">;
> +def v2b16_51 : NVPTXReg<"%v2b16_51">;
> +def v2b16_52 : NVPTXReg<"%v2b16_52">;
> +def v2b16_53 : NVPTXReg<"%v2b16_53">;
> +def v2b16_54 : NVPTXReg<"%v2b16_54">;
> +def v2b16_55 : NVPTXReg<"%v2b16_55">;
> +def v2b16_56 : NVPTXReg<"%v2b16_56">;
> +def v2b16_57 : NVPTXReg<"%v2b16_57">;
> +def v2b16_58 : NVPTXReg<"%v2b16_58">;
> +def v2b16_59 : NVPTXReg<"%v2b16_59">;
> +def v2b16_60 : NVPTXReg<"%v2b16_60">;
> +def v2b16_61 : NVPTXReg<"%v2b16_61">;
> +def v2b16_62 : NVPTXReg<"%v2b16_62">;
> +def v2b16_63 : NVPTXReg<"%v2b16_63">;
> +def v2b16_64 : NVPTXReg<"%v2b16_64">;
> +def v2b16_65 : NVPTXReg<"%v2b16_65">;
> +def v2b16_66 : NVPTXReg<"%v2b16_66">;
> +def v2b16_67 : NVPTXReg<"%v2b16_67">;
> +def v2b16_68 : NVPTXReg<"%v2b16_68">;
> +def v2b16_69 : NVPTXReg<"%v2b16_69">;
> +def v2b16_70 : NVPTXReg<"%v2b16_70">;
> +def v2b16_71 : NVPTXReg<"%v2b16_71">;
> +def v2b16_72 : NVPTXReg<"%v2b16_72">;
> +def v2b16_73 : NVPTXReg<"%v2b16_73">;
> +def v2b16_74 : NVPTXReg<"%v2b16_74">;
> +def v2b16_75 : NVPTXReg<"%v2b16_75">;
> +def v2b16_76 : NVPTXReg<"%v2b16_76">;
> +def v2b16_77 : NVPTXReg<"%v2b16_77">;
> +def v2b16_78 : NVPTXReg<"%v2b16_78">;
> +def v2b16_79 : NVPTXReg<"%v2b16_79">;
> +def v2b16_80 : NVPTXReg<"%v2b16_80">;
> +def v2b16_81 : NVPTXReg<"%v2b16_81">;
> +def v2b16_82 : NVPTXReg<"%v2b16_82">;
> +def v2b16_83 : NVPTXReg<"%v2b16_83">;
> +def v2b16_84 : NVPTXReg<"%v2b16_84">;
> +def v2b16_85 : NVPTXReg<"%v2b16_85">;
> +def v2b16_86 : NVPTXReg<"%v2b16_86">;
> +def v2b16_87 : NVPTXReg<"%v2b16_87">;
> +def v2b16_88 : NVPTXReg<"%v2b16_88">;
> +def v2b16_89 : NVPTXReg<"%v2b16_89">;
> +def v2b16_90 : NVPTXReg<"%v2b16_90">;
> +def v2b16_91 : NVPTXReg<"%v2b16_91">;
> +def v2b16_92 : NVPTXReg<"%v2b16_92">;
> +def v2b16_93 : NVPTXReg<"%v2b16_93">;
> +def v2b16_94 : NVPTXReg<"%v2b16_94">;
> +def v2b16_95 : NVPTXReg<"%v2b16_95">;
> +def v2b16_96 : NVPTXReg<"%v2b16_96">;
> +def v2b16_97 : NVPTXReg<"%v2b16_97">;
> +def v2b16_98 : NVPTXReg<"%v2b16_98">;
> +def v2b16_99 : NVPTXReg<"%v2b16_99">;
> +def v2b16_100 : NVPTXReg<"%v2b16_100">;
> +def v2b16_101 : NVPTXReg<"%v2b16_101">;
> +def v2b16_102 : NVPTXReg<"%v2b16_102">;
> +def v2b16_103 : NVPTXReg<"%v2b16_103">;
> +def v2b16_104 : NVPTXReg<"%v2b16_104">;
> +def v2b16_105 : NVPTXReg<"%v2b16_105">;
> +def v2b16_106 : NVPTXReg<"%v2b16_106">;
> +def v2b16_107 : NVPTXReg<"%v2b16_107">;
> +def v2b16_108 : NVPTXReg<"%v2b16_108">;
> +def v2b16_109 : NVPTXReg<"%v2b16_109">;
> +def v2b16_110 : NVPTXReg<"%v2b16_110">;
> +def v2b16_111 : NVPTXReg<"%v2b16_111">;
> +def v2b16_112 : NVPTXReg<"%v2b16_112">;
> +def v2b16_113 : NVPTXReg<"%v2b16_113">;
> +def v2b16_114 : NVPTXReg<"%v2b16_114">;
> +def v2b16_115 : NVPTXReg<"%v2b16_115">;
> +def v2b16_116 : NVPTXReg<"%v2b16_116">;
> +def v2b16_117 : NVPTXReg<"%v2b16_117">;
> +def v2b16_118 : NVPTXReg<"%v2b16_118">;
> +def v2b16_119 : NVPTXReg<"%v2b16_119">;
> +def v2b16_120 : NVPTXReg<"%v2b16_120">;
> +def v2b16_121 : NVPTXReg<"%v2b16_121">;
> +def v2b16_122 : NVPTXReg<"%v2b16_122">;
> +def v2b16_123 : NVPTXReg<"%v2b16_123">;
> +def v2b16_124 : NVPTXReg<"%v2b16_124">;
> +def v2b16_125 : NVPTXReg<"%v2b16_125">;
> +def v2b16_126 : NVPTXReg<"%v2b16_126">;
> +def v2b16_127 : NVPTXReg<"%v2b16_127">;
> +def v2b16_128 : NVPTXReg<"%v2b16_128">;
> +def v2b16_129 : NVPTXReg<"%v2b16_129">;
> +def v2b16_130 : NVPTXReg<"%v2b16_130">;
> +def v2b16_131 : NVPTXReg<"%v2b16_131">;
> +def v2b16_132 : NVPTXReg<"%v2b16_132">;
> +def v2b16_133 : NVPTXReg<"%v2b16_133">;
> +def v2b16_134 : NVPTXReg<"%v2b16_134">;
> +def v2b16_135 : NVPTXReg<"%v2b16_135">;
> +def v2b16_136 : NVPTXReg<"%v2b16_136">;
> +def v2b16_137 : NVPTXReg<"%v2b16_137">;
> +def v2b16_138 : NVPTXReg<"%v2b16_138">;
> +def v2b16_139 : NVPTXReg<"%v2b16_139">;
> +def v2b16_140 : NVPTXReg<"%v2b16_140">;
> +def v2b16_141 : NVPTXReg<"%v2b16_141">;
> +def v2b16_142 : NVPTXReg<"%v2b16_142">;
> +def v2b16_143 : NVPTXReg<"%v2b16_143">;
> +def v2b16_144 : NVPTXReg<"%v2b16_144">;
> +def v2b16_145 : NVPTXReg<"%v2b16_145">;
> +def v2b16_146 : NVPTXReg<"%v2b16_146">;
> +def v2b16_147 : NVPTXReg<"%v2b16_147">;
> +def v2b16_148 : NVPTXReg<"%v2b16_148">;
> +def v2b16_149 : NVPTXReg<"%v2b16_149">;
> +def v2b16_150 : NVPTXReg<"%v2b16_150">;
> +def v2b16_151 : NVPTXReg<"%v2b16_151">;
> +def v2b16_152 : NVPTXReg<"%v2b16_152">;
> +def v2b16_153 : NVPTXReg<"%v2b16_153">;
> +def v2b16_154 : NVPTXReg<"%v2b16_154">;
> +def v2b16_155 : NVPTXReg<"%v2b16_155">;
> +def v2b16_156 : NVPTXReg<"%v2b16_156">;
> +def v2b16_157 : NVPTXReg<"%v2b16_157">;
> +def v2b16_158 : NVPTXReg<"%v2b16_158">;
> +def v2b16_159 : NVPTXReg<"%v2b16_159">;
> +def v2b16_160 : NVPTXReg<"%v2b16_160">;
> +def v2b16_161 : NVPTXReg<"%v2b16_161">;
> +def v2b16_162 : NVPTXReg<"%v2b16_162">;
> +def v2b16_163 : NVPTXReg<"%v2b16_163">;
> +def v2b16_164 : NVPTXReg<"%v2b16_164">;
> +def v2b16_165 : NVPTXReg<"%v2b16_165">;
> +def v2b16_166 : NVPTXReg<"%v2b16_166">;
> +def v2b16_167 : NVPTXReg<"%v2b16_167">;
> +def v2b16_168 : NVPTXReg<"%v2b16_168">;
> +def v2b16_169 : NVPTXReg<"%v2b16_169">;
> +def v2b16_170 : NVPTXReg<"%v2b16_170">;
> +def v2b16_171 : NVPTXReg<"%v2b16_171">;
> +def v2b16_172 : NVPTXReg<"%v2b16_172">;
> +def v2b16_173 : NVPTXReg<"%v2b16_173">;
> +def v2b16_174 : NVPTXReg<"%v2b16_174">;
> +def v2b16_175 : NVPTXReg<"%v2b16_175">;
> +def v2b16_176 : NVPTXReg<"%v2b16_176">;
> +def v2b16_177 : NVPTXReg<"%v2b16_177">;
> +def v2b16_178 : NVPTXReg<"%v2b16_178">;
> +def v2b16_179 : NVPTXReg<"%v2b16_179">;
> +def v2b16_180 : NVPTXReg<"%v2b16_180">;
> +def v2b16_181 : NVPTXReg<"%v2b16_181">;
> +def v2b16_182 : NVPTXReg<"%v2b16_182">;
> +def v2b16_183 : NVPTXReg<"%v2b16_183">;
> +def v2b16_184 : NVPTXReg<"%v2b16_184">;
> +def v2b16_185 : NVPTXReg<"%v2b16_185">;
> +def v2b16_186 : NVPTXReg<"%v2b16_186">;
> +def v2b16_187 : NVPTXReg<"%v2b16_187">;
> +def v2b16_188 : NVPTXReg<"%v2b16_188">;
> +def v2b16_189 : NVPTXReg<"%v2b16_189">;
> +def v2b16_190 : NVPTXReg<"%v2b16_190">;
> +def v2b16_191 : NVPTXReg<"%v2b16_191">;
> +def v2b16_192 : NVPTXReg<"%v2b16_192">;
> +def v2b16_193 : NVPTXReg<"%v2b16_193">;
> +def v2b16_194 : NVPTXReg<"%v2b16_194">;
> +def v2b16_195 : NVPTXReg<"%v2b16_195">;
> +def v2b16_196 : NVPTXReg<"%v2b16_196">;
> +def v2b16_197 : NVPTXReg<"%v2b16_197">;
> +def v2b16_198 : NVPTXReg<"%v2b16_198">;
> +def v2b16_199 : NVPTXReg<"%v2b16_199">;
> +def v2b16_200 : NVPTXReg<"%v2b16_200">;
> +def v2b16_201 : NVPTXReg<"%v2b16_201">;
> +def v2b16_202 : NVPTXReg<"%v2b16_202">;
> +def v2b16_203 : NVPTXReg<"%v2b16_203">;
> +def v2b16_204 : NVPTXReg<"%v2b16_204">;
> +def v2b16_205 : NVPTXReg<"%v2b16_205">;
> +def v2b16_206 : NVPTXReg<"%v2b16_206">;
> +def v2b16_207 : NVPTXReg<"%v2b16_207">;
> +def v2b16_208 : NVPTXReg<"%v2b16_208">;
> +def v2b16_209 : NVPTXReg<"%v2b16_209">;
> +def v2b16_210 : NVPTXReg<"%v2b16_210">;
> +def v2b16_211 : NVPTXReg<"%v2b16_211">;
> +def v2b16_212 : NVPTXReg<"%v2b16_212">;
> +def v2b16_213 : NVPTXReg<"%v2b16_213">;
> +def v2b16_214 : NVPTXReg<"%v2b16_214">;
> +def v2b16_215 : NVPTXReg<"%v2b16_215">;
> +def v2b16_216 : NVPTXReg<"%v2b16_216">;
> +def v2b16_217 : NVPTXReg<"%v2b16_217">;
> +def v2b16_218 : NVPTXReg<"%v2b16_218">;
> +def v2b16_219 : NVPTXReg<"%v2b16_219">;
> +def v2b16_220 : NVPTXReg<"%v2b16_220">;
> +def v2b16_221 : NVPTXReg<"%v2b16_221">;
> +def v2b16_222 : NVPTXReg<"%v2b16_222">;
> +def v2b16_223 : NVPTXReg<"%v2b16_223">;
> +def v2b16_224 : NVPTXReg<"%v2b16_224">;
> +def v2b16_225 : NVPTXReg<"%v2b16_225">;
> +def v2b16_226 : NVPTXReg<"%v2b16_226">;
> +def v2b16_227 : NVPTXReg<"%v2b16_227">;
> +def v2b16_228 : NVPTXReg<"%v2b16_228">;
> +def v2b16_229 : NVPTXReg<"%v2b16_229">;
> +def v2b16_230 : NVPTXReg<"%v2b16_230">;
> +def v2b16_231 : NVPTXReg<"%v2b16_231">;
> +def v2b16_232 : NVPTXReg<"%v2b16_232">;
> +def v2b16_233 : NVPTXReg<"%v2b16_233">;
> +def v2b16_234 : NVPTXReg<"%v2b16_234">;
> +def v2b16_235 : NVPTXReg<"%v2b16_235">;
> +def v2b16_236 : NVPTXReg<"%v2b16_236">;
> +def v2b16_237 : NVPTXReg<"%v2b16_237">;
> +def v2b16_238 : NVPTXReg<"%v2b16_238">;
> +def v2b16_239 : NVPTXReg<"%v2b16_239">;
> +def v2b16_240 : NVPTXReg<"%v2b16_240">;
> +def v2b16_241 : NVPTXReg<"%v2b16_241">;
> +def v2b16_242 : NVPTXReg<"%v2b16_242">;
> +def v2b16_243 : NVPTXReg<"%v2b16_243">;
> +def v2b16_244 : NVPTXReg<"%v2b16_244">;
> +def v2b16_245 : NVPTXReg<"%v2b16_245">;
> +def v2b16_246 : NVPTXReg<"%v2b16_246">;
> +def v2b16_247 : NVPTXReg<"%v2b16_247">;
> +def v2b16_248 : NVPTXReg<"%v2b16_248">;
> +def v2b16_249 : NVPTXReg<"%v2b16_249">;
> +def v2b16_250 : NVPTXReg<"%v2b16_250">;
> +def v2b16_251 : NVPTXReg<"%v2b16_251">;
> +def v2b16_252 : NVPTXReg<"%v2b16_252">;
> +def v2b16_253 : NVPTXReg<"%v2b16_253">;
> +def v2b16_254 : NVPTXReg<"%v2b16_254">;
> +def v2b16_255 : NVPTXReg<"%v2b16_255">;
> +def v2b16_256 : NVPTXReg<"%v2b16_256">;
> +def v2b16_257 : NVPTXReg<"%v2b16_257">;
> +def v2b16_258 : NVPTXReg<"%v2b16_258">;
> +def v2b16_259 : NVPTXReg<"%v2b16_259">;
> +def v2b16_260 : NVPTXReg<"%v2b16_260">;
> +def v2b16_261 : NVPTXReg<"%v2b16_261">;
> +def v2b16_262 : NVPTXReg<"%v2b16_262">;
> +def v2b16_263 : NVPTXReg<"%v2b16_263">;
> +def v2b16_264 : NVPTXReg<"%v2b16_264">;
> +def v2b16_265 : NVPTXReg<"%v2b16_265">;
> +def v2b16_266 : NVPTXReg<"%v2b16_266">;
> +def v2b16_267 : NVPTXReg<"%v2b16_267">;
> +def v2b16_268 : NVPTXReg<"%v2b16_268">;
> +def v2b16_269 : NVPTXReg<"%v2b16_269">;
> +def v2b16_270 : NVPTXReg<"%v2b16_270">;
> +def v2b16_271 : NVPTXReg<"%v2b16_271">;
> +def v2b16_272 : NVPTXReg<"%v2b16_272">;
> +def v2b16_273 : NVPTXReg<"%v2b16_273">;
> +def v2b16_274 : NVPTXReg<"%v2b16_274">;
> +def v2b16_275 : NVPTXReg<"%v2b16_275">;
> +def v2b16_276 : NVPTXReg<"%v2b16_276">;
> +def v2b16_277 : NVPTXReg<"%v2b16_277">;
> +def v2b16_278 : NVPTXReg<"%v2b16_278">;
> +def v2b16_279 : NVPTXReg<"%v2b16_279">;
> +def v2b16_280 : NVPTXReg<"%v2b16_280">;
> +def v2b16_281 : NVPTXReg<"%v2b16_281">;
> +def v2b16_282 : NVPTXReg<"%v2b16_282">;
> +def v2b16_283 : NVPTXReg<"%v2b16_283">;
> +def v2b16_284 : NVPTXReg<"%v2b16_284">;
> +def v2b16_285 : NVPTXReg<"%v2b16_285">;
> +def v2b16_286 : NVPTXReg<"%v2b16_286">;
> +def v2b16_287 : NVPTXReg<"%v2b16_287">;
> +def v2b16_288 : NVPTXReg<"%v2b16_288">;
> +def v2b16_289 : NVPTXReg<"%v2b16_289">;
> +def v2b16_290 : NVPTXReg<"%v2b16_290">;
> +def v2b16_291 : NVPTXReg<"%v2b16_291">;
> +def v2b16_292 : NVPTXReg<"%v2b16_292">;
> +def v2b16_293 : NVPTXReg<"%v2b16_293">;
> +def v2b16_294 : NVPTXReg<"%v2b16_294">;
> +def v2b16_295 : NVPTXReg<"%v2b16_295">;
> +def v2b16_296 : NVPTXReg<"%v2b16_296">;
> +def v2b16_297 : NVPTXReg<"%v2b16_297">;
> +def v2b16_298 : NVPTXReg<"%v2b16_298">;
> +def v2b16_299 : NVPTXReg<"%v2b16_299">;
> +def v2b16_300 : NVPTXReg<"%v2b16_300">;
> +def v2b16_301 : NVPTXReg<"%v2b16_301">;
> +def v2b16_302 : NVPTXReg<"%v2b16_302">;
> +def v2b16_303 : NVPTXReg<"%v2b16_303">;
> +def v2b16_304 : NVPTXReg<"%v2b16_304">;
> +def v2b16_305 : NVPTXReg<"%v2b16_305">;
> +def v2b16_306 : NVPTXReg<"%v2b16_306">;
> +def v2b16_307 : NVPTXReg<"%v2b16_307">;
> +def v2b16_308 : NVPTXReg<"%v2b16_308">;
> +def v2b16_309 : NVPTXReg<"%v2b16_309">;
> +def v2b16_310 : NVPTXReg<"%v2b16_310">;
> +def v2b16_311 : NVPTXReg<"%v2b16_311">;
> +def v2b16_312 : NVPTXReg<"%v2b16_312">;
> +def v2b16_313 : NVPTXReg<"%v2b16_313">;
> +def v2b16_314 : NVPTXReg<"%v2b16_314">;
> +def v2b16_315 : NVPTXReg<"%v2b16_315">;
> +def v2b16_316 : NVPTXReg<"%v2b16_316">;
> +def v2b16_317 : NVPTXReg<"%v2b16_317">;
> +def v2b16_318 : NVPTXReg<"%v2b16_318">;
> +def v2b16_319 : NVPTXReg<"%v2b16_319">;
> +def v2b16_320 : NVPTXReg<"%v2b16_320">;
> +def v2b16_321 : NVPTXReg<"%v2b16_321">;
> +def v2b16_322 : NVPTXReg<"%v2b16_322">;
> +def v2b16_323 : NVPTXReg<"%v2b16_323">;
> +def v2b16_324 : NVPTXReg<"%v2b16_324">;
> +def v2b16_325 : NVPTXReg<"%v2b16_325">;
> +def v2b16_326 : NVPTXReg<"%v2b16_326">;
> +def v2b16_327 : NVPTXReg<"%v2b16_327">;
> +def v2b16_328 : NVPTXReg<"%v2b16_328">;
> +def v2b16_329 : NVPTXReg<"%v2b16_329">;
> +def v2b16_330 : NVPTXReg<"%v2b16_330">;
> +def v2b16_331 : NVPTXReg<"%v2b16_331">;
> +def v2b16_332 : NVPTXReg<"%v2b16_332">;
> +def v2b16_333 : NVPTXReg<"%v2b16_333">;
> +def v2b16_334 : NVPTXReg<"%v2b16_334">;
> +def v2b16_335 : NVPTXReg<"%v2b16_335">;
> +def v2b16_336 : NVPTXReg<"%v2b16_336">;
> +def v2b16_337 : NVPTXReg<"%v2b16_337">;
> +def v2b16_338 : NVPTXReg<"%v2b16_338">;
> +def v2b16_339 : NVPTXReg<"%v2b16_339">;
> +def v2b16_340 : NVPTXReg<"%v2b16_340">;
> +def v2b16_341 : NVPTXReg<"%v2b16_341">;
> +def v2b16_342 : NVPTXReg<"%v2b16_342">;
> +def v2b16_343 : NVPTXReg<"%v2b16_343">;
> +def v2b16_344 : NVPTXReg<"%v2b16_344">;
> +def v2b16_345 : NVPTXReg<"%v2b16_345">;
> +def v2b16_346 : NVPTXReg<"%v2b16_346">;
> +def v2b16_347 : NVPTXReg<"%v2b16_347">;
> +def v2b16_348 : NVPTXReg<"%v2b16_348">;
> +def v2b16_349 : NVPTXReg<"%v2b16_349">;
> +def v2b16_350 : NVPTXReg<"%v2b16_350">;
> +def v2b16_351 : NVPTXReg<"%v2b16_351">;
> +def v2b16_352 : NVPTXReg<"%v2b16_352">;
> +def v2b16_353 : NVPTXReg<"%v2b16_353">;
> +def v2b16_354 : NVPTXReg<"%v2b16_354">;
> +def v2b16_355 : NVPTXReg<"%v2b16_355">;
> +def v2b16_356 : NVPTXReg<"%v2b16_356">;
> +def v2b16_357 : NVPTXReg<"%v2b16_357">;
> +def v2b16_358 : NVPTXReg<"%v2b16_358">;
> +def v2b16_359 : NVPTXReg<"%v2b16_359">;
> +def v2b16_360 : NVPTXReg<"%v2b16_360">;
> +def v2b16_361 : NVPTXReg<"%v2b16_361">;
> +def v2b16_362 : NVPTXReg<"%v2b16_362">;
> +def v2b16_363 : NVPTXReg<"%v2b16_363">;
> +def v2b16_364 : NVPTXReg<"%v2b16_364">;
> +def v2b16_365 : NVPTXReg<"%v2b16_365">;
> +def v2b16_366 : NVPTXReg<"%v2b16_366">;
> +def v2b16_367 : NVPTXReg<"%v2b16_367">;
> +def v2b16_368 : NVPTXReg<"%v2b16_368">;
> +def v2b16_369 : NVPTXReg<"%v2b16_369">;
> +def v2b16_370 : NVPTXReg<"%v2b16_370">;
> +def v2b16_371 : NVPTXReg<"%v2b16_371">;
> +def v2b16_372 : NVPTXReg<"%v2b16_372">;
> +def v2b16_373 : NVPTXReg<"%v2b16_373">;
> +def v2b16_374 : NVPTXReg<"%v2b16_374">;
> +def v2b16_375 : NVPTXReg<"%v2b16_375">;
> +def v2b16_376 : NVPTXReg<"%v2b16_376">;
> +def v2b16_377 : NVPTXReg<"%v2b16_377">;
> +def v2b16_378 : NVPTXReg<"%v2b16_378">;
> +def v2b16_379 : NVPTXReg<"%v2b16_379">;
> +def v2b16_380 : NVPTXReg<"%v2b16_380">;
> +def v2b16_381 : NVPTXReg<"%v2b16_381">;
> +def v2b16_382 : NVPTXReg<"%v2b16_382">;
> +def v2b16_383 : NVPTXReg<"%v2b16_383">;
> +def v2b16_384 : NVPTXReg<"%v2b16_384">;
> +def v2b16_385 : NVPTXReg<"%v2b16_385">;
> +def v2b16_386 : NVPTXReg<"%v2b16_386">;
> +def v2b16_387 : NVPTXReg<"%v2b16_387">;
> +def v2b16_388 : NVPTXReg<"%v2b16_388">;
> +def v2b16_389 : NVPTXReg<"%v2b16_389">;
> +def v2b16_390 : NVPTXReg<"%v2b16_390">;
> +def v2b16_391 : NVPTXReg<"%v2b16_391">;
> +def v2b16_392 : NVPTXReg<"%v2b16_392">;
> +def v2b16_393 : NVPTXReg<"%v2b16_393">;
> +def v2b16_394 : NVPTXReg<"%v2b16_394">;
> +def v2b16_395 : NVPTXReg<"%v2b16_395">;
> +def v2b32_0 : NVPTXReg<"%v2b32_0">;
> +def v2b32_1 : NVPTXReg<"%v2b32_1">;
> +def v2b32_2 : NVPTXReg<"%v2b32_2">;
> +def v2b32_3 : NVPTXReg<"%v2b32_3">;
> +def v2b32_4 : NVPTXReg<"%v2b32_4">;
> +def v2b32_5 : NVPTXReg<"%v2b32_5">;
> +def v2b32_6 : NVPTXReg<"%v2b32_6">;
> +def v2b32_7 : NVPTXReg<"%v2b32_7">;
> +def v2b32_8 : NVPTXReg<"%v2b32_8">;
> +def v2b32_9 : NVPTXReg<"%v2b32_9">;
> +def v2b32_10 : NVPTXReg<"%v2b32_10">;
> +def v2b32_11 : NVPTXReg<"%v2b32_11">;
> +def v2b32_12 : NVPTXReg<"%v2b32_12">;
> +def v2b32_13 : NVPTXReg<"%v2b32_13">;
> +def v2b32_14 : NVPTXReg<"%v2b32_14">;
> +def v2b32_15 : NVPTXReg<"%v2b32_15">;
> +def v2b32_16 : NVPTXReg<"%v2b32_16">;
> +def v2b32_17 : NVPTXReg<"%v2b32_17">;
> +def v2b32_18 : NVPTXReg<"%v2b32_18">;
> +def v2b32_19 : NVPTXReg<"%v2b32_19">;
> +def v2b32_20 : NVPTXReg<"%v2b32_20">;
> +def v2b32_21 : NVPTXReg<"%v2b32_21">;
> +def v2b32_22 : NVPTXReg<"%v2b32_22">;
> +def v2b32_23 : NVPTXReg<"%v2b32_23">;
> +def v2b32_24 : NVPTXReg<"%v2b32_24">;
> +def v2b32_25 : NVPTXReg<"%v2b32_25">;
> +def v2b32_26 : NVPTXReg<"%v2b32_26">;
> +def v2b32_27 : NVPTXReg<"%v2b32_27">;
> +def v2b32_28 : NVPTXReg<"%v2b32_28">;
> +def v2b32_29 : NVPTXReg<"%v2b32_29">;
> +def v2b32_30 : NVPTXReg<"%v2b32_30">;
> +def v2b32_31 : NVPTXReg<"%v2b32_31">;
> +def v2b32_32 : NVPTXReg<"%v2b32_32">;
> +def v2b32_33 : NVPTXReg<"%v2b32_33">;
> +def v2b32_34 : NVPTXReg<"%v2b32_34">;
> +def v2b32_35 : NVPTXReg<"%v2b32_35">;
> +def v2b32_36 : NVPTXReg<"%v2b32_36">;
> +def v2b32_37 : NVPTXReg<"%v2b32_37">;
> +def v2b32_38 : NVPTXReg<"%v2b32_38">;
> +def v2b32_39 : NVPTXReg<"%v2b32_39">;
> +def v2b32_40 : NVPTXReg<"%v2b32_40">;
> +def v2b32_41 : NVPTXReg<"%v2b32_41">;
> +def v2b32_42 : NVPTXReg<"%v2b32_42">;
> +def v2b32_43 : NVPTXReg<"%v2b32_43">;
> +def v2b32_44 : NVPTXReg<"%v2b32_44">;
> +def v2b32_45 : NVPTXReg<"%v2b32_45">;
> +def v2b32_46 : NVPTXReg<"%v2b32_46">;
> +def v2b32_47 : NVPTXReg<"%v2b32_47">;
> +def v2b32_48 : NVPTXReg<"%v2b32_48">;
> +def v2b32_49 : NVPTXReg<"%v2b32_49">;
> +def v2b32_50 : NVPTXReg<"%v2b32_50">;
> +def v2b32_51 : NVPTXReg<"%v2b32_51">;
> +def v2b32_52 : NVPTXReg<"%v2b32_52">;
> +def v2b32_53 : NVPTXReg<"%v2b32_53">;
> +def v2b32_54 : NVPTXReg<"%v2b32_54">;
> +def v2b32_55 : NVPTXReg<"%v2b32_55">;
> +def v2b32_56 : NVPTXReg<"%v2b32_56">;
> +def v2b32_57 : NVPTXReg<"%v2b32_57">;
> +def v2b32_58 : NVPTXReg<"%v2b32_58">;
> +def v2b32_59 : NVPTXReg<"%v2b32_59">;
> +def v2b32_60 : NVPTXReg<"%v2b32_60">;
> +def v2b32_61 : NVPTXReg<"%v2b32_61">;
> +def v2b32_62 : NVPTXReg<"%v2b32_62">;
> +def v2b32_63 : NVPTXReg<"%v2b32_63">;
> +def v2b32_64 : NVPTXReg<"%v2b32_64">;
> +def v2b32_65 : NVPTXReg<"%v2b32_65">;
> +def v2b32_66 : NVPTXReg<"%v2b32_66">;
> +def v2b32_67 : NVPTXReg<"%v2b32_67">;
> +def v2b32_68 : NVPTXReg<"%v2b32_68">;
> +def v2b32_69 : NVPTXReg<"%v2b32_69">;
> +def v2b32_70 : NVPTXReg<"%v2b32_70">;
> +def v2b32_71 : NVPTXReg<"%v2b32_71">;
> +def v2b32_72 : NVPTXReg<"%v2b32_72">;
> +def v2b32_73 : NVPTXReg<"%v2b32_73">;
> +def v2b32_74 : NVPTXReg<"%v2b32_74">;
> +def v2b32_75 : NVPTXReg<"%v2b32_75">;
> +def v2b32_76 : NVPTXReg<"%v2b32_76">;
> +def v2b32_77 : NVPTXReg<"%v2b32_77">;
> +def v2b32_78 : NVPTXReg<"%v2b32_78">;
> +def v2b32_79 : NVPTXReg<"%v2b32_79">;
> +def v2b32_80 : NVPTXReg<"%v2b32_80">;
> +def v2b32_81 : NVPTXReg<"%v2b32_81">;
> +def v2b32_82 : NVPTXReg<"%v2b32_82">;
> +def v2b32_83 : NVPTXReg<"%v2b32_83">;
> +def v2b32_84 : NVPTXReg<"%v2b32_84">;
> +def v2b32_85 : NVPTXReg<"%v2b32_85">;
> +def v2b32_86 : NVPTXReg<"%v2b32_86">;
> +def v2b32_87 : NVPTXReg<"%v2b32_87">;
> +def v2b32_88 : NVPTXReg<"%v2b32_88">;
> +def v2b32_89 : NVPTXReg<"%v2b32_89">;
> +def v2b32_90 : NVPTXReg<"%v2b32_90">;
> +def v2b32_91 : NVPTXReg<"%v2b32_91">;
> +def v2b32_92 : NVPTXReg<"%v2b32_92">;
> +def v2b32_93 : NVPTXReg<"%v2b32_93">;
> +def v2b32_94 : NVPTXReg<"%v2b32_94">;
> +def v2b32_95 : NVPTXReg<"%v2b32_95">;
> +def v2b32_96 : NVPTXReg<"%v2b32_96">;
> +def v2b32_97 : NVPTXReg<"%v2b32_97">;
> +def v2b32_98 : NVPTXReg<"%v2b32_98">;
> +def v2b32_99 : NVPTXReg<"%v2b32_99">;
> +def v2b32_100 : NVPTXReg<"%v2b32_100">;
> +def v2b32_101 : NVPTXReg<"%v2b32_101">;
> +def v2b32_102 : NVPTXReg<"%v2b32_102">;
> +def v2b32_103 : NVPTXReg<"%v2b32_103">;
> +def v2b32_104 : NVPTXReg<"%v2b32_104">;
> +def v2b32_105 : NVPTXReg<"%v2b32_105">;
> +def v2b32_106 : NVPTXReg<"%v2b32_106">;
> +def v2b32_107 : NVPTXReg<"%v2b32_107">;
> +def v2b32_108 : NVPTXReg<"%v2b32_108">;
> +def v2b32_109 : NVPTXReg<"%v2b32_109">;
> +def v2b32_110 : NVPTXReg<"%v2b32_110">;
> +def v2b32_111 : NVPTXReg<"%v2b32_111">;
> +def v2b32_112 : NVPTXReg<"%v2b32_112">;
> +def v2b32_113 : NVPTXReg<"%v2b32_113">;
> +def v2b32_114 : NVPTXReg<"%v2b32_114">;
> +def v2b32_115 : NVPTXReg<"%v2b32_115">;
> +def v2b32_116 : NVPTXReg<"%v2b32_116">;
> +def v2b32_117 : NVPTXReg<"%v2b32_117">;
> +def v2b32_118 : NVPTXReg<"%v2b32_118">;
> +def v2b32_119 : NVPTXReg<"%v2b32_119">;
> +def v2b32_120 : NVPTXReg<"%v2b32_120">;
> +def v2b32_121 : NVPTXReg<"%v2b32_121">;
> +def v2b32_122 : NVPTXReg<"%v2b32_122">;
> +def v2b32_123 : NVPTXReg<"%v2b32_123">;
> +def v2b32_124 : NVPTXReg<"%v2b32_124">;
> +def v2b32_125 : NVPTXReg<"%v2b32_125">;
> +def v2b32_126 : NVPTXReg<"%v2b32_126">;
> +def v2b32_127 : NVPTXReg<"%v2b32_127">;
> +def v2b32_128 : NVPTXReg<"%v2b32_128">;
> +def v2b32_129 : NVPTXReg<"%v2b32_129">;
> +def v2b32_130 : NVPTXReg<"%v2b32_130">;
> +def v2b32_131 : NVPTXReg<"%v2b32_131">;
> +def v2b32_132 : NVPTXReg<"%v2b32_132">;
> +def v2b32_133 : NVPTXReg<"%v2b32_133">;
> +def v2b32_134 : NVPTXReg<"%v2b32_134">;
> +def v2b32_135 : NVPTXReg<"%v2b32_135">;
> +def v2b32_136 : NVPTXReg<"%v2b32_136">;
> +def v2b32_137 : NVPTXReg<"%v2b32_137">;
> +def v2b32_138 : NVPTXReg<"%v2b32_138">;
> +def v2b32_139 : NVPTXReg<"%v2b32_139">;
> +def v2b32_140 : NVPTXReg<"%v2b32_140">;
> +def v2b32_141 : NVPTXReg<"%v2b32_141">;
> +def v2b32_142 : NVPTXReg<"%v2b32_142">;
> +def v2b32_143 : NVPTXReg<"%v2b32_143">;
> +def v2b32_144 : NVPTXReg<"%v2b32_144">;
> +def v2b32_145 : NVPTXReg<"%v2b32_145">;
> +def v2b32_146 : NVPTXReg<"%v2b32_146">;
> +def v2b32_147 : NVPTXReg<"%v2b32_147">;
> +def v2b32_148 : NVPTXReg<"%v2b32_148">;
> +def v2b32_149 : NVPTXReg<"%v2b32_149">;
> +def v2b32_150 : NVPTXReg<"%v2b32_150">;
> +def v2b32_151 : NVPTXReg<"%v2b32_151">;
> +def v2b32_152 : NVPTXReg<"%v2b32_152">;
> +def v2b32_153 : NVPTXReg<"%v2b32_153">;
> +def v2b32_154 : NVPTXReg<"%v2b32_154">;
> +def v2b32_155 : NVPTXReg<"%v2b32_155">;
> +def v2b32_156 : NVPTXReg<"%v2b32_156">;
> +def v2b32_157 : NVPTXReg<"%v2b32_157">;
> +def v2b32_158 : NVPTXReg<"%v2b32_158">;
> +def v2b32_159 : NVPTXReg<"%v2b32_159">;
> +def v2b32_160 : NVPTXReg<"%v2b32_160">;
> +def v2b32_161 : NVPTXReg<"%v2b32_161">;
> +def v2b32_162 : NVPTXReg<"%v2b32_162">;
> +def v2b32_163 : NVPTXReg<"%v2b32_163">;
> +def v2b32_164 : NVPTXReg<"%v2b32_164">;
> +def v2b32_165 : NVPTXReg<"%v2b32_165">;
> +def v2b32_166 : NVPTXReg<"%v2b32_166">;
> +def v2b32_167 : NVPTXReg<"%v2b32_167">;
> +def v2b32_168 : NVPTXReg<"%v2b32_168">;
> +def v2b32_169 : NVPTXReg<"%v2b32_169">;
> +def v2b32_170 : NVPTXReg<"%v2b32_170">;
> +def v2b32_171 : NVPTXReg<"%v2b32_171">;
> +def v2b32_172 : NVPTXReg<"%v2b32_172">;
> +def v2b32_173 : NVPTXReg<"%v2b32_173">;
> +def v2b32_174 : NVPTXReg<"%v2b32_174">;
> +def v2b32_175 : NVPTXReg<"%v2b32_175">;
> +def v2b32_176 : NVPTXReg<"%v2b32_176">;
> +def v2b32_177 : NVPTXReg<"%v2b32_177">;
> +def v2b32_178 : NVPTXReg<"%v2b32_178">;
> +def v2b32_179 : NVPTXReg<"%v2b32_179">;
> +def v2b32_180 : NVPTXReg<"%v2b32_180">;
> +def v2b32_181 : NVPTXReg<"%v2b32_181">;
> +def v2b32_182 : NVPTXReg<"%v2b32_182">;
> +def v2b32_183 : NVPTXReg<"%v2b32_183">;
> +def v2b32_184 : NVPTXReg<"%v2b32_184">;
> +def v2b32_185 : NVPTXReg<"%v2b32_185">;
> +def v2b32_186 : NVPTXReg<"%v2b32_186">;
> +def v2b32_187 : NVPTXReg<"%v2b32_187">;
> +def v2b32_188 : NVPTXReg<"%v2b32_188">;
> +def v2b32_189 : NVPTXReg<"%v2b32_189">;
> +def v2b32_190 : NVPTXReg<"%v2b32_190">;
> +def v2b32_191 : NVPTXReg<"%v2b32_191">;
> +def v2b32_192 : NVPTXReg<"%v2b32_192">;
> +def v2b32_193 : NVPTXReg<"%v2b32_193">;
> +def v2b32_194 : NVPTXReg<"%v2b32_194">;
> +def v2b32_195 : NVPTXReg<"%v2b32_195">;
> +def v2b32_196 : NVPTXReg<"%v2b32_196">;
> +def v2b32_197 : NVPTXReg<"%v2b32_197">;
> +def v2b32_198 : NVPTXReg<"%v2b32_198">;
> +def v2b32_199 : NVPTXReg<"%v2b32_199">;
> +def v2b32_200 : NVPTXReg<"%v2b32_200">;
> +def v2b32_201 : NVPTXReg<"%v2b32_201">;
> +def v2b32_202 : NVPTXReg<"%v2b32_202">;
> +def v2b32_203 : NVPTXReg<"%v2b32_203">;
> +def v2b32_204 : NVPTXReg<"%v2b32_204">;
> +def v2b32_205 : NVPTXReg<"%v2b32_205">;
> +def v2b32_206 : NVPTXReg<"%v2b32_206">;
> +def v2b32_207 : NVPTXReg<"%v2b32_207">;
> +def v2b32_208 : NVPTXReg<"%v2b32_208">;
> +def v2b32_209 : NVPTXReg<"%v2b32_209">;
> +def v2b32_210 : NVPTXReg<"%v2b32_210">;
> +def v2b32_211 : NVPTXReg<"%v2b32_211">;
> +def v2b32_212 : NVPTXReg<"%v2b32_212">;
> +def v2b32_213 : NVPTXReg<"%v2b32_213">;
> +def v2b32_214 : NVPTXReg<"%v2b32_214">;
> +def v2b32_215 : NVPTXReg<"%v2b32_215">;
> +def v2b32_216 : NVPTXReg<"%v2b32_216">;
> +def v2b32_217 : NVPTXReg<"%v2b32_217">;
> +def v2b32_218 : NVPTXReg<"%v2b32_218">;
> +def v2b32_219 : NVPTXReg<"%v2b32_219">;
> +def v2b32_220 : NVPTXReg<"%v2b32_220">;
> +def v2b32_221 : NVPTXReg<"%v2b32_221">;
> +def v2b32_222 : NVPTXReg<"%v2b32_222">;
> +def v2b32_223 : NVPTXReg<"%v2b32_223">;
> +def v2b32_224 : NVPTXReg<"%v2b32_224">;
> +def v2b32_225 : NVPTXReg<"%v2b32_225">;
> +def v2b32_226 : NVPTXReg<"%v2b32_226">;
> +def v2b32_227 : NVPTXReg<"%v2b32_227">;
> +def v2b32_228 : NVPTXReg<"%v2b32_228">;
> +def v2b32_229 : NVPTXReg<"%v2b32_229">;
> +def v2b32_230 : NVPTXReg<"%v2b32_230">;
> +def v2b32_231 : NVPTXReg<"%v2b32_231">;
> +def v2b32_232 : NVPTXReg<"%v2b32_232">;
> +def v2b32_233 : NVPTXReg<"%v2b32_233">;
> +def v2b32_234 : NVPTXReg<"%v2b32_234">;
> +def v2b32_235 : NVPTXReg<"%v2b32_235">;
> +def v2b32_236 : NVPTXReg<"%v2b32_236">;
> +def v2b32_237 : NVPTXReg<"%v2b32_237">;
> +def v2b32_238 : NVPTXReg<"%v2b32_238">;
> +def v2b32_239 : NVPTXReg<"%v2b32_239">;
> +def v2b32_240 : NVPTXReg<"%v2b32_240">;
> +def v2b32_241 : NVPTXReg<"%v2b32_241">;
> +def v2b32_242 : NVPTXReg<"%v2b32_242">;
> +def v2b32_243 : NVPTXReg<"%v2b32_243">;
> +def v2b32_244 : NVPTXReg<"%v2b32_244">;
> +def v2b32_245 : NVPTXReg<"%v2b32_245">;
> +def v2b32_246 : NVPTXReg<"%v2b32_246">;
> +def v2b32_247 : NVPTXReg<"%v2b32_247">;
> +def v2b32_248 : NVPTXReg<"%v2b32_248">;
> +def v2b32_249 : NVPTXReg<"%v2b32_249">;
> +def v2b32_250 : NVPTXReg<"%v2b32_250">;
> +def v2b32_251 : NVPTXReg<"%v2b32_251">;
> +def v2b32_252 : NVPTXReg<"%v2b32_252">;
> +def v2b32_253 : NVPTXReg<"%v2b32_253">;
> +def v2b32_254 : NVPTXReg<"%v2b32_254">;
> +def v2b32_255 : NVPTXReg<"%v2b32_255">;
> +def v2b32_256 : NVPTXReg<"%v2b32_256">;
> +def v2b32_257 : NVPTXReg<"%v2b32_257">;
> +def v2b32_258 : NVPTXReg<"%v2b32_258">;
> +def v2b32_259 : NVPTXReg<"%v2b32_259">;
> +def v2b32_260 : NVPTXReg<"%v2b32_260">;
> +def v2b32_261 : NVPTXReg<"%v2b32_261">;
> +def v2b32_262 : NVPTXReg<"%v2b32_262">;
> +def v2b32_263 : NVPTXReg<"%v2b32_263">;
> +def v2b32_264 : NVPTXReg<"%v2b32_264">;
> +def v2b32_265 : NVPTXReg<"%v2b32_265">;
> +def v2b32_266 : NVPTXReg<"%v2b32_266">;
> +def v2b32_267 : NVPTXReg<"%v2b32_267">;
> +def v2b32_268 : NVPTXReg<"%v2b32_268">;
> +def v2b32_269 : NVPTXReg<"%v2b32_269">;
> +def v2b32_270 : NVPTXReg<"%v2b32_270">;
> +def v2b32_271 : NVPTXReg<"%v2b32_271">;
> +def v2b32_272 : NVPTXReg<"%v2b32_272">;
> +def v2b32_273 : NVPTXReg<"%v2b32_273">;
> +def v2b32_274 : NVPTXReg<"%v2b32_274">;
> +def v2b32_275 : NVPTXReg<"%v2b32_275">;
> +def v2b32_276 : NVPTXReg<"%v2b32_276">;
> +def v2b32_277 : NVPTXReg<"%v2b32_277">;
> +def v2b32_278 : NVPTXReg<"%v2b32_278">;
> +def v2b32_279 : NVPTXReg<"%v2b32_279">;
> +def v2b32_280 : NVPTXReg<"%v2b32_280">;
> +def v2b32_281 : NVPTXReg<"%v2b32_281">;
> +def v2b32_282 : NVPTXReg<"%v2b32_282">;
> +def v2b32_283 : NVPTXReg<"%v2b32_283">;
> +def v2b32_284 : NVPTXReg<"%v2b32_284">;
> +def v2b32_285 : NVPTXReg<"%v2b32_285">;
> +def v2b32_286 : NVPTXReg<"%v2b32_286">;
> +def v2b32_287 : NVPTXReg<"%v2b32_287">;
> +def v2b32_288 : NVPTXReg<"%v2b32_288">;
> +def v2b32_289 : NVPTXReg<"%v2b32_289">;
> +def v2b32_290 : NVPTXReg<"%v2b32_290">;
> +def v2b32_291 : NVPTXReg<"%v2b32_291">;
> +def v2b32_292 : NVPTXReg<"%v2b32_292">;
> +def v2b32_293 : NVPTXReg<"%v2b32_293">;
> +def v2b32_294 : NVPTXReg<"%v2b32_294">;
> +def v2b32_295 : NVPTXReg<"%v2b32_295">;
> +def v2b32_296 : NVPTXReg<"%v2b32_296">;
> +def v2b32_297 : NVPTXReg<"%v2b32_297">;
> +def v2b32_298 : NVPTXReg<"%v2b32_298">;
> +def v2b32_299 : NVPTXReg<"%v2b32_299">;
> +def v2b32_300 : NVPTXReg<"%v2b32_300">;
> +def v2b32_301 : NVPTXReg<"%v2b32_301">;
> +def v2b32_302 : NVPTXReg<"%v2b32_302">;
> +def v2b32_303 : NVPTXReg<"%v2b32_303">;
> +def v2b32_304 : NVPTXReg<"%v2b32_304">;
> +def v2b32_305 : NVPTXReg<"%v2b32_305">;
> +def v2b32_306 : NVPTXReg<"%v2b32_306">;
> +def v2b32_307 : NVPTXReg<"%v2b32_307">;
> +def v2b32_308 : NVPTXReg<"%v2b32_308">;
> +def v2b32_309 : NVPTXReg<"%v2b32_309">;
> +def v2b32_310 : NVPTXReg<"%v2b32_310">;
> +def v2b32_311 : NVPTXReg<"%v2b32_311">;
> +def v2b32_312 : NVPTXReg<"%v2b32_312">;
> +def v2b32_313 : NVPTXReg<"%v2b32_313">;
> +def v2b32_314 : NVPTXReg<"%v2b32_314">;
> +def v2b32_315 : NVPTXReg<"%v2b32_315">;
> +def v2b32_316 : NVPTXReg<"%v2b32_316">;
> +def v2b32_317 : NVPTXReg<"%v2b32_317">;
> +def v2b32_318 : NVPTXReg<"%v2b32_318">;
> +def v2b32_319 : NVPTXReg<"%v2b32_319">;
> +def v2b32_320 : NVPTXReg<"%v2b32_320">;
> +def v2b32_321 : NVPTXReg<"%v2b32_321">;
> +def v2b32_322 : NVPTXReg<"%v2b32_322">;
> +def v2b32_323 : NVPTXReg<"%v2b32_323">;
> +def v2b32_324 : NVPTXReg<"%v2b32_324">;
> +def v2b32_325 : NVPTXReg<"%v2b32_325">;
> +def v2b32_326 : NVPTXReg<"%v2b32_326">;
> +def v2b32_327 : NVPTXReg<"%v2b32_327">;
> +def v2b32_328 : NVPTXReg<"%v2b32_328">;
> +def v2b32_329 : NVPTXReg<"%v2b32_329">;
> +def v2b32_330 : NVPTXReg<"%v2b32_330">;
> +def v2b32_331 : NVPTXReg<"%v2b32_331">;
> +def v2b32_332 : NVPTXReg<"%v2b32_332">;
> +def v2b32_333 : NVPTXReg<"%v2b32_333">;
> +def v2b32_334 : NVPTXReg<"%v2b32_334">;
> +def v2b32_335 : NVPTXReg<"%v2b32_335">;
> +def v2b32_336 : NVPTXReg<"%v2b32_336">;
> +def v2b32_337 : NVPTXReg<"%v2b32_337">;
> +def v2b32_338 : NVPTXReg<"%v2b32_338">;
> +def v2b32_339 : NVPTXReg<"%v2b32_339">;
> +def v2b32_340 : NVPTXReg<"%v2b32_340">;
> +def v2b32_341 : NVPTXReg<"%v2b32_341">;
> +def v2b32_342 : NVPTXReg<"%v2b32_342">;
> +def v2b32_343 : NVPTXReg<"%v2b32_343">;
> +def v2b32_344 : NVPTXReg<"%v2b32_344">;
> +def v2b32_345 : NVPTXReg<"%v2b32_345">;
> +def v2b32_346 : NVPTXReg<"%v2b32_346">;
> +def v2b32_347 : NVPTXReg<"%v2b32_347">;
> +def v2b32_348 : NVPTXReg<"%v2b32_348">;
> +def v2b32_349 : NVPTXReg<"%v2b32_349">;
> +def v2b32_350 : NVPTXReg<"%v2b32_350">;
> +def v2b32_351 : NVPTXReg<"%v2b32_351">;
> +def v2b32_352 : NVPTXReg<"%v2b32_352">;
> +def v2b32_353 : NVPTXReg<"%v2b32_353">;
> +def v2b32_354 : NVPTXReg<"%v2b32_354">;
> +def v2b32_355 : NVPTXReg<"%v2b32_355">;
> +def v2b32_356 : NVPTXReg<"%v2b32_356">;
> +def v2b32_357 : NVPTXReg<"%v2b32_357">;
> +def v2b32_358 : NVPTXReg<"%v2b32_358">;
> +def v2b32_359 : NVPTXReg<"%v2b32_359">;
> +def v2b32_360 : NVPTXReg<"%v2b32_360">;
> +def v2b32_361 : NVPTXReg<"%v2b32_361">;
> +def v2b32_362 : NVPTXReg<"%v2b32_362">;
> +def v2b32_363 : NVPTXReg<"%v2b32_363">;
> +def v2b32_364 : NVPTXReg<"%v2b32_364">;
> +def v2b32_365 : NVPTXReg<"%v2b32_365">;
> +def v2b32_366 : NVPTXReg<"%v2b32_366">;
> +def v2b32_367 : NVPTXReg<"%v2b32_367">;
> +def v2b32_368 : NVPTXReg<"%v2b32_368">;
> +def v2b32_369 : NVPTXReg<"%v2b32_369">;
> +def v2b32_370 : NVPTXReg<"%v2b32_370">;
> +def v2b32_371 : NVPTXReg<"%v2b32_371">;
> +def v2b32_372 : NVPTXReg<"%v2b32_372">;
> +def v2b32_373 : NVPTXReg<"%v2b32_373">;
> +def v2b32_374 : NVPTXReg<"%v2b32_374">;
> +def v2b32_375 : NVPTXReg<"%v2b32_375">;
> +def v2b32_376 : NVPTXReg<"%v2b32_376">;
> +def v2b32_377 : NVPTXReg<"%v2b32_377">;
> +def v2b32_378 : NVPTXReg<"%v2b32_378">;
> +def v2b32_379 : NVPTXReg<"%v2b32_379">;
> +def v2b32_380 : NVPTXReg<"%v2b32_380">;
> +def v2b32_381 : NVPTXReg<"%v2b32_381">;
> +def v2b32_382 : NVPTXReg<"%v2b32_382">;
> +def v2b32_383 : NVPTXReg<"%v2b32_383">;
> +def v2b32_384 : NVPTXReg<"%v2b32_384">;
> +def v2b32_385 : NVPTXReg<"%v2b32_385">;
> +def v2b32_386 : NVPTXReg<"%v2b32_386">;
> +def v2b32_387 : NVPTXReg<"%v2b32_387">;
> +def v2b32_388 : NVPTXReg<"%v2b32_388">;
> +def v2b32_389 : NVPTXReg<"%v2b32_389">;
> +def v2b32_390 : NVPTXReg<"%v2b32_390">;
> +def v2b32_391 : NVPTXReg<"%v2b32_391">;
> +def v2b32_392 : NVPTXReg<"%v2b32_392">;
> +def v2b32_393 : NVPTXReg<"%v2b32_393">;
> +def v2b32_394 : NVPTXReg<"%v2b32_394">;
> +def v2b32_395 : NVPTXReg<"%v2b32_395">;
> +def v2b64_0 : NVPTXReg<"%v2b64_0">;
> +def v2b64_1 : NVPTXReg<"%v2b64_1">;
> +def v2b64_2 : NVPTXReg<"%v2b64_2">;
> +def v2b64_3 : NVPTXReg<"%v2b64_3">;
> +def v2b64_4 : NVPTXReg<"%v2b64_4">;
> +def v2b64_5 : NVPTXReg<"%v2b64_5">;
> +def v2b64_6 : NVPTXReg<"%v2b64_6">;
> +def v2b64_7 : NVPTXReg<"%v2b64_7">;
> +def v2b64_8 : NVPTXReg<"%v2b64_8">;
> +def v2b64_9 : NVPTXReg<"%v2b64_9">;
> +def v2b64_10 : NVPTXReg<"%v2b64_10">;
> +def v2b64_11 : NVPTXReg<"%v2b64_11">;
> +def v2b64_12 : NVPTXReg<"%v2b64_12">;
> +def v2b64_13 : NVPTXReg<"%v2b64_13">;
> +def v2b64_14 : NVPTXReg<"%v2b64_14">;
> +def v2b64_15 : NVPTXReg<"%v2b64_15">;
> +def v2b64_16 : NVPTXReg<"%v2b64_16">;
> +def v2b64_17 : NVPTXReg<"%v2b64_17">;
> +def v2b64_18 : NVPTXReg<"%v2b64_18">;
> +def v2b64_19 : NVPTXReg<"%v2b64_19">;
> +def v2b64_20 : NVPTXReg<"%v2b64_20">;
> +def v2b64_21 : NVPTXReg<"%v2b64_21">;
> +def v2b64_22 : NVPTXReg<"%v2b64_22">;
> +def v2b64_23 : NVPTXReg<"%v2b64_23">;
> +def v2b64_24 : NVPTXReg<"%v2b64_24">;
> +def v2b64_25 : NVPTXReg<"%v2b64_25">;
> +def v2b64_26 : NVPTXReg<"%v2b64_26">;
> +def v2b64_27 : NVPTXReg<"%v2b64_27">;
> +def v2b64_28 : NVPTXReg<"%v2b64_28">;
> +def v2b64_29 : NVPTXReg<"%v2b64_29">;
> +def v2b64_30 : NVPTXReg<"%v2b64_30">;
> +def v2b64_31 : NVPTXReg<"%v2b64_31">;
> +def v2b64_32 : NVPTXReg<"%v2b64_32">;
> +def v2b64_33 : NVPTXReg<"%v2b64_33">;
> +def v2b64_34 : NVPTXReg<"%v2b64_34">;
> +def v2b64_35 : NVPTXReg<"%v2b64_35">;
> +def v2b64_36 : NVPTXReg<"%v2b64_36">;
> +def v2b64_37 : NVPTXReg<"%v2b64_37">;
> +def v2b64_38 : NVPTXReg<"%v2b64_38">;
> +def v2b64_39 : NVPTXReg<"%v2b64_39">;
> +def v2b64_40 : NVPTXReg<"%v2b64_40">;
> +def v2b64_41 : NVPTXReg<"%v2b64_41">;
> +def v2b64_42 : NVPTXReg<"%v2b64_42">;
> +def v2b64_43 : NVPTXReg<"%v2b64_43">;
> +def v2b64_44 : NVPTXReg<"%v2b64_44">;
> +def v2b64_45 : NVPTXReg<"%v2b64_45">;
> +def v2b64_46 : NVPTXReg<"%v2b64_46">;
> +def v2b64_47 : NVPTXReg<"%v2b64_47">;
> +def v2b64_48 : NVPTXReg<"%v2b64_48">;
> +def v2b64_49 : NVPTXReg<"%v2b64_49">;
> +def v2b64_50 : NVPTXReg<"%v2b64_50">;
> +def v2b64_51 : NVPTXReg<"%v2b64_51">;
> +def v2b64_52 : NVPTXReg<"%v2b64_52">;
> +def v2b64_53 : NVPTXReg<"%v2b64_53">;
> +def v2b64_54 : NVPTXReg<"%v2b64_54">;
> +def v2b64_55 : NVPTXReg<"%v2b64_55">;
> +def v2b64_56 : NVPTXReg<"%v2b64_56">;
> +def v2b64_57 : NVPTXReg<"%v2b64_57">;
> +def v2b64_58 : NVPTXReg<"%v2b64_58">;
> +def v2b64_59 : NVPTXReg<"%v2b64_59">;
> +def v2b64_60 : NVPTXReg<"%v2b64_60">;
> +def v2b64_61 : NVPTXReg<"%v2b64_61">;
> +def v2b64_62 : NVPTXReg<"%v2b64_62">;
> +def v2b64_63 : NVPTXReg<"%v2b64_63">;
> +def v2b64_64 : NVPTXReg<"%v2b64_64">;
> +def v2b64_65 : NVPTXReg<"%v2b64_65">;
> +def v2b64_66 : NVPTXReg<"%v2b64_66">;
> +def v2b64_67 : NVPTXReg<"%v2b64_67">;
> +def v2b64_68 : NVPTXReg<"%v2b64_68">;
> +def v2b64_69 : NVPTXReg<"%v2b64_69">;
> +def v2b64_70 : NVPTXReg<"%v2b64_70">;
> +def v2b64_71 : NVPTXReg<"%v2b64_71">;
> +def v2b64_72 : NVPTXReg<"%v2b64_72">;
> +def v2b64_73 : NVPTXReg<"%v2b64_73">;
> +def v2b64_74 : NVPTXReg<"%v2b64_74">;
> +def v2b64_75 : NVPTXReg<"%v2b64_75">;
> +def v2b64_76 : NVPTXReg<"%v2b64_76">;
> +def v2b64_77 : NVPTXReg<"%v2b64_77">;
> +def v2b64_78 : NVPTXReg<"%v2b64_78">;
> +def v2b64_79 : NVPTXReg<"%v2b64_79">;
> +def v2b64_80 : NVPTXReg<"%v2b64_80">;
> +def v2b64_81 : NVPTXReg<"%v2b64_81">;
> +def v2b64_82 : NVPTXReg<"%v2b64_82">;
> +def v2b64_83 : NVPTXReg<"%v2b64_83">;
> +def v2b64_84 : NVPTXReg<"%v2b64_84">;
> +def v2b64_85 : NVPTXReg<"%v2b64_85">;
> +def v2b64_86 : NVPTXReg<"%v2b64_86">;
> +def v2b64_87 : NVPTXReg<"%v2b64_87">;
> +def v2b64_88 : NVPTXReg<"%v2b64_88">;
> +def v2b64_89 : NVPTXReg<"%v2b64_89">;
> +def v2b64_90 : NVPTXReg<"%v2b64_90">;
> +def v2b64_91 : NVPTXReg<"%v2b64_91">;
> +def v2b64_92 : NVPTXReg<"%v2b64_92">;
> +def v2b64_93 : NVPTXReg<"%v2b64_93">;
> +def v2b64_94 : NVPTXReg<"%v2b64_94">;
> +def v2b64_95 : NVPTXReg<"%v2b64_95">;
> +def v2b64_96 : NVPTXReg<"%v2b64_96">;
> +def v2b64_97 : NVPTXReg<"%v2b64_97">;
> +def v2b64_98 : NVPTXReg<"%v2b64_98">;
> +def v2b64_99 : NVPTXReg<"%v2b64_99">;
> +def v2b64_100 : NVPTXReg<"%v2b64_100">;
> +def v2b64_101 : NVPTXReg<"%v2b64_101">;
> +def v2b64_102 : NVPTXReg<"%v2b64_102">;
> +def v2b64_103 : NVPTXReg<"%v2b64_103">;
> +def v2b64_104 : NVPTXReg<"%v2b64_104">;
> +def v2b64_105 : NVPTXReg<"%v2b64_105">;
> +def v2b64_106 : NVPTXReg<"%v2b64_106">;
> +def v2b64_107 : NVPTXReg<"%v2b64_107">;
> +def v2b64_108 : NVPTXReg<"%v2b64_108">;
> +def v2b64_109 : NVPTXReg<"%v2b64_109">;
> +def v2b64_110 : NVPTXReg<"%v2b64_110">;
> +def v2b64_111 : NVPTXReg<"%v2b64_111">;
> +def v2b64_112 : NVPTXReg<"%v2b64_112">;
> +def v2b64_113 : NVPTXReg<"%v2b64_113">;
> +def v2b64_114 : NVPTXReg<"%v2b64_114">;
> +def v2b64_115 : NVPTXReg<"%v2b64_115">;
> +def v2b64_116 : NVPTXReg<"%v2b64_116">;
> +def v2b64_117 : NVPTXReg<"%v2b64_117">;
> +def v2b64_118 : NVPTXReg<"%v2b64_118">;
> +def v2b64_119 : NVPTXReg<"%v2b64_119">;
> +def v2b64_120 : NVPTXReg<"%v2b64_120">;
> +def v2b64_121 : NVPTXReg<"%v2b64_121">;
> +def v2b64_122 : NVPTXReg<"%v2b64_122">;
> +def v2b64_123 : NVPTXReg<"%v2b64_123">;
> +def v2b64_124 : NVPTXReg<"%v2b64_124">;
> +def v2b64_125 : NVPTXReg<"%v2b64_125">;
> +def v2b64_126 : NVPTXReg<"%v2b64_126">;
> +def v2b64_127 : NVPTXReg<"%v2b64_127">;
> +def v2b64_128 : NVPTXReg<"%v2b64_128">;
> +def v2b64_129 : NVPTXReg<"%v2b64_129">;
> +def v2b64_130 : NVPTXReg<"%v2b64_130">;
> +def v2b64_131 : NVPTXReg<"%v2b64_131">;
> +def v2b64_132 : NVPTXReg<"%v2b64_132">;
> +def v2b64_133 : NVPTXReg<"%v2b64_133">;
> +def v2b64_134 : NVPTXReg<"%v2b64_134">;
> +def v2b64_135 : NVPTXReg<"%v2b64_135">;
> +def v2b64_136 : NVPTXReg<"%v2b64_136">;
> +def v2b64_137 : NVPTXReg<"%v2b64_137">;
> +def v2b64_138 : NVPTXReg<"%v2b64_138">;
> +def v2b64_139 : NVPTXReg<"%v2b64_139">;
> +def v2b64_140 : NVPTXReg<"%v2b64_140">;
> +def v2b64_141 : NVPTXReg<"%v2b64_141">;
> +def v2b64_142 : NVPTXReg<"%v2b64_142">;
> +def v2b64_143 : NVPTXReg<"%v2b64_143">;
> +def v2b64_144 : NVPTXReg<"%v2b64_144">;
> +def v2b64_145 : NVPTXReg<"%v2b64_145">;
> +def v2b64_146 : NVPTXReg<"%v2b64_146">;
> +def v2b64_147 : NVPTXReg<"%v2b64_147">;
> +def v2b64_148 : NVPTXReg<"%v2b64_148">;
> +def v2b64_149 : NVPTXReg<"%v2b64_149">;
> +def v2b64_150 : NVPTXReg<"%v2b64_150">;
> +def v2b64_151 : NVPTXReg<"%v2b64_151">;
> +def v2b64_152 : NVPTXReg<"%v2b64_152">;
> +def v2b64_153 : NVPTXReg<"%v2b64_153">;
> +def v2b64_154 : NVPTXReg<"%v2b64_154">;
> +def v2b64_155 : NVPTXReg<"%v2b64_155">;
> +def v2b64_156 : NVPTXReg<"%v2b64_156">;
> +def v2b64_157 : NVPTXReg<"%v2b64_157">;
> +def v2b64_158 : NVPTXReg<"%v2b64_158">;
> +def v2b64_159 : NVPTXReg<"%v2b64_159">;
> +def v2b64_160 : NVPTXReg<"%v2b64_160">;
> +def v2b64_161 : NVPTXReg<"%v2b64_161">;
> +def v2b64_162 : NVPTXReg<"%v2b64_162">;
> +def v2b64_163 : NVPTXReg<"%v2b64_163">;
> +def v2b64_164 : NVPTXReg<"%v2b64_164">;
> +def v2b64_165 : NVPTXReg<"%v2b64_165">;
> +def v2b64_166 : NVPTXReg<"%v2b64_166">;
> +def v2b64_167 : NVPTXReg<"%v2b64_167">;
> +def v2b64_168 : NVPTXReg<"%v2b64_168">;
> +def v2b64_169 : NVPTXReg<"%v2b64_169">;
> +def v2b64_170 : NVPTXReg<"%v2b64_170">;
> +def v2b64_171 : NVPTXReg<"%v2b64_171">;
> +def v2b64_172 : NVPTXReg<"%v2b64_172">;
> +def v2b64_173 : NVPTXReg<"%v2b64_173">;
> +def v2b64_174 : NVPTXReg<"%v2b64_174">;
> +def v2b64_175 : NVPTXReg<"%v2b64_175">;
> +def v2b64_176 : NVPTXReg<"%v2b64_176">;
> +def v2b64_177 : NVPTXReg<"%v2b64_177">;
> +def v2b64_178 : NVPTXReg<"%v2b64_178">;
> +def v2b64_179 : NVPTXReg<"%v2b64_179">;
> +def v2b64_180 : NVPTXReg<"%v2b64_180">;
> +def v2b64_181 : NVPTXReg<"%v2b64_181">;
> +def v2b64_182 : NVPTXReg<"%v2b64_182">;
> +def v2b64_183 : NVPTXReg<"%v2b64_183">;
> +def v2b64_184 : NVPTXReg<"%v2b64_184">;
> +def v2b64_185 : NVPTXReg<"%v2b64_185">;
> +def v2b64_186 : NVPTXReg<"%v2b64_186">;
> +def v2b64_187 : NVPTXReg<"%v2b64_187">;
> +def v2b64_188 : NVPTXReg<"%v2b64_188">;
> +def v2b64_189 : NVPTXReg<"%v2b64_189">;
> +def v2b64_190 : NVPTXReg<"%v2b64_190">;
> +def v2b64_191 : NVPTXReg<"%v2b64_191">;
> +def v2b64_192 : NVPTXReg<"%v2b64_192">;
> +def v2b64_193 : NVPTXReg<"%v2b64_193">;
> +def v2b64_194 : NVPTXReg<"%v2b64_194">;
> +def v2b64_195 : NVPTXReg<"%v2b64_195">;
> +def v2b64_196 : NVPTXReg<"%v2b64_196">;
> +def v2b64_197 : NVPTXReg<"%v2b64_197">;
> +def v2b64_198 : NVPTXReg<"%v2b64_198">;
> +def v2b64_199 : NVPTXReg<"%v2b64_199">;
> +def v2b64_200 : NVPTXReg<"%v2b64_200">;
> +def v2b64_201 : NVPTXReg<"%v2b64_201">;
> +def v2b64_202 : NVPTXReg<"%v2b64_202">;
> +def v2b64_203 : NVPTXReg<"%v2b64_203">;
> +def v2b64_204 : NVPTXReg<"%v2b64_204">;
> +def v2b64_205 : NVPTXReg<"%v2b64_205">;
> +def v2b64_206 : NVPTXReg<"%v2b64_206">;
> +def v2b64_207 : NVPTXReg<"%v2b64_207">;
> +def v2b64_208 : NVPTXReg<"%v2b64_208">;
> +def v2b64_209 : NVPTXReg<"%v2b64_209">;
> +def v2b64_210 : NVPTXReg<"%v2b64_210">;
> +def v2b64_211 : NVPTXReg<"%v2b64_211">;
> +def v2b64_212 : NVPTXReg<"%v2b64_212">;
> +def v2b64_213 : NVPTXReg<"%v2b64_213">;
> +def v2b64_214 : NVPTXReg<"%v2b64_214">;
> +def v2b64_215 : NVPTXReg<"%v2b64_215">;
> +def v2b64_216 : NVPTXReg<"%v2b64_216">;
> +def v2b64_217 : NVPTXReg<"%v2b64_217">;
> +def v2b64_218 : NVPTXReg<"%v2b64_218">;
> +def v2b64_219 : NVPTXReg<"%v2b64_219">;
> +def v2b64_220 : NVPTXReg<"%v2b64_220">;
> +def v2b64_221 : NVPTXReg<"%v2b64_221">;
> +def v2b64_222 : NVPTXReg<"%v2b64_222">;
> +def v2b64_223 : NVPTXReg<"%v2b64_223">;
> +def v2b64_224 : NVPTXReg<"%v2b64_224">;
> +def v2b64_225 : NVPTXReg<"%v2b64_225">;
> +def v2b64_226 : NVPTXReg<"%v2b64_226">;
> +def v2b64_227 : NVPTXReg<"%v2b64_227">;
> +def v2b64_228 : NVPTXReg<"%v2b64_228">;
> +def v2b64_229 : NVPTXReg<"%v2b64_229">;
> +def v2b64_230 : NVPTXReg<"%v2b64_230">;
> +def v2b64_231 : NVPTXReg<"%v2b64_231">;
> +def v2b64_232 : NVPTXReg<"%v2b64_232">;
> +def v2b64_233 : NVPTXReg<"%v2b64_233">;
> +def v2b64_234 : NVPTXReg<"%v2b64_234">;
> +def v2b64_235 : NVPTXReg<"%v2b64_235">;
> +def v2b64_236 : NVPTXReg<"%v2b64_236">;
> +def v2b64_237 : NVPTXReg<"%v2b64_237">;
> +def v2b64_238 : NVPTXReg<"%v2b64_238">;
> +def v2b64_239 : NVPTXReg<"%v2b64_239">;
> +def v2b64_240 : NVPTXReg<"%v2b64_240">;
> +def v2b64_241 : NVPTXReg<"%v2b64_241">;
> +def v2b64_242 : NVPTXReg<"%v2b64_242">;
> +def v2b64_243 : NVPTXReg<"%v2b64_243">;
> +def v2b64_244 : NVPTXReg<"%v2b64_244">;
> +def v2b64_245 : NVPTXReg<"%v2b64_245">;
> +def v2b64_246 : NVPTXReg<"%v2b64_246">;
> +def v2b64_247 : NVPTXReg<"%v2b64_247">;
> +def v2b64_248 : NVPTXReg<"%v2b64_248">;
> +def v2b64_249 : NVPTXReg<"%v2b64_249">;
> +def v2b64_250 : NVPTXReg<"%v2b64_250">;
> +def v2b64_251 : NVPTXReg<"%v2b64_251">;
> +def v2b64_252 : NVPTXReg<"%v2b64_252">;
> +def v2b64_253 : NVPTXReg<"%v2b64_253">;
> +def v2b64_254 : NVPTXReg<"%v2b64_254">;
> +def v2b64_255 : NVPTXReg<"%v2b64_255">;
> +def v2b64_256 : NVPTXReg<"%v2b64_256">;
> +def v2b64_257 : NVPTXReg<"%v2b64_257">;
> +def v2b64_258 : NVPTXReg<"%v2b64_258">;
> +def v2b64_259 : NVPTXReg<"%v2b64_259">;
> +def v2b64_260 : NVPTXReg<"%v2b64_260">;
> +def v2b64_261 : NVPTXReg<"%v2b64_261">;
> +def v2b64_262 : NVPTXReg<"%v2b64_262">;
> +def v2b64_263 : NVPTXReg<"%v2b64_263">;
> +def v2b64_264 : NVPTXReg<"%v2b64_264">;
> +def v2b64_265 : NVPTXReg<"%v2b64_265">;
> +def v2b64_266 : NVPTXReg<"%v2b64_266">;
> +def v2b64_267 : NVPTXReg<"%v2b64_267">;
> +def v2b64_268 : NVPTXReg<"%v2b64_268">;
> +def v2b64_269 : NVPTXReg<"%v2b64_269">;
> +def v2b64_270 : NVPTXReg<"%v2b64_270">;
> +def v2b64_271 : NVPTXReg<"%v2b64_271">;
> +def v2b64_272 : NVPTXReg<"%v2b64_272">;
> +def v2b64_273 : NVPTXReg<"%v2b64_273">;
> +def v2b64_274 : NVPTXReg<"%v2b64_274">;
> +def v2b64_275 : NVPTXReg<"%v2b64_275">;
> +def v2b64_276 : NVPTXReg<"%v2b64_276">;
> +def v2b64_277 : NVPTXReg<"%v2b64_277">;
> +def v2b64_278 : NVPTXReg<"%v2b64_278">;
> +def v2b64_279 : NVPTXReg<"%v2b64_279">;
> +def v2b64_280 : NVPTXReg<"%v2b64_280">;
> +def v2b64_281 : NVPTXReg<"%v2b64_281">;
> +def v2b64_282 : NVPTXReg<"%v2b64_282">;
> +def v2b64_283 : NVPTXReg<"%v2b64_283">;
> +def v2b64_284 : NVPTXReg<"%v2b64_284">;
> +def v2b64_285 : NVPTXReg<"%v2b64_285">;
> +def v2b64_286 : NVPTXReg<"%v2b64_286">;
> +def v2b64_287 : NVPTXReg<"%v2b64_287">;
> +def v2b64_288 : NVPTXReg<"%v2b64_288">;
> +def v2b64_289 : NVPTXReg<"%v2b64_289">;
> +def v2b64_290 : NVPTXReg<"%v2b64_290">;
> +def v2b64_291 : NVPTXReg<"%v2b64_291">;
> +def v2b64_292 : NVPTXReg<"%v2b64_292">;
> +def v2b64_293 : NVPTXReg<"%v2b64_293">;
> +def v2b64_294 : NVPTXReg<"%v2b64_294">;
> +def v2b64_295 : NVPTXReg<"%v2b64_295">;
> +def v2b64_296 : NVPTXReg<"%v2b64_296">;
> +def v2b64_297 : NVPTXReg<"%v2b64_297">;
> +def v2b64_298 : NVPTXReg<"%v2b64_298">;
> +def v2b64_299 : NVPTXReg<"%v2b64_299">;
> +def v2b64_300 : NVPTXReg<"%v2b64_300">;
> +def v2b64_301 : NVPTXReg<"%v2b64_301">;
> +def v2b64_302 : NVPTXReg<"%v2b64_302">;
> +def v2b64_303 : NVPTXReg<"%v2b64_303">;
> +def v2b64_304 : NVPTXReg<"%v2b64_304">;
> +def v2b64_305 : NVPTXReg<"%v2b64_305">;
> +def v2b64_306 : NVPTXReg<"%v2b64_306">;
> +def v2b64_307 : NVPTXReg<"%v2b64_307">;
> +def v2b64_308 : NVPTXReg<"%v2b64_308">;
> +def v2b64_309 : NVPTXReg<"%v2b64_309">;
> +def v2b64_310 : NVPTXReg<"%v2b64_310">;
> +def v2b64_311 : NVPTXReg<"%v2b64_311">;
> +def v2b64_312 : NVPTXReg<"%v2b64_312">;
> +def v2b64_313 : NVPTXReg<"%v2b64_313">;
> +def v2b64_314 : NVPTXReg<"%v2b64_314">;
> +def v2b64_315 : NVPTXReg<"%v2b64_315">;
> +def v2b64_316 : NVPTXReg<"%v2b64_316">;
> +def v2b64_317 : NVPTXReg<"%v2b64_317">;
> +def v2b64_318 : NVPTXReg<"%v2b64_318">;
> +def v2b64_319 : NVPTXReg<"%v2b64_319">;
> +def v2b64_320 : NVPTXReg<"%v2b64_320">;
> +def v2b64_321 : NVPTXReg<"%v2b64_321">;
> +def v2b64_322 : NVPTXReg<"%v2b64_322">;
> +def v2b64_323 : NVPTXReg<"%v2b64_323">;
> +def v2b64_324 : NVPTXReg<"%v2b64_324">;
> +def v2b64_325 : NVPTXReg<"%v2b64_325">;
> +def v2b64_326 : NVPTXReg<"%v2b64_326">;
> +def v2b64_327 : NVPTXReg<"%v2b64_327">;
> +def v2b64_328 : NVPTXReg<"%v2b64_328">;
> +def v2b64_329 : NVPTXReg<"%v2b64_329">;
> +def v2b64_330 : NVPTXReg<"%v2b64_330">;
> +def v2b64_331 : NVPTXReg<"%v2b64_331">;
> +def v2b64_332 : NVPTXReg<"%v2b64_332">;
> +def v2b64_333 : NVPTXReg<"%v2b64_333">;
> +def v2b64_334 : NVPTXReg<"%v2b64_334">;
> +def v2b64_335 : NVPTXReg<"%v2b64_335">;
> +def v2b64_336 : NVPTXReg<"%v2b64_336">;
> +def v2b64_337 : NVPTXReg<"%v2b64_337">;
> +def v2b64_338 : NVPTXReg<"%v2b64_338">;
> +def v2b64_339 : NVPTXReg<"%v2b64_339">;
> +def v2b64_340 : NVPTXReg<"%v2b64_340">;
> +def v2b64_341 : NVPTXReg<"%v2b64_341">;
> +def v2b64_342 : NVPTXReg<"%v2b64_342">;
> +def v2b64_343 : NVPTXReg<"%v2b64_343">;
> +def v2b64_344 : NVPTXReg<"%v2b64_344">;
> +def v2b64_345 : NVPTXReg<"%v2b64_345">;
> +def v2b64_346 : NVPTXReg<"%v2b64_346">;
> +def v2b64_347 : NVPTXReg<"%v2b64_347">;
> +def v2b64_348 : NVPTXReg<"%v2b64_348">;
> +def v2b64_349 : NVPTXReg<"%v2b64_349">;
> +def v2b64_350 : NVPTXReg<"%v2b64_350">;
> +def v2b64_351 : NVPTXReg<"%v2b64_351">;
> +def v2b64_352 : NVPTXReg<"%v2b64_352">;
> +def v2b64_353 : NVPTXReg<"%v2b64_353">;
> +def v2b64_354 : NVPTXReg<"%v2b64_354">;
> +def v2b64_355 : NVPTXReg<"%v2b64_355">;
> +def v2b64_356 : NVPTXReg<"%v2b64_356">;
> +def v2b64_357 : NVPTXReg<"%v2b64_357">;
> +def v2b64_358 : NVPTXReg<"%v2b64_358">;
> +def v2b64_359 : NVPTXReg<"%v2b64_359">;
> +def v2b64_360 : NVPTXReg<"%v2b64_360">;
> +def v2b64_361 : NVPTXReg<"%v2b64_361">;
> +def v2b64_362 : NVPTXReg<"%v2b64_362">;
> +def v2b64_363 : NVPTXReg<"%v2b64_363">;
> +def v2b64_364 : NVPTXReg<"%v2b64_364">;
> +def v2b64_365 : NVPTXReg<"%v2b64_365">;
> +def v2b64_366 : NVPTXReg<"%v2b64_366">;
> +def v2b64_367 : NVPTXReg<"%v2b64_367">;
> +def v2b64_368 : NVPTXReg<"%v2b64_368">;
> +def v2b64_369 : NVPTXReg<"%v2b64_369">;
> +def v2b64_370 : NVPTXReg<"%v2b64_370">;
> +def v2b64_371 : NVPTXReg<"%v2b64_371">;
> +def v2b64_372 : NVPTXReg<"%v2b64_372">;
> +def v2b64_373 : NVPTXReg<"%v2b64_373">;
> +def v2b64_374 : NVPTXReg<"%v2b64_374">;
> +def v2b64_375 : NVPTXReg<"%v2b64_375">;
> +def v2b64_376 : NVPTXReg<"%v2b64_376">;
> +def v2b64_377 : NVPTXReg<"%v2b64_377">;
> +def v2b64_378 : NVPTXReg<"%v2b64_378">;
> +def v2b64_379 : NVPTXReg<"%v2b64_379">;
> +def v2b64_380 : NVPTXReg<"%v2b64_380">;
> +def v2b64_381 : NVPTXReg<"%v2b64_381">;
> +def v2b64_382 : NVPTXReg<"%v2b64_382">;
> +def v2b64_383 : NVPTXReg<"%v2b64_383">;
> +def v2b64_384 : NVPTXReg<"%v2b64_384">;
> +def v2b64_385 : NVPTXReg<"%v2b64_385">;
> +def v2b64_386 : NVPTXReg<"%v2b64_386">;
> +def v2b64_387 : NVPTXReg<"%v2b64_387">;
> +def v2b64_388 : NVPTXReg<"%v2b64_388">;
> +def v2b64_389 : NVPTXReg<"%v2b64_389">;
> +def v2b64_390 : NVPTXReg<"%v2b64_390">;
> +def v2b64_391 : NVPTXReg<"%v2b64_391">;
> +def v2b64_392 : NVPTXReg<"%v2b64_392">;
> +def v2b64_393 : NVPTXReg<"%v2b64_393">;
> +def v2b64_394 : NVPTXReg<"%v2b64_394">;
> +def v2b64_395 : NVPTXReg<"%v2b64_395">;
> +def v4b8_0 : NVPTXReg<"%v4b8_0">;
> +def v4b8_1 : NVPTXReg<"%v4b8_1">;
> +def v4b8_2 : NVPTXReg<"%v4b8_2">;
> +def v4b8_3 : NVPTXReg<"%v4b8_3">;
> +def v4b8_4 : NVPTXReg<"%v4b8_4">;
> +def v4b8_5 : NVPTXReg<"%v4b8_5">;
> +def v4b8_6 : NVPTXReg<"%v4b8_6">;
> +def v4b8_7 : NVPTXReg<"%v4b8_7">;
> +def v4b8_8 : NVPTXReg<"%v4b8_8">;
> +def v4b8_9 : NVPTXReg<"%v4b8_9">;
> +def v4b8_10 : NVPTXReg<"%v4b8_10">;
> +def v4b8_11 : NVPTXReg<"%v4b8_11">;
> +def v4b8_12 : NVPTXReg<"%v4b8_12">;
> +def v4b8_13 : NVPTXReg<"%v4b8_13">;
> +def v4b8_14 : NVPTXReg<"%v4b8_14">;
> +def v4b8_15 : NVPTXReg<"%v4b8_15">;
> +def v4b8_16 : NVPTXReg<"%v4b8_16">;
> +def v4b8_17 : NVPTXReg<"%v4b8_17">;
> +def v4b8_18 : NVPTXReg<"%v4b8_18">;
> +def v4b8_19 : NVPTXReg<"%v4b8_19">;
> +def v4b8_20 : NVPTXReg<"%v4b8_20">;
> +def v4b8_21 : NVPTXReg<"%v4b8_21">;
> +def v4b8_22 : NVPTXReg<"%v4b8_22">;
> +def v4b8_23 : NVPTXReg<"%v4b8_23">;
> +def v4b8_24 : NVPTXReg<"%v4b8_24">;
> +def v4b8_25 : NVPTXReg<"%v4b8_25">;
> +def v4b8_26 : NVPTXReg<"%v4b8_26">;
> +def v4b8_27 : NVPTXReg<"%v4b8_27">;
> +def v4b8_28 : NVPTXReg<"%v4b8_28">;
> +def v4b8_29 : NVPTXReg<"%v4b8_29">;
> +def v4b8_30 : NVPTXReg<"%v4b8_30">;
> +def v4b8_31 : NVPTXReg<"%v4b8_31">;
> +def v4b8_32 : NVPTXReg<"%v4b8_32">;
> +def v4b8_33 : NVPTXReg<"%v4b8_33">;
> +def v4b8_34 : NVPTXReg<"%v4b8_34">;
> +def v4b8_35 : NVPTXReg<"%v4b8_35">;
> +def v4b8_36 : NVPTXReg<"%v4b8_36">;
> +def v4b8_37 : NVPTXReg<"%v4b8_37">;
> +def v4b8_38 : NVPTXReg<"%v4b8_38">;
> +def v4b8_39 : NVPTXReg<"%v4b8_39">;
> +def v4b8_40 : NVPTXReg<"%v4b8_40">;
> +def v4b8_41 : NVPTXReg<"%v4b8_41">;
> +def v4b8_42 : NVPTXReg<"%v4b8_42">;
> +def v4b8_43 : NVPTXReg<"%v4b8_43">;
> +def v4b8_44 : NVPTXReg<"%v4b8_44">;
> +def v4b8_45 : NVPTXReg<"%v4b8_45">;
> +def v4b8_46 : NVPTXReg<"%v4b8_46">;
> +def v4b8_47 : NVPTXReg<"%v4b8_47">;
> +def v4b8_48 : NVPTXReg<"%v4b8_48">;
> +def v4b8_49 : NVPTXReg<"%v4b8_49">;
> +def v4b8_50 : NVPTXReg<"%v4b8_50">;
> +def v4b8_51 : NVPTXReg<"%v4b8_51">;
> +def v4b8_52 : NVPTXReg<"%v4b8_52">;
> +def v4b8_53 : NVPTXReg<"%v4b8_53">;
> +def v4b8_54 : NVPTXReg<"%v4b8_54">;
> +def v4b8_55 : NVPTXReg<"%v4b8_55">;
> +def v4b8_56 : NVPTXReg<"%v4b8_56">;
> +def v4b8_57 : NVPTXReg<"%v4b8_57">;
> +def v4b8_58 : NVPTXReg<"%v4b8_58">;
> +def v4b8_59 : NVPTXReg<"%v4b8_59">;
> +def v4b8_60 : NVPTXReg<"%v4b8_60">;
> +def v4b8_61 : NVPTXReg<"%v4b8_61">;
> +def v4b8_62 : NVPTXReg<"%v4b8_62">;
> +def v4b8_63 : NVPTXReg<"%v4b8_63">;
> +def v4b8_64 : NVPTXReg<"%v4b8_64">;
> +def v4b8_65 : NVPTXReg<"%v4b8_65">;
> +def v4b8_66 : NVPTXReg<"%v4b8_66">;
> +def v4b8_67 : NVPTXReg<"%v4b8_67">;
> +def v4b8_68 : NVPTXReg<"%v4b8_68">;
> +def v4b8_69 : NVPTXReg<"%v4b8_69">;
> +def v4b8_70 : NVPTXReg<"%v4b8_70">;
> +def v4b8_71 : NVPTXReg<"%v4b8_71">;
> +def v4b8_72 : NVPTXReg<"%v4b8_72">;
> +def v4b8_73 : NVPTXReg<"%v4b8_73">;
> +def v4b8_74 : NVPTXReg<"%v4b8_74">;
> +def v4b8_75 : NVPTXReg<"%v4b8_75">;
> +def v4b8_76 : NVPTXReg<"%v4b8_76">;
> +def v4b8_77 : NVPTXReg<"%v4b8_77">;
> +def v4b8_78 : NVPTXReg<"%v4b8_78">;
> +def v4b8_79 : NVPTXReg<"%v4b8_79">;
> +def v4b8_80 : NVPTXReg<"%v4b8_80">;
> +def v4b8_81 : NVPTXReg<"%v4b8_81">;
> +def v4b8_82 : NVPTXReg<"%v4b8_82">;
> +def v4b8_83 : NVPTXReg<"%v4b8_83">;
> +def v4b8_84 : NVPTXReg<"%v4b8_84">;
> +def v4b8_85 : NVPTXReg<"%v4b8_85">;
> +def v4b8_86 : NVPTXReg<"%v4b8_86">;
> +def v4b8_87 : NVPTXReg<"%v4b8_87">;
> +def v4b8_88 : NVPTXReg<"%v4b8_88">;
> +def v4b8_89 : NVPTXReg<"%v4b8_89">;
> +def v4b8_90 : NVPTXReg<"%v4b8_90">;
> +def v4b8_91 : NVPTXReg<"%v4b8_91">;
> +def v4b8_92 : NVPTXReg<"%v4b8_92">;
> +def v4b8_93 : NVPTXReg<"%v4b8_93">;
> +def v4b8_94 : NVPTXReg<"%v4b8_94">;
> +def v4b8_95 : NVPTXReg<"%v4b8_95">;
> +def v4b8_96 : NVPTXReg<"%v4b8_96">;
> +def v4b8_97 : NVPTXReg<"%v4b8_97">;
> +def v4b8_98 : NVPTXReg<"%v4b8_98">;
> +def v4b8_99 : NVPTXReg<"%v4b8_99">;
> +def v4b8_100 : NVPTXReg<"%v4b8_100">;
> +def v4b8_101 : NVPTXReg<"%v4b8_101">;
> +def v4b8_102 : NVPTXReg<"%v4b8_102">;
> +def v4b8_103 : NVPTXReg<"%v4b8_103">;
> +def v4b8_104 : NVPTXReg<"%v4b8_104">;
> +def v4b8_105 : NVPTXReg<"%v4b8_105">;
> +def v4b8_106 : NVPTXReg<"%v4b8_106">;
> +def v4b8_107 : NVPTXReg<"%v4b8_107">;
> +def v4b8_108 : NVPTXReg<"%v4b8_108">;
> +def v4b8_109 : NVPTXReg<"%v4b8_109">;
> +def v4b8_110 : NVPTXReg<"%v4b8_110">;
> +def v4b8_111 : NVPTXReg<"%v4b8_111">;
> +def v4b8_112 : NVPTXReg<"%v4b8_112">;
> +def v4b8_113 : NVPTXReg<"%v4b8_113">;
> +def v4b8_114 : NVPTXReg<"%v4b8_114">;
> +def v4b8_115 : NVPTXReg<"%v4b8_115">;
> +def v4b8_116 : NVPTXReg<"%v4b8_116">;
> +def v4b8_117 : NVPTXReg<"%v4b8_117">;
> +def v4b8_118 : NVPTXReg<"%v4b8_118">;
> +def v4b8_119 : NVPTXReg<"%v4b8_119">;
> +def v4b8_120 : NVPTXReg<"%v4b8_120">;
> +def v4b8_121 : NVPTXReg<"%v4b8_121">;
> +def v4b8_122 : NVPTXReg<"%v4b8_122">;
> +def v4b8_123 : NVPTXReg<"%v4b8_123">;
> +def v4b8_124 : NVPTXReg<"%v4b8_124">;
> +def v4b8_125 : NVPTXReg<"%v4b8_125">;
> +def v4b8_126 : NVPTXReg<"%v4b8_126">;
> +def v4b8_127 : NVPTXReg<"%v4b8_127">;
> +def v4b8_128 : NVPTXReg<"%v4b8_128">;
> +def v4b8_129 : NVPTXReg<"%v4b8_129">;
> +def v4b8_130 : NVPTXReg<"%v4b8_130">;
> +def v4b8_131 : NVPTXReg<"%v4b8_131">;
> +def v4b8_132 : NVPTXReg<"%v4b8_132">;
> +def v4b8_133 : NVPTXReg<"%v4b8_133">;
> +def v4b8_134 : NVPTXReg<"%v4b8_134">;
> +def v4b8_135 : NVPTXReg<"%v4b8_135">;
> +def v4b8_136 : NVPTXReg<"%v4b8_136">;
> +def v4b8_137 : NVPTXReg<"%v4b8_137">;
> +def v4b8_138 : NVPTXReg<"%v4b8_138">;
> +def v4b8_139 : NVPTXReg<"%v4b8_139">;
> +def v4b8_140 : NVPTXReg<"%v4b8_140">;
> +def v4b8_141 : NVPTXReg<"%v4b8_141">;
> +def v4b8_142 : NVPTXReg<"%v4b8_142">;
> +def v4b8_143 : NVPTXReg<"%v4b8_143">;
> +def v4b8_144 : NVPTXReg<"%v4b8_144">;
> +def v4b8_145 : NVPTXReg<"%v4b8_145">;
> +def v4b8_146 : NVPTXReg<"%v4b8_146">;
> +def v4b8_147 : NVPTXReg<"%v4b8_147">;
> +def v4b8_148 : NVPTXReg<"%v4b8_148">;
> +def v4b8_149 : NVPTXReg<"%v4b8_149">;
> +def v4b8_150 : NVPTXReg<"%v4b8_150">;
> +def v4b8_151 : NVPTXReg<"%v4b8_151">;
> +def v4b8_152 : NVPTXReg<"%v4b8_152">;
> +def v4b8_153 : NVPTXReg<"%v4b8_153">;
> +def v4b8_154 : NVPTXReg<"%v4b8_154">;
> +def v4b8_155 : NVPTXReg<"%v4b8_155">;
> +def v4b8_156 : NVPTXReg<"%v4b8_156">;
> +def v4b8_157 : NVPTXReg<"%v4b8_157">;
> +def v4b8_158 : NVPTXReg<"%v4b8_158">;
> +def v4b8_159 : NVPTXReg<"%v4b8_159">;
> +def v4b8_160 : NVPTXReg<"%v4b8_160">;
> +def v4b8_161 : NVPTXReg<"%v4b8_161">;
> +def v4b8_162 : NVPTXReg<"%v4b8_162">;
> +def v4b8_163 : NVPTXReg<"%v4b8_163">;
> +def v4b8_164 : NVPTXReg<"%v4b8_164">;
> +def v4b8_165 : NVPTXReg<"%v4b8_165">;
> +def v4b8_166 : NVPTXReg<"%v4b8_166">;
> +def v4b8_167 : NVPTXReg<"%v4b8_167">;
> +def v4b8_168 : NVPTXReg<"%v4b8_168">;
> +def v4b8_169 : NVPTXReg<"%v4b8_169">;
> +def v4b8_170 : NVPTXReg<"%v4b8_170">;
> +def v4b8_171 : NVPTXReg<"%v4b8_171">;
> +def v4b8_172 : NVPTXReg<"%v4b8_172">;
> +def v4b8_173 : NVPTXReg<"%v4b8_173">;
> +def v4b8_174 : NVPTXReg<"%v4b8_174">;
> +def v4b8_175 : NVPTXReg<"%v4b8_175">;
> +def v4b8_176 : NVPTXReg<"%v4b8_176">;
> +def v4b8_177 : NVPTXReg<"%v4b8_177">;
> +def v4b8_178 : NVPTXReg<"%v4b8_178">;
> +def v4b8_179 : NVPTXReg<"%v4b8_179">;
> +def v4b8_180 : NVPTXReg<"%v4b8_180">;
> +def v4b8_181 : NVPTXReg<"%v4b8_181">;
> +def v4b8_182 : NVPTXReg<"%v4b8_182">;
> +def v4b8_183 : NVPTXReg<"%v4b8_183">;
> +def v4b8_184 : NVPTXReg<"%v4b8_184">;
> +def v4b8_185 : NVPTXReg<"%v4b8_185">;
> +def v4b8_186 : NVPTXReg<"%v4b8_186">;
> +def v4b8_187 : NVPTXReg<"%v4b8_187">;
> +def v4b8_188 : NVPTXReg<"%v4b8_188">;
> +def v4b8_189 : NVPTXReg<"%v4b8_189">;
> +def v4b8_190 : NVPTXReg<"%v4b8_190">;
> +def v4b8_191 : NVPTXReg<"%v4b8_191">;
> +def v4b8_192 : NVPTXReg<"%v4b8_192">;
> +def v4b8_193 : NVPTXReg<"%v4b8_193">;
> +def v4b8_194 : NVPTXReg<"%v4b8_194">;
> +def v4b8_195 : NVPTXReg<"%v4b8_195">;
> +def v4b8_196 : NVPTXReg<"%v4b8_196">;
> +def v4b8_197 : NVPTXReg<"%v4b8_197">;
> +def v4b8_198 : NVPTXReg<"%v4b8_198">;
> +def v4b8_199 : NVPTXReg<"%v4b8_199">;
> +def v4b8_200 : NVPTXReg<"%v4b8_200">;
> +def v4b8_201 : NVPTXReg<"%v4b8_201">;
> +def v4b8_202 : NVPTXReg<"%v4b8_202">;
> +def v4b8_203 : NVPTXReg<"%v4b8_203">;
> +def v4b8_204 : NVPTXReg<"%v4b8_204">;
> +def v4b8_205 : NVPTXReg<"%v4b8_205">;
> +def v4b8_206 : NVPTXReg<"%v4b8_206">;
> +def v4b8_207 : NVPTXReg<"%v4b8_207">;
> +def v4b8_208 : NVPTXReg<"%v4b8_208">;
> +def v4b8_209 : NVPTXReg<"%v4b8_209">;
> +def v4b8_210 : NVPTXReg<"%v4b8_210">;
> +def v4b8_211 : NVPTXReg<"%v4b8_211">;
> +def v4b8_212 : NVPTXReg<"%v4b8_212">;
> +def v4b8_213 : NVPTXReg<"%v4b8_213">;
> +def v4b8_214 : NVPTXReg<"%v4b8_214">;
> +def v4b8_215 : NVPTXReg<"%v4b8_215">;
> +def v4b8_216 : NVPTXReg<"%v4b8_216">;
> +def v4b8_217 : NVPTXReg<"%v4b8_217">;
> +def v4b8_218 : NVPTXReg<"%v4b8_218">;
> +def v4b8_219 : NVPTXReg<"%v4b8_219">;
> +def v4b8_220 : NVPTXReg<"%v4b8_220">;
> +def v4b8_221 : NVPTXReg<"%v4b8_221">;
> +def v4b8_222 : NVPTXReg<"%v4b8_222">;
> +def v4b8_223 : NVPTXReg<"%v4b8_223">;
> +def v4b8_224 : NVPTXReg<"%v4b8_224">;
> +def v4b8_225 : NVPTXReg<"%v4b8_225">;
> +def v4b8_226 : NVPTXReg<"%v4b8_226">;
> +def v4b8_227 : NVPTXReg<"%v4b8_227">;
> +def v4b8_228 : NVPTXReg<"%v4b8_228">;
> +def v4b8_229 : NVPTXReg<"%v4b8_229">;
> +def v4b8_230 : NVPTXReg<"%v4b8_230">;
> +def v4b8_231 : NVPTXReg<"%v4b8_231">;
> +def v4b8_232 : NVPTXReg<"%v4b8_232">;
> +def v4b8_233 : NVPTXReg<"%v4b8_233">;
> +def v4b8_234 : NVPTXReg<"%v4b8_234">;
> +def v4b8_235 : NVPTXReg<"%v4b8_235">;
> +def v4b8_236 : NVPTXReg<"%v4b8_236">;
> +def v4b8_237 : NVPTXReg<"%v4b8_237">;
> +def v4b8_238 : NVPTXReg<"%v4b8_238">;
> +def v4b8_239 : NVPTXReg<"%v4b8_239">;
> +def v4b8_240 : NVPTXReg<"%v4b8_240">;
> +def v4b8_241 : NVPTXReg<"%v4b8_241">;
> +def v4b8_242 : NVPTXReg<"%v4b8_242">;
> +def v4b8_243 : NVPTXReg<"%v4b8_243">;
> +def v4b8_244 : NVPTXReg<"%v4b8_244">;
> +def v4b8_245 : NVPTXReg<"%v4b8_245">;
> +def v4b8_246 : NVPTXReg<"%v4b8_246">;
> +def v4b8_247 : NVPTXReg<"%v4b8_247">;
> +def v4b8_248 : NVPTXReg<"%v4b8_248">;
> +def v4b8_249 : NVPTXReg<"%v4b8_249">;
> +def v4b8_250 : NVPTXReg<"%v4b8_250">;
> +def v4b8_251 : NVPTXReg<"%v4b8_251">;
> +def v4b8_252 : NVPTXReg<"%v4b8_252">;
> +def v4b8_253 : NVPTXReg<"%v4b8_253">;
> +def v4b8_254 : NVPTXReg<"%v4b8_254">;
> +def v4b8_255 : NVPTXReg<"%v4b8_255">;
> +def v4b8_256 : NVPTXReg<"%v4b8_256">;
> +def v4b8_257 : NVPTXReg<"%v4b8_257">;
> +def v4b8_258 : NVPTXReg<"%v4b8_258">;
> +def v4b8_259 : NVPTXReg<"%v4b8_259">;
> +def v4b8_260 : NVPTXReg<"%v4b8_260">;
> +def v4b8_261 : NVPTXReg<"%v4b8_261">;
> +def v4b8_262 : NVPTXReg<"%v4b8_262">;
> +def v4b8_263 : NVPTXReg<"%v4b8_263">;
> +def v4b8_264 : NVPTXReg<"%v4b8_264">;
> +def v4b8_265 : NVPTXReg<"%v4b8_265">;
> +def v4b8_266 : NVPTXReg<"%v4b8_266">;
> +def v4b8_267 : NVPTXReg<"%v4b8_267">;
> +def v4b8_268 : NVPTXReg<"%v4b8_268">;
> +def v4b8_269 : NVPTXReg<"%v4b8_269">;
> +def v4b8_270 : NVPTXReg<"%v4b8_270">;
> +def v4b8_271 : NVPTXReg<"%v4b8_271">;
> +def v4b8_272 : NVPTXReg<"%v4b8_272">;
> +def v4b8_273 : NVPTXReg<"%v4b8_273">;
> +def v4b8_274 : NVPTXReg<"%v4b8_274">;
> +def v4b8_275 : NVPTXReg<"%v4b8_275">;
> +def v4b8_276 : NVPTXReg<"%v4b8_276">;
> +def v4b8_277 : NVPTXReg<"%v4b8_277">;
> +def v4b8_278 : NVPTXReg<"%v4b8_278">;
> +def v4b8_279 : NVPTXReg<"%v4b8_279">;
> +def v4b8_280 : NVPTXReg<"%v4b8_280">;
> +def v4b8_281 : NVPTXReg<"%v4b8_281">;
> +def v4b8_282 : NVPTXReg<"%v4b8_282">;
> +def v4b8_283 : NVPTXReg<"%v4b8_283">;
> +def v4b8_284 : NVPTXReg<"%v4b8_284">;
> +def v4b8_285 : NVPTXReg<"%v4b8_285">;
> +def v4b8_286 : NVPTXReg<"%v4b8_286">;
> +def v4b8_287 : NVPTXReg<"%v4b8_287">;
> +def v4b8_288 : NVPTXReg<"%v4b8_288">;
> +def v4b8_289 : NVPTXReg<"%v4b8_289">;
> +def v4b8_290 : NVPTXReg<"%v4b8_290">;
> +def v4b8_291 : NVPTXReg<"%v4b8_291">;
> +def v4b8_292 : NVPTXReg<"%v4b8_292">;
> +def v4b8_293 : NVPTXReg<"%v4b8_293">;
> +def v4b8_294 : NVPTXReg<"%v4b8_294">;
> +def v4b8_295 : NVPTXReg<"%v4b8_295">;
> +def v4b8_296 : NVPTXReg<"%v4b8_296">;
> +def v4b8_297 : NVPTXReg<"%v4b8_297">;
> +def v4b8_298 : NVPTXReg<"%v4b8_298">;
> +def v4b8_299 : NVPTXReg<"%v4b8_299">;
> +def v4b8_300 : NVPTXReg<"%v4b8_300">;
> +def v4b8_301 : NVPTXReg<"%v4b8_301">;
> +def v4b8_302 : NVPTXReg<"%v4b8_302">;
> +def v4b8_303 : NVPTXReg<"%v4b8_303">;
> +def v4b8_304 : NVPTXReg<"%v4b8_304">;
> +def v4b8_305 : NVPTXReg<"%v4b8_305">;
> +def v4b8_306 : NVPTXReg<"%v4b8_306">;
> +def v4b8_307 : NVPTXReg<"%v4b8_307">;
> +def v4b8_308 : NVPTXReg<"%v4b8_308">;
> +def v4b8_309 : NVPTXReg<"%v4b8_309">;
> +def v4b8_310 : NVPTXReg<"%v4b8_310">;
> +def v4b8_311 : NVPTXReg<"%v4b8_311">;
> +def v4b8_312 : NVPTXReg<"%v4b8_312">;
> +def v4b8_313 : NVPTXReg<"%v4b8_313">;
> +def v4b8_314 : NVPTXReg<"%v4b8_314">;
> +def v4b8_315 : NVPTXReg<"%v4b8_315">;
> +def v4b8_316 : NVPTXReg<"%v4b8_316">;
> +def v4b8_317 : NVPTXReg<"%v4b8_317">;
> +def v4b8_318 : NVPTXReg<"%v4b8_318">;
> +def v4b8_319 : NVPTXReg<"%v4b8_319">;
> +def v4b8_320 : NVPTXReg<"%v4b8_320">;
> +def v4b8_321 : NVPTXReg<"%v4b8_321">;
> +def v4b8_322 : NVPTXReg<"%v4b8_322">;
> +def v4b8_323 : NVPTXReg<"%v4b8_323">;
> +def v4b8_324 : NVPTXReg<"%v4b8_324">;
> +def v4b8_325 : NVPTXReg<"%v4b8_325">;
> +def v4b8_326 : NVPTXReg<"%v4b8_326">;
> +def v4b8_327 : NVPTXReg<"%v4b8_327">;
> +def v4b8_328 : NVPTXReg<"%v4b8_328">;
> +def v4b8_329 : NVPTXReg<"%v4b8_329">;
> +def v4b8_330 : NVPTXReg<"%v4b8_330">;
> +def v4b8_331 : NVPTXReg<"%v4b8_331">;
> +def v4b8_332 : NVPTXReg<"%v4b8_332">;
> +def v4b8_333 : NVPTXReg<"%v4b8_333">;
> +def v4b8_334 : NVPTXReg<"%v4b8_334">;
> +def v4b8_335 : NVPTXReg<"%v4b8_335">;
> +def v4b8_336 : NVPTXReg<"%v4b8_336">;
> +def v4b8_337 : NVPTXReg<"%v4b8_337">;
> +def v4b8_338 : NVPTXReg<"%v4b8_338">;
> +def v4b8_339 : NVPTXReg<"%v4b8_339">;
> +def v4b8_340 : NVPTXReg<"%v4b8_340">;
> +def v4b8_341 : NVPTXReg<"%v4b8_341">;
> +def v4b8_342 : NVPTXReg<"%v4b8_342">;
> +def v4b8_343 : NVPTXReg<"%v4b8_343">;
> +def v4b8_344 : NVPTXReg<"%v4b8_344">;
> +def v4b8_345 : NVPTXReg<"%v4b8_345">;
> +def v4b8_346 : NVPTXReg<"%v4b8_346">;
> +def v4b8_347 : NVPTXReg<"%v4b8_347">;
> +def v4b8_348 : NVPTXReg<"%v4b8_348">;
> +def v4b8_349 : NVPTXReg<"%v4b8_349">;
> +def v4b8_350 : NVPTXReg<"%v4b8_350">;
> +def v4b8_351 : NVPTXReg<"%v4b8_351">;
> +def v4b8_352 : NVPTXReg<"%v4b8_352">;
> +def v4b8_353 : NVPTXReg<"%v4b8_353">;
> +def v4b8_354 : NVPTXReg<"%v4b8_354">;
> +def v4b8_355 : NVPTXReg<"%v4b8_355">;
> +def v4b8_356 : NVPTXReg<"%v4b8_356">;
> +def v4b8_357 : NVPTXReg<"%v4b8_357">;
> +def v4b8_358 : NVPTXReg<"%v4b8_358">;
> +def v4b8_359 : NVPTXReg<"%v4b8_359">;
> +def v4b8_360 : NVPTXReg<"%v4b8_360">;
> +def v4b8_361 : NVPTXReg<"%v4b8_361">;
> +def v4b8_362 : NVPTXReg<"%v4b8_362">;
> +def v4b8_363 : NVPTXReg<"%v4b8_363">;
> +def v4b8_364 : NVPTXReg<"%v4b8_364">;
> +def v4b8_365 : NVPTXReg<"%v4b8_365">;
> +def v4b8_366 : NVPTXReg<"%v4b8_366">;
> +def v4b8_367 : NVPTXReg<"%v4b8_367">;
> +def v4b8_368 : NVPTXReg<"%v4b8_368">;
> +def v4b8_369 : NVPTXReg<"%v4b8_369">;
> +def v4b8_370 : NVPTXReg<"%v4b8_370">;
> +def v4b8_371 : NVPTXReg<"%v4b8_371">;
> +def v4b8_372 : NVPTXReg<"%v4b8_372">;
> +def v4b8_373 : NVPTXReg<"%v4b8_373">;
> +def v4b8_374 : NVPTXReg<"%v4b8_374">;
> +def v4b8_375 : NVPTXReg<"%v4b8_375">;
> +def v4b8_376 : NVPTXReg<"%v4b8_376">;
> +def v4b8_377 : NVPTXReg<"%v4b8_377">;
> +def v4b8_378 : NVPTXReg<"%v4b8_378">;
> +def v4b8_379 : NVPTXReg<"%v4b8_379">;
> +def v4b8_380 : NVPTXReg<"%v4b8_380">;
> +def v4b8_381 : NVPTXReg<"%v4b8_381">;
> +def v4b8_382 : NVPTXReg<"%v4b8_382">;
> +def v4b8_383 : NVPTXReg<"%v4b8_383">;
> +def v4b8_384 : NVPTXReg<"%v4b8_384">;
> +def v4b8_385 : NVPTXReg<"%v4b8_385">;
> +def v4b8_386 : NVPTXReg<"%v4b8_386">;
> +def v4b8_387 : NVPTXReg<"%v4b8_387">;
> +def v4b8_388 : NVPTXReg<"%v4b8_388">;
> +def v4b8_389 : NVPTXReg<"%v4b8_389">;
> +def v4b8_390 : NVPTXReg<"%v4b8_390">;
> +def v4b8_391 : NVPTXReg<"%v4b8_391">;
> +def v4b8_392 : NVPTXReg<"%v4b8_392">;
> +def v4b8_393 : NVPTXReg<"%v4b8_393">;
> +def v4b8_394 : NVPTXReg<"%v4b8_394">;
> +def v4b8_395 : NVPTXReg<"%v4b8_395">;
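
(Same comment as above: the one-line foreach body would collapse each
of the v2b32, v2b64, v4b8, and v4b16 runs as well; either one loop per
register class, or a single loop with one def per class in its body,
keeps the 0-395 bound in one place.)
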
> +def v4b16_0 : NVPTXReg<"%v4b16_0">;
> +def v4b16_1 : NVPTXReg<"%v4b16_1">;
> +def v4b16_2 : NVPTXReg<"%v4b16_2">;
> +def v4b16_3 : NVPTXReg<"%v4b16_3">;
> +def v4b16_4 : NVPTXReg<"%v4b16_4">;
> +def v4b16_5 : NVPTXReg<"%v4b16_5">;
> +def v4b16_6 : NVPTXReg<"%v4b16_6">;
> +def v4b16_7 : NVPTXReg<"%v4b16_7">;
> +def v4b16_8 : NVPTXReg<"%v4b16_8">;
> +def v4b16_9 : NVPTXReg<"%v4b16_9">;
> +def v4b16_10 : NVPTXReg<"%v4b16_10">;
> +def v4b16_11 : NVPTXReg<"%v4b16_11">;
> +def v4b16_12 : NVPTXReg<"%v4b16_12">;
> +def v4b16_13 : NVPTXReg<"%v4b16_13">;
> +def v4b16_14 : NVPTXReg<"%v4b16_14">;
> +def v4b16_15 : NVPTXReg<"%v4b16_15">;
> +def v4b16_16 : NVPTXReg<"%v4b16_16">;
> +def v4b16_17 : NVPTXReg<"%v4b16_17">;
> +def v4b16_18 : NVPTXReg<"%v4b16_18">;
> +def v4b16_19 : NVPTXReg<"%v4b16_19">;
> +def v4b16_20 : NVPTXReg<"%v4b16_20">;
> +def v4b16_21 : NVPTXReg<"%v4b16_21">;
> +def v4b16_22 : NVPTXReg<"%v4b16_22">;
> +def v4b16_23 : NVPTXReg<"%v4b16_23">;
> +def v4b16_24 : NVPTXReg<"%v4b16_24">;
> +def v4b16_25 : NVPTXReg<"%v4b16_25">;
> +def v4b16_26 : NVPTXReg<"%v4b16_26">;
> +def v4b16_27 : NVPTXReg<"%v4b16_27">;
> +def v4b16_28 : NVPTXReg<"%v4b16_28">;
> +def v4b16_29 : NVPTXReg<"%v4b16_29">;
> +def v4b16_30 : NVPTXReg<"%v4b16_30">;
> +def v4b16_31 : NVPTXReg<"%v4b16_31">;
> +def v4b16_32 : NVPTXReg<"%v4b16_32">;
> +def v4b16_33 : NVPTXReg<"%v4b16_33">;
> +def v4b16_34 : NVPTXReg<"%v4b16_34">;
> +def v4b16_35 : NVPTXReg<"%v4b16_35">;
> +def v4b16_36 : NVPTXReg<"%v4b16_36">;
> +def v4b16_37 : NVPTXReg<"%v4b16_37">;
> +def v4b16_38 : NVPTXReg<"%v4b16_38">;
> +def v4b16_39 : NVPTXReg<"%v4b16_39">;
> +def v4b16_40 : NVPTXReg<"%v4b16_40">;
> +def v4b16_41 : NVPTXReg<"%v4b16_41">;
> +def v4b16_42 : NVPTXReg<"%v4b16_42">;
> +def v4b16_43 : NVPTXReg<"%v4b16_43">;
> +def v4b16_44 : NVPTXReg<"%v4b16_44">;
> +def v4b16_45 : NVPTXReg<"%v4b16_45">;
> +def v4b16_46 : NVPTXReg<"%v4b16_46">;
> +def v4b16_47 : NVPTXReg<"%v4b16_47">;
> +def v4b16_48 : NVPTXReg<"%v4b16_48">;
> +def v4b16_49 : NVPTXReg<"%v4b16_49">;
> +def v4b16_50 : NVPTXReg<"%v4b16_50">;
> +def v4b16_51 : NVPTXReg<"%v4b16_51">;
> +def v4b16_52 : NVPTXReg<"%v4b16_52">;
> +def v4b16_53 : NVPTXReg<"%v4b16_53">;
> +def v4b16_54 : NVPTXReg<"%v4b16_54">;
> +def v4b16_55 : NVPTXReg<"%v4b16_55">;
> +def v4b16_56 : NVPTXReg<"%v4b16_56">;
> +def v4b16_57 : NVPTXReg<"%v4b16_57">;
> +def v4b16_58 : NVPTXReg<"%v4b16_58">;
> +def v4b16_59 : NVPTXReg<"%v4b16_59">;
> +def v4b16_60 : NVPTXReg<"%v4b16_60">;
> +def v4b16_61 : NVPTXReg<"%v4b16_61">;
> +def v4b16_62 : NVPTXReg<"%v4b16_62">;
> +def v4b16_63 : NVPTXReg<"%v4b16_63">;
> +def v4b16_64 : NVPTXReg<"%v4b16_64">;
> +def v4b16_65 : NVPTXReg<"%v4b16_65">;
> +def v4b16_66 : NVPTXReg<"%v4b16_66">;
> +def v4b16_67 : NVPTXReg<"%v4b16_67">;
> +def v4b16_68 : NVPTXReg<"%v4b16_68">;
> +def v4b16_69 : NVPTXReg<"%v4b16_69">;
> +def v4b16_70 : NVPTXReg<"%v4b16_70">;
> +def v4b16_71 : NVPTXReg<"%v4b16_71">;
> +def v4b16_72 : NVPTXReg<"%v4b16_72">;
> +def v4b16_73 : NVPTXReg<"%v4b16_73">;
> +def v4b16_74 : NVPTXReg<"%v4b16_74">;
> +def v4b16_75 : NVPTXReg<"%v4b16_75">;
> +def v4b16_76 : NVPTXReg<"%v4b16_76">;
> +def v4b16_77 : NVPTXReg<"%v4b16_77">;
> +def v4b16_78 : NVPTXReg<"%v4b16_78">;
> +def v4b16_79 : NVPTXReg<"%v4b16_79">;
> +def v4b16_80 : NVPTXReg<"%v4b16_80">;
> +def v4b16_81 : NVPTXReg<"%v4b16_81">;
> +def v4b16_82 : NVPTXReg<"%v4b16_82">;
> +def v4b16_83 : NVPTXReg<"%v4b16_83">;
> +def v4b16_84 : NVPTXReg<"%v4b16_84">;
> +def v4b16_85 : NVPTXReg<"%v4b16_85">;
> +def v4b16_86 : NVPTXReg<"%v4b16_86">;
> +def v4b16_87 : NVPTXReg<"%v4b16_87">;
> +def v4b16_88 : NVPTXReg<"%v4b16_88">;
> +def v4b16_89 : NVPTXReg<"%v4b16_89">;
> +def v4b16_90 : NVPTXReg<"%v4b16_90">;
> +def v4b16_91 : NVPTXReg<"%v4b16_91">;
> +def v4b16_92 : NVPTXReg<"%v4b16_92">;
> +def v4b16_93 : NVPTXReg<"%v4b16_93">;
> +def v4b16_94 : NVPTXReg<"%v4b16_94">;
> +def v4b16_95 : NVPTXReg<"%v4b16_95">;
> +def v4b16_96 : NVPTXReg<"%v4b16_96">;
> +def v4b16_97 : NVPTXReg<"%v4b16_97">;
> +def v4b16_98 : NVPTXReg<"%v4b16_98">;
> +def v4b16_99 : NVPTXReg<"%v4b16_99">;
> +def v4b16_100 : NVPTXReg<"%v4b16_100">;
> +def v4b16_101 : NVPTXReg<"%v4b16_101">;
> +def v4b16_102 : NVPTXReg<"%v4b16_102">;
> +def v4b16_103 : NVPTXReg<"%v4b16_103">;
> +def v4b16_104 : NVPTXReg<"%v4b16_104">;
> +def v4b16_105 : NVPTXReg<"%v4b16_105">;
> +def v4b16_106 : NVPTXReg<"%v4b16_106">;
> +def v4b16_107 : NVPTXReg<"%v4b16_107">;
> +def v4b16_108 : NVPTXReg<"%v4b16_108">;
> +def v4b16_109 : NVPTXReg<"%v4b16_109">;
> +def v4b16_110 : NVPTXReg<"%v4b16_110">;
> +def v4b16_111 : NVPTXReg<"%v4b16_111">;
> +def v4b16_112 : NVPTXReg<"%v4b16_112">;
> +def v4b16_113 : NVPTXReg<"%v4b16_113">;
> +def v4b16_114 : NVPTXReg<"%v4b16_114">;
> +def v4b16_115 : NVPTXReg<"%v4b16_115">;
> +def v4b16_116 : NVPTXReg<"%v4b16_116">;
> +def v4b16_117 : NVPTXReg<"%v4b16_117">;
> +def v4b16_118 : NVPTXReg<"%v4b16_118">;
> +def v4b16_119 : NVPTXReg<"%v4b16_119">;
> +def v4b16_120 : NVPTXReg<"%v4b16_120">;
> +def v4b16_121 : NVPTXReg<"%v4b16_121">;
> +def v4b16_122 : NVPTXReg<"%v4b16_122">;
> +def v4b16_123 : NVPTXReg<"%v4b16_123">;
> +def v4b16_124 : NVPTXReg<"%v4b16_124">;
> +def v4b16_125 : NVPTXReg<"%v4b16_125">;
> +def v4b16_126 : NVPTXReg<"%v4b16_126">;
> +def v4b16_127 : NVPTXReg<"%v4b16_127">;
> +def v4b16_128 : NVPTXReg<"%v4b16_128">;
> +def v4b16_129 : NVPTXReg<"%v4b16_129">;
> +def v4b16_130 : NVPTXReg<"%v4b16_130">;
> +def v4b16_131 : NVPTXReg<"%v4b16_131">;
> +def v4b16_132 : NVPTXReg<"%v4b16_132">;
> +def v4b16_133 : NVPTXReg<"%v4b16_133">;
> +def v4b16_134 : NVPTXReg<"%v4b16_134">;
> +def v4b16_135 : NVPTXReg<"%v4b16_135">;
> +def v4b16_136 : NVPTXReg<"%v4b16_136">;
> +def v4b16_137 : NVPTXReg<"%v4b16_137">;
> +def v4b16_138 : NVPTXReg<"%v4b16_138">;
> +def v4b16_139 : NVPTXReg<"%v4b16_139">;
> +def v4b16_140 : NVPTXReg<"%v4b16_140">;
> +def v4b16_141 : NVPTXReg<"%v4b16_141">;
> +def v4b16_142 : NVPTXReg<"%v4b16_142">;
> +def v4b16_143 : NVPTXReg<"%v4b16_143">;
> +def v4b16_144 : NVPTXReg<"%v4b16_144">;
> +def v4b16_145 : NVPTXReg<"%v4b16_145">;
> +def v4b16_146 : NVPTXReg<"%v4b16_146">;
> +def v4b16_147 : NVPTXReg<"%v4b16_147">;
> +def v4b16_148 : NVPTXReg<"%v4b16_148">;
> +def v4b16_149 : NVPTXReg<"%v4b16_149">;
> +def v4b16_150 : NVPTXReg<"%v4b16_150">;
> +def v4b16_151 : NVPTXReg<"%v4b16_151">;
> +def v4b16_152 : NVPTXReg<"%v4b16_152">;
> +def v4b16_153 : NVPTXReg<"%v4b16_153">;
> +def v4b16_154 : NVPTXReg<"%v4b16_154">;
> +def v4b16_155 : NVPTXReg<"%v4b16_155">;
> +def v4b16_156 : NVPTXReg<"%v4b16_156">;
> +def v4b16_157 : NVPTXReg<"%v4b16_157">;
> +def v4b16_158 : NVPTXReg<"%v4b16_158">;
> +def v4b16_159 : NVPTXReg<"%v4b16_159">;
> +def v4b16_160 : NVPTXReg<"%v4b16_160">;
> +def v4b16_161 : NVPTXReg<"%v4b16_161">;
> +def v4b16_162 : NVPTXReg<"%v4b16_162">;
> +def v4b16_163 : NVPTXReg<"%v4b16_163">;
> +def v4b16_164 : NVPTXReg<"%v4b16_164">;
> +def v4b16_165 : NVPTXReg<"%v4b16_165">;
> +def v4b16_166 : NVPTXReg<"%v4b16_166">;
> +def v4b16_167 : NVPTXReg<"%v4b16_167">;
> +def v4b16_168 : NVPTXReg<"%v4b16_168">;
> +def v4b16_169 : NVPTXReg<"%v4b16_169">;
> +def v4b16_170 : NVPTXReg<"%v4b16_170">;
> +def v4b16_171 : NVPTXReg<"%v4b16_171">;
> +def v4b16_172 : NVPTXReg<"%v4b16_172">;
> +def v4b16_173 : NVPTXReg<"%v4b16_173">;
> +def v4b16_174 : NVPTXReg<"%v4b16_174">;
> +def v4b16_175 : NVPTXReg<"%v4b16_175">;
> +def v4b16_176 : NVPTXReg<"%v4b16_176">;
> +def v4b16_177 : NVPTXReg<"%v4b16_177">;
> +def v4b16_178 : NVPTXReg<"%v4b16_178">;
> +def v4b16_179 : NVPTXReg<"%v4b16_179">;
> +def v4b16_180 : NVPTXReg<"%v4b16_180">;
> +def v4b16_181 : NVPTXReg<"%v4b16_181">;
> +def v4b16_182 : NVPTXReg<"%v4b16_182">;
> +def v4b16_183 : NVPTXReg<"%v4b16_183">;
> +def v4b16_184 : NVPTXReg<"%v4b16_184">;
> +def v4b16_185 : NVPTXReg<"%v4b16_185">;
> +def v4b16_186 : NVPTXReg<"%v4b16_186">;
> +def v4b16_187 : NVPTXReg<"%v4b16_187">;
> +def v4b16_188 : NVPTXReg<"%v4b16_188">;
> +def v4b16_189 : NVPTXReg<"%v4b16_189">;
> +def v4b16_190 : NVPTXReg<"%v4b16_190">;
> +def v4b16_191 : NVPTXReg<"%v4b16_191">;
> +def v4b16_192 : NVPTXReg<"%v4b16_192">;
> +def v4b16_193 : NVPTXReg<"%v4b16_193">;
> +def v4b16_194 : NVPTXReg<"%v4b16_194">;
> +def v4b16_195 : NVPTXReg<"%v4b16_195">;
> +def v4b16_196 : NVPTXReg<"%v4b16_196">;
> +def v4b16_197 : NVPTXReg<"%v4b16_197">;
> +def v4b16_198 : NVPTXReg<"%v4b16_198">;
> +def v4b16_199 : NVPTXReg<"%v4b16_199">;
> +def v4b16_200 : NVPTXReg<"%v4b16_200">;
> +def v4b16_201 : NVPTXReg<"%v4b16_201">;
> +def v4b16_202 : NVPTXReg<"%v4b16_202">;
> +def v4b16_203 : NVPTXReg<"%v4b16_203">;
> +def v4b16_204 : NVPTXReg<"%v4b16_204">;
> +def v4b16_205 : NVPTXReg<"%v4b16_205">;
> +def v4b16_206 : NVPTXReg<"%v4b16_206">;
> +def v4b16_207 : NVPTXReg<"%v4b16_207">;
> +def v4b16_208 : NVPTXReg<"%v4b16_208">;
> +def v4b16_209 : NVPTXReg<"%v4b16_209">;
> +def v4b16_210 : NVPTXReg<"%v4b16_210">;
> +def v4b16_211 : NVPTXReg<"%v4b16_211">;
> +def v4b16_212 : NVPTXReg<"%v4b16_212">;
> +def v4b16_213 : NVPTXReg<"%v4b16_213">;
> +def v4b16_214 : NVPTXReg<"%v4b16_214">;
> +def v4b16_215 : NVPTXReg<"%v4b16_215">;
> +def v4b16_216 : NVPTXReg<"%v4b16_216">;
> +def v4b16_217 : NVPTXReg<"%v4b16_217">;
> +def v4b16_218 : NVPTXReg<"%v4b16_218">;
> +def v4b16_219 : NVPTXReg<"%v4b16_219">;
> +def v4b16_220 : NVPTXReg<"%v4b16_220">;
> +def v4b16_221 : NVPTXReg<"%v4b16_221">;
> +def v4b16_222 : NVPTXReg<"%v4b16_222">;
> +def v4b16_223 : NVPTXReg<"%v4b16_223">;
> +def v4b16_224 : NVPTXReg<"%v4b16_224">;
> +def v4b16_225 : NVPTXReg<"%v4b16_225">;
> +def v4b16_226 : NVPTXReg<"%v4b16_226">;
> +def v4b16_227 : NVPTXReg<"%v4b16_227">;
> +def v4b16_228 : NVPTXReg<"%v4b16_228">;
> +def v4b16_229 : NVPTXReg<"%v4b16_229">;
> +def v4b16_230 : NVPTXReg<"%v4b16_230">;
> +def v4b16_231 : NVPTXReg<"%v4b16_231">;
> +def v4b16_232 : NVPTXReg<"%v4b16_232">;
> +def v4b16_233 : NVPTXReg<"%v4b16_233">;
> +def v4b16_234 : NVPTXReg<"%v4b16_234">;
> +def v4b16_235 : NVPTXReg<"%v4b16_235">;
> +def v4b16_236 : NVPTXReg<"%v4b16_236">;
> +def v4b16_237 : NVPTXReg<"%v4b16_237">;
> +def v4b16_238 : NVPTXReg<"%v4b16_238">;
> +def v4b16_239 : NVPTXReg<"%v4b16_239">;
> +def v4b16_240 : NVPTXReg<"%v4b16_240">;
> +def v4b16_241 : NVPTXReg<"%v4b16_241">;
> +def v4b16_242 : NVPTXReg<"%v4b16_242">;
> +def v4b16_243 : NVPTXReg<"%v4b16_243">;
> +def v4b16_244 : NVPTXReg<"%v4b16_244">;
> +def v4b16_245 : NVPTXReg<"%v4b16_245">;
> +def v4b16_246 : NVPTXReg<"%v4b16_246">;
> +def v4b16_247 : NVPTXReg<"%v4b16_247">;
> +def v4b16_248 : NVPTXReg<"%v4b16_248">;
> +def v4b16_249 : NVPTXReg<"%v4b16_249">;
> +def v4b16_250 : NVPTXReg<"%v4b16_250">;
> +def v4b16_251 : NVPTXReg<"%v4b16_251">;
> +def v4b16_252 : NVPTXReg<"%v4b16_252">;
> +def v4b16_253 : NVPTXReg<"%v4b16_253">;
> +def v4b16_254 : NVPTXReg<"%v4b16_254">;
> +def v4b16_255 : NVPTXReg<"%v4b16_255">;
> +def v4b16_256 : NVPTXReg<"%v4b16_256">;
> +def v4b16_257 : NVPTXReg<"%v4b16_257">;
> +def v4b16_258 : NVPTXReg<"%v4b16_258">;
> +def v4b16_259 : NVPTXReg<"%v4b16_259">;
> +def v4b16_260 : NVPTXReg<"%v4b16_260">;
> +def v4b16_261 : NVPTXReg<"%v4b16_261">;
> +def v4b16_262 : NVPTXReg<"%v4b16_262">;
> +def v4b16_263 : NVPTXReg<"%v4b16_263">;
> +def v4b16_264 : NVPTXReg<"%v4b16_264">;
> +def v4b16_265 : NVPTXReg<"%v4b16_265">;
> +def v4b16_266 : NVPTXReg<"%v4b16_266">;
> +def v4b16_267 : NVPTXReg<"%v4b16_267">;
> +def v4b16_268 : NVPTXReg<"%v4b16_268">;
> +def v4b16_269 : NVPTXReg<"%v4b16_269">;
> +def v4b16_270 : NVPTXReg<"%v4b16_270">;
> +def v4b16_271 : NVPTXReg<"%v4b16_271">;
> +def v4b16_272 : NVPTXReg<"%v4b16_272">;
> +def v4b16_273 : NVPTXReg<"%v4b16_273">;
> +def v4b16_274 : NVPTXReg<"%v4b16_274">;
> +def v4b16_275 : NVPTXReg<"%v4b16_275">;
> +def v4b16_276 : NVPTXReg<"%v4b16_276">;
> +def v4b16_277 : NVPTXReg<"%v4b16_277">;
> +def v4b16_278 : NVPTXReg<"%v4b16_278">;
> +def v4b16_279 : NVPTXReg<"%v4b16_279">;
> +def v4b16_280 : NVPTXReg<"%v4b16_280">;
> +def v4b16_281 : NVPTXReg<"%v4b16_281">;
> +def v4b16_282 : NVPTXReg<"%v4b16_282">;
> +def v4b16_283 : NVPTXReg<"%v4b16_283">;
> +def v4b16_284 : NVPTXReg<"%v4b16_284">;
> +def v4b16_285 : NVPTXReg<"%v4b16_285">;
> +def v4b16_286 : NVPTXReg<"%v4b16_286">;
> +def v4b16_287 : NVPTXReg<"%v4b16_287">;
> +def v4b16_288 : NVPTXReg<"%v4b16_288">;
> +def v4b16_289 : NVPTXReg<"%v4b16_289">;
> +def v4b16_290 : NVPTXReg<"%v4b16_290">;
> +def v4b16_291 : NVPTXReg<"%v4b16_291">;
> +def v4b16_292 : NVPTXReg<"%v4b16_292">;
> +def v4b16_293 : NVPTXReg<"%v4b16_293">;
> +def v4b16_294 : NVPTXReg<"%v4b16_294">;
> +def v4b16_295 : NVPTXReg<"%v4b16_295">;
> +def v4b16_296 : NVPTXReg<"%v4b16_296">;
> +def v4b16_297 : NVPTXReg<"%v4b16_297">;
> +def v4b16_298 : NVPTXReg<"%v4b16_298">;
> +def v4b16_299 : NVPTXReg<"%v4b16_299">;
> +def v4b16_300 : NVPTXReg<"%v4b16_300">;
> +def v4b16_301 : NVPTXReg<"%v4b16_301">;
> +def v4b16_302 : NVPTXReg<"%v4b16_302">;
> +def v4b16_303 : NVPTXReg<"%v4b16_303">;
> +def v4b16_304 : NVPTXReg<"%v4b16_304">;
> +def v4b16_305 : NVPTXReg<"%v4b16_305">;
> +def v4b16_306 : NVPTXReg<"%v4b16_306">;
> +def v4b16_307 : NVPTXReg<"%v4b16_307">;
> +def v4b16_308 : NVPTXReg<"%v4b16_308">;
> +def v4b16_309 : NVPTXReg<"%v4b16_309">;
> +def v4b16_310 : NVPTXReg<"%v4b16_310">;
> +def v4b16_311 : NVPTXReg<"%v4b16_311">;
> +def v4b16_312 : NVPTXReg<"%v4b16_312">;
> +def v4b16_313 : NVPTXReg<"%v4b16_313">;
> +def v4b16_314 : NVPTXReg<"%v4b16_314">;
> +def v4b16_315 : NVPTXReg<"%v4b16_315">;
> +def v4b16_316 : NVPTXReg<"%v4b16_316">;
> +def v4b16_317 : NVPTXReg<"%v4b16_317">;
> +def v4b16_318 : NVPTXReg<"%v4b16_318">;
> +def v4b16_319 : NVPTXReg<"%v4b16_319">;
> +def v4b16_320 : NVPTXReg<"%v4b16_320">;
> +def v4b16_321 : NVPTXReg<"%v4b16_321">;
> +def v4b16_322 : NVPTXReg<"%v4b16_322">;
> +def v4b16_323 : NVPTXReg<"%v4b16_323">;
> +def v4b16_324 : NVPTXReg<"%v4b16_324">;
> +def v4b16_325 : NVPTXReg<"%v4b16_325">;
> +def v4b16_326 : NVPTXReg<"%v4b16_326">;
> +def v4b16_327 : NVPTXReg<"%v4b16_327">;
> +def v4b16_328 : NVPTXReg<"%v4b16_328">;
> +def v4b16_329 : NVPTXReg<"%v4b16_329">;
> +def v4b16_330 : NVPTXReg<"%v4b16_330">;
> +def v4b16_331 : NVPTXReg<"%v4b16_331">;
> +def v4b16_332 : NVPTXReg<"%v4b16_332">;
> +def v4b16_333 : NVPTXReg<"%v4b16_333">;
> +def v4b16_334 : NVPTXReg<"%v4b16_334">;
> +def v4b16_335 : NVPTXReg<"%v4b16_335">;
> +def v4b16_336 : NVPTXReg<"%v4b16_336">;
> +def v4b16_337 : NVPTXReg<"%v4b16_337">;
> +def v4b16_338 : NVPTXReg<"%v4b16_338">;
> +def v4b16_339 : NVPTXReg<"%v4b16_339">;
> +def v4b16_340 : NVPTXReg<"%v4b16_340">;
> +def v4b16_341 : NVPTXReg<"%v4b16_341">;
> +def v4b16_342 : NVPTXReg<"%v4b16_342">;
> +def v4b16_343 : NVPTXReg<"%v4b16_343">;
> +def v4b16_344 : NVPTXReg<"%v4b16_344">;
> +def v4b16_345 : NVPTXReg<"%v4b16_345">;
> +def v4b16_346 : NVPTXReg<"%v4b16_346">;
> +def v4b16_347 : NVPTXReg<"%v4b16_347">;
> +def v4b16_348 : NVPTXReg<"%v4b16_348">;
> +def v4b16_349 : NVPTXReg<"%v4b16_349">;
> +def v4b16_350 : NVPTXReg<"%v4b16_350">;
> +def v4b16_351 : NVPTXReg<"%v4b16_351">;
> +def v4b16_352 : NVPTXReg<"%v4b16_352">;
> +def v4b16_353 : NVPTXReg<"%v4b16_353">;
> +def v4b16_354 : NVPTXReg<"%v4b16_354">;
> +def v4b16_355 : NVPTXReg<"%v4b16_355">;
> +def v4b16_356 : NVPTXReg<"%v4b16_356">;
> +def v4b16_357 : NVPTXReg<"%v4b16_357">;
> +def v4b16_358 : NVPTXReg<"%v4b16_358">;
> +def v4b16_359 : NVPTXReg<"%v4b16_359">;
> +def v4b16_360 : NVPTXReg<"%v4b16_360">;
> +def v4b16_361 : NVPTXReg<"%v4b16_361">;
> +def v4b16_362 : NVPTXReg<"%v4b16_362">;
> +def v4b16_363 : NVPTXReg<"%v4b16_363">;
> +def v4b16_364 : NVPTXReg<"%v4b16_364">;
> +def v4b16_365 : NVPTXReg<"%v4b16_365">;
> +def v4b16_366 : NVPTXReg<"%v4b16_366">;
> +def v4b16_367 : NVPTXReg<"%v4b16_367">;
> +def v4b16_368 : NVPTXReg<"%v4b16_368">;
> +def v4b16_369 : NVPTXReg<"%v4b16_369">;
> +def v4b16_370 : NVPTXReg<"%v4b16_370">;
> +def v4b16_371 : NVPTXReg<"%v4b16_371">;
> +def v4b16_372 : NVPTXReg<"%v4b16_372">;
> +def v4b16_373 : NVPTXReg<"%v4b16_373">;
> +def v4b16_374 : NVPTXReg<"%v4b16_374">;
> +def v4b16_375 : NVPTXReg<"%v4b16_375">;
> +def v4b16_376 : NVPTXReg<"%v4b16_376">;
> +def v4b16_377 : NVPTXReg<"%v4b16_377">;
> +def v4b16_378 : NVPTXReg<"%v4b16_378">;
> +def v4b16_379 : NVPTXReg<"%v4b16_379">;
> +def v4b16_380 : NVPTXReg<"%v4b16_380">;
> +def v4b16_381 : NVPTXReg<"%v4b16_381">;
> +def v4b16_382 : NVPTXReg<"%v4b16_382">;
> +def v4b16_383 : NVPTXReg<"%v4b16_383">;
> +def v4b16_384 : NVPTXReg<"%v4b16_384">;
> +def v4b16_385 : NVPTXReg<"%v4b16_385">;
> +def v4b16_386 : NVPTXReg<"%v4b16_386">;
> +def v4b16_387 : NVPTXReg<"%v4b16_387">;
> +def v4b16_388 : NVPTXReg<"%v4b16_388">;
> +def v4b16_389 : NVPTXReg<"%v4b16_389">;
> +def v4b16_390 : NVPTXReg<"%v4b16_390">;
> +def v4b16_391 : NVPTXReg<"%v4b16_391">;
> +def v4b16_392 : NVPTXReg<"%v4b16_392">;
> +def v4b16_393 : NVPTXReg<"%v4b16_393">;
> +def v4b16_394 : NVPTXReg<"%v4b16_394">;
> +def v4b16_395 : NVPTXReg<"%v4b16_395">;
> +def v4b32_0 : NVPTXReg<"%v4b32_0">;
> +def v4b32_1 : NVPTXReg<"%v4b32_1">;
> +def v4b32_2 : NVPTXReg<"%v4b32_2">;
> +def v4b32_3 : NVPTXReg<"%v4b32_3">;
> +def v4b32_4 : NVPTXReg<"%v4b32_4">;
> +def v4b32_5 : NVPTXReg<"%v4b32_5">;
> +def v4b32_6 : NVPTXReg<"%v4b32_6">;
> +def v4b32_7 : NVPTXReg<"%v4b32_7">;
> +def v4b32_8 : NVPTXReg<"%v4b32_8">;
> +def v4b32_9 : NVPTXReg<"%v4b32_9">;
> +def v4b32_10 : NVPTXReg<"%v4b32_10">;
> +def v4b32_11 : NVPTXReg<"%v4b32_11">;
> +def v4b32_12 : NVPTXReg<"%v4b32_12">;
> +def v4b32_13 : NVPTXReg<"%v4b32_13">;
> +def v4b32_14 : NVPTXReg<"%v4b32_14">;
> +def v4b32_15 : NVPTXReg<"%v4b32_15">;
> +def v4b32_16 : NVPTXReg<"%v4b32_16">;
> +def v4b32_17 : NVPTXReg<"%v4b32_17">;
> +def v4b32_18 : NVPTXReg<"%v4b32_18">;
> +def v4b32_19 : NVPTXReg<"%v4b32_19">;
> +def v4b32_20 : NVPTXReg<"%v4b32_20">;
> +def v4b32_21 : NVPTXReg<"%v4b32_21">;
> +def v4b32_22 : NVPTXReg<"%v4b32_22">;
> +def v4b32_23 : NVPTXReg<"%v4b32_23">;
> +def v4b32_24 : NVPTXReg<"%v4b32_24">;
> +def v4b32_25 : NVPTXReg<"%v4b32_25">;
> +def v4b32_26 : NVPTXReg<"%v4b32_26">;
> +def v4b32_27 : NVPTXReg<"%v4b32_27">;
> +def v4b32_28 : NVPTXReg<"%v4b32_28">;
> +def v4b32_29 : NVPTXReg<"%v4b32_29">;
> +def v4b32_30 : NVPTXReg<"%v4b32_30">;
> +def v4b32_31 : NVPTXReg<"%v4b32_31">;
> +def v4b32_32 : NVPTXReg<"%v4b32_32">;
> +def v4b32_33 : NVPTXReg<"%v4b32_33">;
> +def v4b32_34 : NVPTXReg<"%v4b32_34">;
> +def v4b32_35 : NVPTXReg<"%v4b32_35">;
> +def v4b32_36 : NVPTXReg<"%v4b32_36">;
> +def v4b32_37 : NVPTXReg<"%v4b32_37">;
> +def v4b32_38 : NVPTXReg<"%v4b32_38">;
> +def v4b32_39 : NVPTXReg<"%v4b32_39">;
> +def v4b32_40 : NVPTXReg<"%v4b32_40">;
> +def v4b32_41 : NVPTXReg<"%v4b32_41">;
> +def v4b32_42 : NVPTXReg<"%v4b32_42">;
> +def v4b32_43 : NVPTXReg<"%v4b32_43">;
> +def v4b32_44 : NVPTXReg<"%v4b32_44">;
> +def v4b32_45 : NVPTXReg<"%v4b32_45">;
> +def v4b32_46 : NVPTXReg<"%v4b32_46">;
> +def v4b32_47 : NVPTXReg<"%v4b32_47">;
> +def v4b32_48 : NVPTXReg<"%v4b32_48">;
> +def v4b32_49 : NVPTXReg<"%v4b32_49">;
> +def v4b32_50 : NVPTXReg<"%v4b32_50">;
> +def v4b32_51 : NVPTXReg<"%v4b32_51">;
> +def v4b32_52 : NVPTXReg<"%v4b32_52">;
> +def v4b32_53 : NVPTXReg<"%v4b32_53">;
> +def v4b32_54 : NVPTXReg<"%v4b32_54">;
> +def v4b32_55 : NVPTXReg<"%v4b32_55">;
> +def v4b32_56 : NVPTXReg<"%v4b32_56">;
> +def v4b32_57 : NVPTXReg<"%v4b32_57">;
> +def v4b32_58 : NVPTXReg<"%v4b32_58">;
> +def v4b32_59 : NVPTXReg<"%v4b32_59">;
> +def v4b32_60 : NVPTXReg<"%v4b32_60">;
> +def v4b32_61 : NVPTXReg<"%v4b32_61">;
> +def v4b32_62 : NVPTXReg<"%v4b32_62">;
> +def v4b32_63 : NVPTXReg<"%v4b32_63">;
> +def v4b32_64 : NVPTXReg<"%v4b32_64">;
> +def v4b32_65 : NVPTXReg<"%v4b32_65">;
> +def v4b32_66 : NVPTXReg<"%v4b32_66">;
> +def v4b32_67 : NVPTXReg<"%v4b32_67">;
> +def v4b32_68 : NVPTXReg<"%v4b32_68">;
> +def v4b32_69 : NVPTXReg<"%v4b32_69">;
> +def v4b32_70 : NVPTXReg<"%v4b32_70">;
> +def v4b32_71 : NVPTXReg<"%v4b32_71">;
> +def v4b32_72 : NVPTXReg<"%v4b32_72">;
> +def v4b32_73 : NVPTXReg<"%v4b32_73">;
> +def v4b32_74 : NVPTXReg<"%v4b32_74">;
> +def v4b32_75 : NVPTXReg<"%v4b32_75">;
> +def v4b32_76 : NVPTXReg<"%v4b32_76">;
> +def v4b32_77 : NVPTXReg<"%v4b32_77">;
> +def v4b32_78 : NVPTXReg<"%v4b32_78">;
> +def v4b32_79 : NVPTXReg<"%v4b32_79">;
> +def v4b32_80 : NVPTXReg<"%v4b32_80">;
> +def v4b32_81 : NVPTXReg<"%v4b32_81">;
> +def v4b32_82 : NVPTXReg<"%v4b32_82">;
> +def v4b32_83 : NVPTXReg<"%v4b32_83">;
> +def v4b32_84 : NVPTXReg<"%v4b32_84">;
> +def v4b32_85 : NVPTXReg<"%v4b32_85">;
> +def v4b32_86 : NVPTXReg<"%v4b32_86">;
> +def v4b32_87 : NVPTXReg<"%v4b32_87">;
> +def v4b32_88 : NVPTXReg<"%v4b32_88">;
> +def v4b32_89 : NVPTXReg<"%v4b32_89">;
> +def v4b32_90 : NVPTXReg<"%v4b32_90">;
> +def v4b32_91 : NVPTXReg<"%v4b32_91">;
> +def v4b32_92 : NVPTXReg<"%v4b32_92">;
> +def v4b32_93 : NVPTXReg<"%v4b32_93">;
> +def v4b32_94 : NVPTXReg<"%v4b32_94">;
> +def v4b32_95 : NVPTXReg<"%v4b32_95">;
> +def v4b32_96 : NVPTXReg<"%v4b32_96">;
> +def v4b32_97 : NVPTXReg<"%v4b32_97">;
> +def v4b32_98 : NVPTXReg<"%v4b32_98">;
> +def v4b32_99 : NVPTXReg<"%v4b32_99">;
> +def v4b32_100 : NVPTXReg<"%v4b32_100">;
> +def v4b32_101 : NVPTXReg<"%v4b32_101">;
> +def v4b32_102 : NVPTXReg<"%v4b32_102">;
> +def v4b32_103 : NVPTXReg<"%v4b32_103">;
> +def v4b32_104 : NVPTXReg<"%v4b32_104">;
> +def v4b32_105 : NVPTXReg<"%v4b32_105">;
> +def v4b32_106 : NVPTXReg<"%v4b32_106">;
> +def v4b32_107 : NVPTXReg<"%v4b32_107">;
> +def v4b32_108 : NVPTXReg<"%v4b32_108">;
> +def v4b32_109 : NVPTXReg<"%v4b32_109">;
> +def v4b32_110 : NVPTXReg<"%v4b32_110">;
> +def v4b32_111 : NVPTXReg<"%v4b32_111">;
> +def v4b32_112 : NVPTXReg<"%v4b32_112">;
> +def v4b32_113 : NVPTXReg<"%v4b32_113">;
> +def v4b32_114 : NVPTXReg<"%v4b32_114">;
> +def v4b32_115 : NVPTXReg<"%v4b32_115">;
> +def v4b32_116 : NVPTXReg<"%v4b32_116">;
> +def v4b32_117 : NVPTXReg<"%v4b32_117">;
> +def v4b32_118 : NVPTXReg<"%v4b32_118">;
> +def v4b32_119 : NVPTXReg<"%v4b32_119">;
> +def v4b32_120 : NVPTXReg<"%v4b32_120">;
> +def v4b32_121 : NVPTXReg<"%v4b32_121">;
> +def v4b32_122 : NVPTXReg<"%v4b32_122">;
> +def v4b32_123 : NVPTXReg<"%v4b32_123">;
> +def v4b32_124 : NVPTXReg<"%v4b32_124">;
> +def v4b32_125 : NVPTXReg<"%v4b32_125">;
> +def v4b32_126 : NVPTXReg<"%v4b32_126">;
> +def v4b32_127 : NVPTXReg<"%v4b32_127">;
> +def v4b32_128 : NVPTXReg<"%v4b32_128">;
> +def v4b32_129 : NVPTXReg<"%v4b32_129">;
> +def v4b32_130 : NVPTXReg<"%v4b32_130">;
> +def v4b32_131 : NVPTXReg<"%v4b32_131">;
> +def v4b32_132 : NVPTXReg<"%v4b32_132">;
> +def v4b32_133 : NVPTXReg<"%v4b32_133">;
> +def v4b32_134 : NVPTXReg<"%v4b32_134">;
> +def v4b32_135 : NVPTXReg<"%v4b32_135">;
> +def v4b32_136 : NVPTXReg<"%v4b32_136">;
> +def v4b32_137 : NVPTXReg<"%v4b32_137">;
> +def v4b32_138 : NVPTXReg<"%v4b32_138">;
> +def v4b32_139 : NVPTXReg<"%v4b32_139">;
> +def v4b32_140 : NVPTXReg<"%v4b32_140">;
> +def v4b32_141 : NVPTXReg<"%v4b32_141">;
> +def v4b32_142 : NVPTXReg<"%v4b32_142">;
> +def v4b32_143 : NVPTXReg<"%v4b32_143">;
> +def v4b32_144 : NVPTXReg<"%v4b32_144">;
> +def v4b32_145 : NVPTXReg<"%v4b32_145">;
> +def v4b32_146 : NVPTXReg<"%v4b32_146">;
> +def v4b32_147 : NVPTXReg<"%v4b32_147">;
> +def v4b32_148 : NVPTXReg<"%v4b32_148">;
> +def v4b32_149 : NVPTXReg<"%v4b32_149">;
> +def v4b32_150 : NVPTXReg<"%v4b32_150">;
> +def v4b32_151 : NVPTXReg<"%v4b32_151">;
> +def v4b32_152 : NVPTXReg<"%v4b32_152">;
> +def v4b32_153 : NVPTXReg<"%v4b32_153">;
> +def v4b32_154 : NVPTXReg<"%v4b32_154">;
> +def v4b32_155 : NVPTXReg<"%v4b32_155">;
> +def v4b32_156 : NVPTXReg<"%v4b32_156">;
> +def v4b32_157 : NVPTXReg<"%v4b32_157">;
> +def v4b32_158 : NVPTXReg<"%v4b32_158">;
> +def v4b32_159 : NVPTXReg<"%v4b32_159">;
> +def v4b32_160 : NVPTXReg<"%v4b32_160">;
> +def v4b32_161 : NVPTXReg<"%v4b32_161">;
> +def v4b32_162 : NVPTXReg<"%v4b32_162">;
> +def v4b32_163 : NVPTXReg<"%v4b32_163">;
> +def v4b32_164 : NVPTXReg<"%v4b32_164">;
> +def v4b32_165 : NVPTXReg<"%v4b32_165">;
> +def v4b32_166 : NVPTXReg<"%v4b32_166">;
> +def v4b32_167 : NVPTXReg<"%v4b32_167">;
> +def v4b32_168 : NVPTXReg<"%v4b32_168">;
> +def v4b32_169 : NVPTXReg<"%v4b32_169">;
> +def v4b32_170 : NVPTXReg<"%v4b32_170">;
> +def v4b32_171 : NVPTXReg<"%v4b32_171">;
> +def v4b32_172 : NVPTXReg<"%v4b32_172">;
> +def v4b32_173 : NVPTXReg<"%v4b32_173">;
> +def v4b32_174 : NVPTXReg<"%v4b32_174">;
> +def v4b32_175 : NVPTXReg<"%v4b32_175">;
> +def v4b32_176 : NVPTXReg<"%v4b32_176">;
> +def v4b32_177 : NVPTXReg<"%v4b32_177">;
> +def v4b32_178 : NVPTXReg<"%v4b32_178">;
> +def v4b32_179 : NVPTXReg<"%v4b32_179">;
> +def v4b32_180 : NVPTXReg<"%v4b32_180">;
> +def v4b32_181 : NVPTXReg<"%v4b32_181">;
> +def v4b32_182 : NVPTXReg<"%v4b32_182">;
> +def v4b32_183 : NVPTXReg<"%v4b32_183">;
> +def v4b32_184 : NVPTXReg<"%v4b32_184">;
> +def v4b32_185 : NVPTXReg<"%v4b32_185">;
> +def v4b32_186 : NVPTXReg<"%v4b32_186">;
> +def v4b32_187 : NVPTXReg<"%v4b32_187">;
> +def v4b32_188 : NVPTXReg<"%v4b32_188">;
> +def v4b32_189 : NVPTXReg<"%v4b32_189">;
> +def v4b32_190 : NVPTXReg<"%v4b32_190">;
> +def v4b32_191 : NVPTXReg<"%v4b32_191">;
> +def v4b32_192 : NVPTXReg<"%v4b32_192">;
> +def v4b32_193 : NVPTXReg<"%v4b32_193">;
> +def v4b32_194 : NVPTXReg<"%v4b32_194">;
> +def v4b32_195 : NVPTXReg<"%v4b32_195">;
> +def v4b32_196 : NVPTXReg<"%v4b32_196">;
> +def v4b32_197 : NVPTXReg<"%v4b32_197">;
> +def v4b32_198 : NVPTXReg<"%v4b32_198">;
> +def v4b32_199 : NVPTXReg<"%v4b32_199">;
> +def v4b32_200 : NVPTXReg<"%v4b32_200">;
> +def v4b32_201 : NVPTXReg<"%v4b32_201">;
> +def v4b32_202 : NVPTXReg<"%v4b32_202">;
> +def v4b32_203 : NVPTXReg<"%v4b32_203">;
> +def v4b32_204 : NVPTXReg<"%v4b32_204">;
> +def v4b32_205 : NVPTXReg<"%v4b32_205">;
> +def v4b32_206 : NVPTXReg<"%v4b32_206">;
> +def v4b32_207 : NVPTXReg<"%v4b32_207">;
> +def v4b32_208 : NVPTXReg<"%v4b32_208">;
> +def v4b32_209 : NVPTXReg<"%v4b32_209">;
> +def v4b32_210 : NVPTXReg<"%v4b32_210">;
> +def v4b32_211 : NVPTXReg<"%v4b32_211">;
> +def v4b32_212 : NVPTXReg<"%v4b32_212">;
> +def v4b32_213 : NVPTXReg<"%v4b32_213">;
> +def v4b32_214 : NVPTXReg<"%v4b32_214">;
> +def v4b32_215 : NVPTXReg<"%v4b32_215">;
> +def v4b32_216 : NVPTXReg<"%v4b32_216">;
> +def v4b32_217 : NVPTXReg<"%v4b32_217">;
> +def v4b32_218 : NVPTXReg<"%v4b32_218">;
> +def v4b32_219 : NVPTXReg<"%v4b32_219">;
> +def v4b32_220 : NVPTXReg<"%v4b32_220">;
> +def v4b32_221 : NVPTXReg<"%v4b32_221">;
> +def v4b32_222 : NVPTXReg<"%v4b32_222">;
> +def v4b32_223 : NVPTXReg<"%v4b32_223">;
> +def v4b32_224 : NVPTXReg<"%v4b32_224">;
> +def v4b32_225 : NVPTXReg<"%v4b32_225">;
> +def v4b32_226 : NVPTXReg<"%v4b32_226">;
> +def v4b32_227 : NVPTXReg<"%v4b32_227">;
> +def v4b32_228 : NVPTXReg<"%v4b32_228">;
> +def v4b32_229 : NVPTXReg<"%v4b32_229">;
> +def v4b32_230 : NVPTXReg<"%v4b32_230">;
> +def v4b32_231 : NVPTXReg<"%v4b32_231">;
> +def v4b32_232 : NVPTXReg<"%v4b32_232">;
> +def v4b32_233 : NVPTXReg<"%v4b32_233">;
> +def v4b32_234 : NVPTXReg<"%v4b32_234">;
> +def v4b32_235 : NVPTXReg<"%v4b32_235">;
> +def v4b32_236 : NVPTXReg<"%v4b32_236">;
> +def v4b32_237 : NVPTXReg<"%v4b32_237">;
> +def v4b32_238 : NVPTXReg<"%v4b32_238">;
> +def v4b32_239 : NVPTXReg<"%v4b32_239">;
> +def v4b32_240 : NVPTXReg<"%v4b32_240">;
> +def v4b32_241 : NVPTXReg<"%v4b32_241">;
> +def v4b32_242 : NVPTXReg<"%v4b32_242">;
> +def v4b32_243 : NVPTXReg<"%v4b32_243">;
> +def v4b32_244 : NVPTXReg<"%v4b32_244">;
> +def v4b32_245 : NVPTXReg<"%v4b32_245">;
> +def v4b32_246 : NVPTXReg<"%v4b32_246">;
> +def v4b32_247 : NVPTXReg<"%v4b32_247">;
> +def v4b32_248 : NVPTXReg<"%v4b32_248">;
> +def v4b32_249 : NVPTXReg<"%v4b32_249">;
> +def v4b32_250 : NVPTXReg<"%v4b32_250">;
> +def v4b32_251 : NVPTXReg<"%v4b32_251">;
> +def v4b32_252 : NVPTXReg<"%v4b32_252">;
> +def v4b32_253 : NVPTXReg<"%v4b32_253">;
> +def v4b32_254 : NVPTXReg<"%v4b32_254">;
> +def v4b32_255 : NVPTXReg<"%v4b32_255">;
> +def v4b32_256 : NVPTXReg<"%v4b32_256">;
> +def v4b32_257 : NVPTXReg<"%v4b32_257">;
> +def v4b32_258 : NVPTXReg<"%v4b32_258">;
> +def v4b32_259 : NVPTXReg<"%v4b32_259">;
> +def v4b32_260 : NVPTXReg<"%v4b32_260">;
> +def v4b32_261 : NVPTXReg<"%v4b32_261">;
> +def v4b32_262 : NVPTXReg<"%v4b32_262">;
> +def v4b32_263 : NVPTXReg<"%v4b32_263">;
> +def v4b32_264 : NVPTXReg<"%v4b32_264">;
> +def v4b32_265 : NVPTXReg<"%v4b32_265">;
> +def v4b32_266 : NVPTXReg<"%v4b32_266">;
> +def v4b32_267 : NVPTXReg<"%v4b32_267">;
> +def v4b32_268 : NVPTXReg<"%v4b32_268">;
> +def v4b32_269 : NVPTXReg<"%v4b32_269">;
> +def v4b32_270 : NVPTXReg<"%v4b32_270">;
> +def v4b32_271 : NVPTXReg<"%v4b32_271">;
> +def v4b32_272 : NVPTXReg<"%v4b32_272">;
> +def v4b32_273 : NVPTXReg<"%v4b32_273">;
> +def v4b32_274 : NVPTXReg<"%v4b32_274">;
> +def v4b32_275 : NVPTXReg<"%v4b32_275">;
> +def v4b32_276 : NVPTXReg<"%v4b32_276">;
> +def v4b32_277 : NVPTXReg<"%v4b32_277">;
> +def v4b32_278 : NVPTXReg<"%v4b32_278">;
> +def v4b32_279 : NVPTXReg<"%v4b32_279">;
> +def v4b32_280 : NVPTXReg<"%v4b32_280">;
> +def v4b32_281 : NVPTXReg<"%v4b32_281">;
> +def v4b32_282 : NVPTXReg<"%v4b32_282">;
> +def v4b32_283 : NVPTXReg<"%v4b32_283">;
> +def v4b32_284 : NVPTXReg<"%v4b32_284">;
> +def v4b32_285 : NVPTXReg<"%v4b32_285">;
> +def v4b32_286 : NVPTXReg<"%v4b32_286">;
> +def v4b32_287 : NVPTXReg<"%v4b32_287">;
> +def v4b32_288 : NVPTXReg<"%v4b32_288">;
> +def v4b32_289 : NVPTXReg<"%v4b32_289">;
> +def v4b32_290 : NVPTXReg<"%v4b32_290">;
> +def v4b32_291 : NVPTXReg<"%v4b32_291">;
> +def v4b32_292 : NVPTXReg<"%v4b32_292">;
> +def v4b32_293 : NVPTXReg<"%v4b32_293">;
> +def v4b32_294 : NVPTXReg<"%v4b32_294">;
> +def v4b32_295 : NVPTXReg<"%v4b32_295">;
> +def v4b32_296 : NVPTXReg<"%v4b32_296">;
> +def v4b32_297 : NVPTXReg<"%v4b32_297">;
> +def v4b32_298 : NVPTXReg<"%v4b32_298">;
> +def v4b32_299 : NVPTXReg<"%v4b32_299">;
> +def v4b32_300 : NVPTXReg<"%v4b32_300">;
> +def v4b32_301 : NVPTXReg<"%v4b32_301">;
> +def v4b32_302 : NVPTXReg<"%v4b32_302">;
> +def v4b32_303 : NVPTXReg<"%v4b32_303">;
> +def v4b32_304 : NVPTXReg<"%v4b32_304">;
> +def v4b32_305 : NVPTXReg<"%v4b32_305">;
> +def v4b32_306 : NVPTXReg<"%v4b32_306">;
> +def v4b32_307 : NVPTXReg<"%v4b32_307">;
> +def v4b32_308 : NVPTXReg<"%v4b32_308">;
> +def v4b32_309 : NVPTXReg<"%v4b32_309">;
> +def v4b32_310 : NVPTXReg<"%v4b32_310">;
> +def v4b32_311 : NVPTXReg<"%v4b32_311">;
> +def v4b32_312 : NVPTXReg<"%v4b32_312">;
> +def v4b32_313 : NVPTXReg<"%v4b32_313">;
> +def v4b32_314 : NVPTXReg<"%v4b32_314">;
> +def v4b32_315 : NVPTXReg<"%v4b32_315">;
> +def v4b32_316 : NVPTXReg<"%v4b32_316">;
> +def v4b32_317 : NVPTXReg<"%v4b32_317">;
> +def v4b32_318 : NVPTXReg<"%v4b32_318">;
> +def v4b32_319 : NVPTXReg<"%v4b32_319">;
> +def v4b32_320 : NVPTXReg<"%v4b32_320">;
> +def v4b32_321 : NVPTXReg<"%v4b32_321">;
> +def v4b32_322 : NVPTXReg<"%v4b32_322">;
> +def v4b32_323 : NVPTXReg<"%v4b32_323">;
> +def v4b32_324 : NVPTXReg<"%v4b32_324">;
> +def v4b32_325 : NVPTXReg<"%v4b32_325">;
> +def v4b32_326 : NVPTXReg<"%v4b32_326">;
> +def v4b32_327 : NVPTXReg<"%v4b32_327">;
> +def v4b32_328 : NVPTXReg<"%v4b32_328">;
> +def v4b32_329 : NVPTXReg<"%v4b32_329">;
> +def v4b32_330 : NVPTXReg<"%v4b32_330">;
> +def v4b32_331 : NVPTXReg<"%v4b32_331">;
> +def v4b32_332 : NVPTXReg<"%v4b32_332">;
> +def v4b32_333 : NVPTXReg<"%v4b32_333">;
> +def v4b32_334 : NVPTXReg<"%v4b32_334">;
> +def v4b32_335 : NVPTXReg<"%v4b32_335">;
> +def v4b32_336 : NVPTXReg<"%v4b32_336">;
> +def v4b32_337 : NVPTXReg<"%v4b32_337">;
> +def v4b32_338 : NVPTXReg<"%v4b32_338">;
> +def v4b32_339 : NVPTXReg<"%v4b32_339">;
> +def v4b32_340 : NVPTXReg<"%v4b32_340">;
> +def v4b32_341 : NVPTXReg<"%v4b32_341">;
> +def v4b32_342 : NVPTXReg<"%v4b32_342">;
> +def v4b32_343 : NVPTXReg<"%v4b32_343">;
> +def v4b32_344 : NVPTXReg<"%v4b32_344">;
> +def v4b32_345 : NVPTXReg<"%v4b32_345">;
> +def v4b32_346 : NVPTXReg<"%v4b32_346">;
> +def v4b32_347 : NVPTXReg<"%v4b32_347">;
> +def v4b32_348 : NVPTXReg<"%v4b32_348">;
> +def v4b32_349 : NVPTXReg<"%v4b32_349">;
> +def v4b32_350 : NVPTXReg<"%v4b32_350">;
> +def v4b32_351 : NVPTXReg<"%v4b32_351">;
> +def v4b32_352 : NVPTXReg<"%v4b32_352">;
> +def v4b32_353 : NVPTXReg<"%v4b32_353">;
> +def v4b32_354 : NVPTXReg<"%v4b32_354">;
> +def v4b32_355 : NVPTXReg<"%v4b32_355">;
> +def v4b32_356 : NVPTXReg<"%v4b32_356">;
> +def v4b32_357 : NVPTXReg<"%v4b32_357">;
> +def v4b32_358 : NVPTXReg<"%v4b32_358">;
> +def v4b32_359 : NVPTXReg<"%v4b32_359">;
> +def v4b32_360 : NVPTXReg<"%v4b32_360">;
> +def v4b32_361 : NVPTXReg<"%v4b32_361">;
> +def v4b32_362 : NVPTXReg<"%v4b32_362">;
> +def v4b32_363 : NVPTXReg<"%v4b32_363">;
> +def v4b32_364 : NVPTXReg<"%v4b32_364">;
> +def v4b32_365 : NVPTXReg<"%v4b32_365">;
> +def v4b32_366 : NVPTXReg<"%v4b32_366">;
> +def v4b32_367 : NVPTXReg<"%v4b32_367">;
> +def v4b32_368 : NVPTXReg<"%v4b32_368">;
> +def v4b32_369 : NVPTXReg<"%v4b32_369">;
> +def v4b32_370 : NVPTXReg<"%v4b32_370">;
> +def v4b32_371 : NVPTXReg<"%v4b32_371">;
> +def v4b32_372 : NVPTXReg<"%v4b32_372">;
> +def v4b32_373 : NVPTXReg<"%v4b32_373">;
> +def v4b32_374 : NVPTXReg<"%v4b32_374">;
> +def v4b32_375 : NVPTXReg<"%v4b32_375">;
> +def v4b32_376 : NVPTXReg<"%v4b32_376">;
> +def v4b32_377 : NVPTXReg<"%v4b32_377">;
> +def v4b32_378 : NVPTXReg<"%v4b32_378">;
> +def v4b32_379 : NVPTXReg<"%v4b32_379">;
> +def v4b32_380 : NVPTXReg<"%v4b32_380">;
> +def v4b32_381 : NVPTXReg<"%v4b32_381">;
> +def v4b32_382 : NVPTXReg<"%v4b32_382">;
> +def v4b32_383 : NVPTXReg<"%v4b32_383">;
> +def v4b32_384 : NVPTXReg<"%v4b32_384">;
> +def v4b32_385 : NVPTXReg<"%v4b32_385">;
> +def v4b32_386 : NVPTXReg<"%v4b32_386">;
> +def v4b32_387 : NVPTXReg<"%v4b32_387">;
> +def v4b32_388 : NVPTXReg<"%v4b32_388">;
> +def v4b32_389 : NVPTXReg<"%v4b32_389">;
> +def v4b32_390 : NVPTXReg<"%v4b32_390">;
> +def v4b32_391 : NVPTXReg<"%v4b32_391">;
> +def v4b32_392 : NVPTXReg<"%v4b32_392">;
> +def v4b32_393 : NVPTXReg<"%v4b32_393">;
> +def v4b32_394 : NVPTXReg<"%v4b32_394">;
> +def v4b32_395 : NVPTXReg<"%v4b32_395">;
> +
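As an aside on the register file itself: these long runs of near-identical
definitions look machine-generated, and TableGen's foreach loop can usually
express each run in a couple of lines. A minimal sketch, assuming the 0-N
range syntax and the '#' paste operator apply here the same way they do in
other backends' .td files:

  foreach i = 0-395 in {
    // Expands to: def v4b32_0 : NVPTXReg<"%v4b32_0">; ... v4b32_395
    def v4b32_#i : NVPTXReg<"%v4b32_"#i>;
  }

The same pattern would presumably collapse the %v4b16 run above and the
%ia/%la/%fa argument registers below, which would make
NVPTXRegisterInfo.td much easier to audit.
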
> +//===--- Arguments --------------------------------------------------------===//
> +def ia0 : NVPTXReg<"%ia0">;
> +def ia1 : NVPTXReg<"%ia1">;
> +def ia2 : NVPTXReg<"%ia2">;
> +def ia3 : NVPTXReg<"%ia3">;
> +def ia4 : NVPTXReg<"%ia4">;
> +def ia5 : NVPTXReg<"%ia5">;
> +def ia6 : NVPTXReg<"%ia6">;
> +def ia7 : NVPTXReg<"%ia7">;
> +def ia8 : NVPTXReg<"%ia8">;
> +def ia9 : NVPTXReg<"%ia9">;
> +def ia10 : NVPTXReg<"%ia10">;
> +def ia11 : NVPTXReg<"%ia11">;
> +def ia12 : NVPTXReg<"%ia12">;
> +def ia13 : NVPTXReg<"%ia13">;
> +def ia14 : NVPTXReg<"%ia14">;
> +def ia15 : NVPTXReg<"%ia15">;
> +def ia16 : NVPTXReg<"%ia16">;
> +def ia17 : NVPTXReg<"%ia17">;
> +def ia18 : NVPTXReg<"%ia18">;
> +def ia19 : NVPTXReg<"%ia19">;
> +def ia20 : NVPTXReg<"%ia20">;
> +def ia21 : NVPTXReg<"%ia21">;
> +def ia22 : NVPTXReg<"%ia22">;
> +def ia23 : NVPTXReg<"%ia23">;
> +def ia24 : NVPTXReg<"%ia24">;
> +def ia25 : NVPTXReg<"%ia25">;
> +def ia26 : NVPTXReg<"%ia26">;
> +def ia27 : NVPTXReg<"%ia27">;
> +def ia28 : NVPTXReg<"%ia28">;
> +def ia29 : NVPTXReg<"%ia29">;
> +def ia30 : NVPTXReg<"%ia30">;
> +def ia31 : NVPTXReg<"%ia31">;
> +def ia32 : NVPTXReg<"%ia32">;
> +def ia33 : NVPTXReg<"%ia33">;
> +def ia34 : NVPTXReg<"%ia34">;
> +def ia35 : NVPTXReg<"%ia35">;
> +def ia36 : NVPTXReg<"%ia36">;
> +def ia37 : NVPTXReg<"%ia37">;
> +def ia38 : NVPTXReg<"%ia38">;
> +def ia39 : NVPTXReg<"%ia39">;
> +def ia40 : NVPTXReg<"%ia40">;
> +def ia41 : NVPTXReg<"%ia41">;
> +def ia42 : NVPTXReg<"%ia42">;
> +def ia43 : NVPTXReg<"%ia43">;
> +def ia44 : NVPTXReg<"%ia44">;
> +def ia45 : NVPTXReg<"%ia45">;
> +def ia46 : NVPTXReg<"%ia46">;
> +def ia47 : NVPTXReg<"%ia47">;
> +def ia48 : NVPTXReg<"%ia48">;
> +def ia49 : NVPTXReg<"%ia49">;
> +def ia50 : NVPTXReg<"%ia50">;
> +def ia51 : NVPTXReg<"%ia51">;
> +def ia52 : NVPTXReg<"%ia52">;
> +def ia53 : NVPTXReg<"%ia53">;
> +def ia54 : NVPTXReg<"%ia54">;
> +def ia55 : NVPTXReg<"%ia55">;
> +def ia56 : NVPTXReg<"%ia56">;
> +def ia57 : NVPTXReg<"%ia57">;
> +def ia58 : NVPTXReg<"%ia58">;
> +def ia59 : NVPTXReg<"%ia59">;
> +def ia60 : NVPTXReg<"%ia60">;
> +def ia61 : NVPTXReg<"%ia61">;
> +def ia62 : NVPTXReg<"%ia62">;
> +def ia63 : NVPTXReg<"%ia63">;
> +def ia64 : NVPTXReg<"%ia64">;
> +def ia65 : NVPTXReg<"%ia65">;
> +def ia66 : NVPTXReg<"%ia66">;
> +def ia67 : NVPTXReg<"%ia67">;
> +def ia68 : NVPTXReg<"%ia68">;
> +def ia69 : NVPTXReg<"%ia69">;
> +def ia70 : NVPTXReg<"%ia70">;
> +def ia71 : NVPTXReg<"%ia71">;
> +def ia72 : NVPTXReg<"%ia72">;
> +def ia73 : NVPTXReg<"%ia73">;
> +def ia74 : NVPTXReg<"%ia74">;
> +def ia75 : NVPTXReg<"%ia75">;
> +def ia76 : NVPTXReg<"%ia76">;
> +def ia77 : NVPTXReg<"%ia77">;
> +def ia78 : NVPTXReg<"%ia78">;
> +def ia79 : NVPTXReg<"%ia79">;
> +def ia80 : NVPTXReg<"%ia80">;
> +def ia81 : NVPTXReg<"%ia81">;
> +def ia82 : NVPTXReg<"%ia82">;
> +def ia83 : NVPTXReg<"%ia83">;
> +def ia84 : NVPTXReg<"%ia84">;
> +def ia85 : NVPTXReg<"%ia85">;
> +def ia86 : NVPTXReg<"%ia86">;
> +def ia87 : NVPTXReg<"%ia87">;
> +def ia88 : NVPTXReg<"%ia88">;
> +def ia89 : NVPTXReg<"%ia89">;
> +def ia90 : NVPTXReg<"%ia90">;
> +def ia91 : NVPTXReg<"%ia91">;
> +def ia92 : NVPTXReg<"%ia92">;
> +def ia93 : NVPTXReg<"%ia93">;
> +def ia94 : NVPTXReg<"%ia94">;
> +def ia95 : NVPTXReg<"%ia95">;
> +def ia96 : NVPTXReg<"%ia96">;
> +def ia97 : NVPTXReg<"%ia97">;
> +def ia98 : NVPTXReg<"%ia98">;
> +def ia99 : NVPTXReg<"%ia99">;
> +def ia100 : NVPTXReg<"%ia100">;
> +def ia101 : NVPTXReg<"%ia101">;
> +def ia102 : NVPTXReg<"%ia102">;
> +def ia103 : NVPTXReg<"%ia103">;
> +def ia104 : NVPTXReg<"%ia104">;
> +def ia105 : NVPTXReg<"%ia105">;
> +def ia106 : NVPTXReg<"%ia106">;
> +def ia107 : NVPTXReg<"%ia107">;
> +def ia108 : NVPTXReg<"%ia108">;
> +def ia109 : NVPTXReg<"%ia109">;
> +def ia110 : NVPTXReg<"%ia110">;
> +def ia111 : NVPTXReg<"%ia111">;
> +def ia112 : NVPTXReg<"%ia112">;
> +def ia113 : NVPTXReg<"%ia113">;
> +def ia114 : NVPTXReg<"%ia114">;
> +def ia115 : NVPTXReg<"%ia115">;
> +def ia116 : NVPTXReg<"%ia116">;
> +def ia117 : NVPTXReg<"%ia117">;
> +def ia118 : NVPTXReg<"%ia118">;
> +def ia119 : NVPTXReg<"%ia119">;
> +def ia120 : NVPTXReg<"%ia120">;
> +def ia121 : NVPTXReg<"%ia121">;
> +def ia122 : NVPTXReg<"%ia122">;
> +def ia123 : NVPTXReg<"%ia123">;
> +def ia124 : NVPTXReg<"%ia124">;
> +def ia125 : NVPTXReg<"%ia125">;
> +def ia126 : NVPTXReg<"%ia126">;
> +def ia127 : NVPTXReg<"%ia127">;
> +def ia128 : NVPTXReg<"%ia128">;
> +def ia129 : NVPTXReg<"%ia129">;
> +def ia130 : NVPTXReg<"%ia130">;
> +def ia131 : NVPTXReg<"%ia131">;
> +def ia132 : NVPTXReg<"%ia132">;
> +def ia133 : NVPTXReg<"%ia133">;
> +def ia134 : NVPTXReg<"%ia134">;
> +def ia135 : NVPTXReg<"%ia135">;
> +def ia136 : NVPTXReg<"%ia136">;
> +def ia137 : NVPTXReg<"%ia137">;
> +def ia138 : NVPTXReg<"%ia138">;
> +def ia139 : NVPTXReg<"%ia139">;
> +def ia140 : NVPTXReg<"%ia140">;
> +def ia141 : NVPTXReg<"%ia141">;
> +def ia142 : NVPTXReg<"%ia142">;
> +def ia143 : NVPTXReg<"%ia143">;
> +def ia144 : NVPTXReg<"%ia144">;
> +def ia145 : NVPTXReg<"%ia145">;
> +def ia146 : NVPTXReg<"%ia146">;
> +def ia147 : NVPTXReg<"%ia147">;
> +def ia148 : NVPTXReg<"%ia148">;
> +def ia149 : NVPTXReg<"%ia149">;
> +def ia150 : NVPTXReg<"%ia150">;
> +def ia151 : NVPTXReg<"%ia151">;
> +def ia152 : NVPTXReg<"%ia152">;
> +def ia153 : NVPTXReg<"%ia153">;
> +def ia154 : NVPTXReg<"%ia154">;
> +def ia155 : NVPTXReg<"%ia155">;
> +def ia156 : NVPTXReg<"%ia156">;
> +def ia157 : NVPTXReg<"%ia157">;
> +def ia158 : NVPTXReg<"%ia158">;
> +def ia159 : NVPTXReg<"%ia159">;
> +def ia160 : NVPTXReg<"%ia160">;
> +def ia161 : NVPTXReg<"%ia161">;
> +def ia162 : NVPTXReg<"%ia162">;
> +def ia163 : NVPTXReg<"%ia163">;
> +def ia164 : NVPTXReg<"%ia164">;
> +def ia165 : NVPTXReg<"%ia165">;
> +def ia166 : NVPTXReg<"%ia166">;
> +def ia167 : NVPTXReg<"%ia167">;
> +def ia168 : NVPTXReg<"%ia168">;
> +def ia169 : NVPTXReg<"%ia169">;
> +def ia170 : NVPTXReg<"%ia170">;
> +def ia171 : NVPTXReg<"%ia171">;
> +def ia172 : NVPTXReg<"%ia172">;
> +def ia173 : NVPTXReg<"%ia173">;
> +def ia174 : NVPTXReg<"%ia174">;
> +def ia175 : NVPTXReg<"%ia175">;
> +def ia176 : NVPTXReg<"%ia176">;
> +def ia177 : NVPTXReg<"%ia177">;
> +def ia178 : NVPTXReg<"%ia178">;
> +def ia179 : NVPTXReg<"%ia179">;
> +def ia180 : NVPTXReg<"%ia180">;
> +def ia181 : NVPTXReg<"%ia181">;
> +def ia182 : NVPTXReg<"%ia182">;
> +def ia183 : NVPTXReg<"%ia183">;
> +def ia184 : NVPTXReg<"%ia184">;
> +def ia185 : NVPTXReg<"%ia185">;
> +def ia186 : NVPTXReg<"%ia186">;
> +def ia187 : NVPTXReg<"%ia187">;
> +def ia188 : NVPTXReg<"%ia188">;
> +def ia189 : NVPTXReg<"%ia189">;
> +def ia190 : NVPTXReg<"%ia190">;
> +def ia191 : NVPTXReg<"%ia191">;
> +def ia192 : NVPTXReg<"%ia192">;
> +def ia193 : NVPTXReg<"%ia193">;
> +def ia194 : NVPTXReg<"%ia194">;
> +def ia195 : NVPTXReg<"%ia195">;
> +def ia196 : NVPTXReg<"%ia196">;
> +def ia197 : NVPTXReg<"%ia197">;
> +def ia198 : NVPTXReg<"%ia198">;
> +def ia199 : NVPTXReg<"%ia199">;
> +def ia200 : NVPTXReg<"%ia200">;
> +def ia201 : NVPTXReg<"%ia201">;
> +def ia202 : NVPTXReg<"%ia202">;
> +def ia203 : NVPTXReg<"%ia203">;
> +def ia204 : NVPTXReg<"%ia204">;
> +def ia205 : NVPTXReg<"%ia205">;
> +def ia206 : NVPTXReg<"%ia206">;
> +def ia207 : NVPTXReg<"%ia207">;
> +def ia208 : NVPTXReg<"%ia208">;
> +def ia209 : NVPTXReg<"%ia209">;
> +def ia210 : NVPTXReg<"%ia210">;
> +def ia211 : NVPTXReg<"%ia211">;
> +def ia212 : NVPTXReg<"%ia212">;
> +def ia213 : NVPTXReg<"%ia213">;
> +def ia214 : NVPTXReg<"%ia214">;
> +def ia215 : NVPTXReg<"%ia215">;
> +def ia216 : NVPTXReg<"%ia216">;
> +def ia217 : NVPTXReg<"%ia217">;
> +def ia218 : NVPTXReg<"%ia218">;
> +def ia219 : NVPTXReg<"%ia219">;
> +def ia220 : NVPTXReg<"%ia220">;
> +def ia221 : NVPTXReg<"%ia221">;
> +def ia222 : NVPTXReg<"%ia222">;
> +def ia223 : NVPTXReg<"%ia223">;
> +def ia224 : NVPTXReg<"%ia224">;
> +def ia225 : NVPTXReg<"%ia225">;
> +def ia226 : NVPTXReg<"%ia226">;
> +def ia227 : NVPTXReg<"%ia227">;
> +def ia228 : NVPTXReg<"%ia228">;
> +def ia229 : NVPTXReg<"%ia229">;
> +def ia230 : NVPTXReg<"%ia230">;
> +def ia231 : NVPTXReg<"%ia231">;
> +def ia232 : NVPTXReg<"%ia232">;
> +def ia233 : NVPTXReg<"%ia233">;
> +def ia234 : NVPTXReg<"%ia234">;
> +def ia235 : NVPTXReg<"%ia235">;
> +def ia236 : NVPTXReg<"%ia236">;
> +def ia237 : NVPTXReg<"%ia237">;
> +def ia238 : NVPTXReg<"%ia238">;
> +def ia239 : NVPTXReg<"%ia239">;
> +def ia240 : NVPTXReg<"%ia240">;
> +def ia241 : NVPTXReg<"%ia241">;
> +def ia242 : NVPTXReg<"%ia242">;
> +def ia243 : NVPTXReg<"%ia243">;
> +def ia244 : NVPTXReg<"%ia244">;
> +def ia245 : NVPTXReg<"%ia245">;
> +def ia246 : NVPTXReg<"%ia246">;
> +def ia247 : NVPTXReg<"%ia247">;
> +def ia248 : NVPTXReg<"%ia248">;
> +def ia249 : NVPTXReg<"%ia249">;
> +def ia250 : NVPTXReg<"%ia250">;
> +def ia251 : NVPTXReg<"%ia251">;
> +def ia252 : NVPTXReg<"%ia252">;
> +def ia253 : NVPTXReg<"%ia253">;
> +def ia254 : NVPTXReg<"%ia254">;
> +def ia255 : NVPTXReg<"%ia255">;
> +def ia256 : NVPTXReg<"%ia256">;
> +def ia257 : NVPTXReg<"%ia257">;
> +def ia258 : NVPTXReg<"%ia258">;
> +def ia259 : NVPTXReg<"%ia259">;
> +def ia260 : NVPTXReg<"%ia260">;
> +def ia261 : NVPTXReg<"%ia261">;
> +def ia262 : NVPTXReg<"%ia262">;
> +def ia263 : NVPTXReg<"%ia263">;
> +def ia264 : NVPTXReg<"%ia264">;
> +def ia265 : NVPTXReg<"%ia265">;
> +def ia266 : NVPTXReg<"%ia266">;
> +def ia267 : NVPTXReg<"%ia267">;
> +def ia268 : NVPTXReg<"%ia268">;
> +def ia269 : NVPTXReg<"%ia269">;
> +def ia270 : NVPTXReg<"%ia270">;
> +def ia271 : NVPTXReg<"%ia271">;
> +def ia272 : NVPTXReg<"%ia272">;
> +def ia273 : NVPTXReg<"%ia273">;
> +def ia274 : NVPTXReg<"%ia274">;
> +def ia275 : NVPTXReg<"%ia275">;
> +def ia276 : NVPTXReg<"%ia276">;
> +def ia277 : NVPTXReg<"%ia277">;
> +def ia278 : NVPTXReg<"%ia278">;
> +def ia279 : NVPTXReg<"%ia279">;
> +def ia280 : NVPTXReg<"%ia280">;
> +def ia281 : NVPTXReg<"%ia281">;
> +def ia282 : NVPTXReg<"%ia282">;
> +def ia283 : NVPTXReg<"%ia283">;
> +def ia284 : NVPTXReg<"%ia284">;
> +def ia285 : NVPTXReg<"%ia285">;
> +def ia286 : NVPTXReg<"%ia286">;
> +def ia287 : NVPTXReg<"%ia287">;
> +def ia288 : NVPTXReg<"%ia288">;
> +def ia289 : NVPTXReg<"%ia289">;
> +def ia290 : NVPTXReg<"%ia290">;
> +def ia291 : NVPTXReg<"%ia291">;
> +def ia292 : NVPTXReg<"%ia292">;
> +def ia293 : NVPTXReg<"%ia293">;
> +def ia294 : NVPTXReg<"%ia294">;
> +def ia295 : NVPTXReg<"%ia295">;
> +def ia296 : NVPTXReg<"%ia296">;
> +def ia297 : NVPTXReg<"%ia297">;
> +def ia298 : NVPTXReg<"%ia298">;
> +def ia299 : NVPTXReg<"%ia299">;
> +def ia300 : NVPTXReg<"%ia300">;
> +def ia301 : NVPTXReg<"%ia301">;
> +def ia302 : NVPTXReg<"%ia302">;
> +def ia303 : NVPTXReg<"%ia303">;
> +def ia304 : NVPTXReg<"%ia304">;
> +def ia305 : NVPTXReg<"%ia305">;
> +def ia306 : NVPTXReg<"%ia306">;
> +def ia307 : NVPTXReg<"%ia307">;
> +def ia308 : NVPTXReg<"%ia308">;
> +def ia309 : NVPTXReg<"%ia309">;
> +def ia310 : NVPTXReg<"%ia310">;
> +def ia311 : NVPTXReg<"%ia311">;
> +def ia312 : NVPTXReg<"%ia312">;
> +def ia313 : NVPTXReg<"%ia313">;
> +def ia314 : NVPTXReg<"%ia314">;
> +def ia315 : NVPTXReg<"%ia315">;
> +def ia316 : NVPTXReg<"%ia316">;
> +def ia317 : NVPTXReg<"%ia317">;
> +def ia318 : NVPTXReg<"%ia318">;
> +def ia319 : NVPTXReg<"%ia319">;
> +def ia320 : NVPTXReg<"%ia320">;
> +def ia321 : NVPTXReg<"%ia321">;
> +def ia322 : NVPTXReg<"%ia322">;
> +def ia323 : NVPTXReg<"%ia323">;
> +def ia324 : NVPTXReg<"%ia324">;
> +def ia325 : NVPTXReg<"%ia325">;
> +def ia326 : NVPTXReg<"%ia326">;
> +def ia327 : NVPTXReg<"%ia327">;
> +def ia328 : NVPTXReg<"%ia328">;
> +def ia329 : NVPTXReg<"%ia329">;
> +def ia330 : NVPTXReg<"%ia330">;
> +def ia331 : NVPTXReg<"%ia331">;
> +def ia332 : NVPTXReg<"%ia332">;
> +def ia333 : NVPTXReg<"%ia333">;
> +def ia334 : NVPTXReg<"%ia334">;
> +def ia335 : NVPTXReg<"%ia335">;
> +def ia336 : NVPTXReg<"%ia336">;
> +def ia337 : NVPTXReg<"%ia337">;
> +def ia338 : NVPTXReg<"%ia338">;
> +def ia339 : NVPTXReg<"%ia339">;
> +def ia340 : NVPTXReg<"%ia340">;
> +def ia341 : NVPTXReg<"%ia341">;
> +def ia342 : NVPTXReg<"%ia342">;
> +def ia343 : NVPTXReg<"%ia343">;
> +def ia344 : NVPTXReg<"%ia344">;
> +def ia345 : NVPTXReg<"%ia345">;
> +def ia346 : NVPTXReg<"%ia346">;
> +def ia347 : NVPTXReg<"%ia347">;
> +def ia348 : NVPTXReg<"%ia348">;
> +def ia349 : NVPTXReg<"%ia349">;
> +def ia350 : NVPTXReg<"%ia350">;
> +def ia351 : NVPTXReg<"%ia351">;
> +def ia352 : NVPTXReg<"%ia352">;
> +def ia353 : NVPTXReg<"%ia353">;
> +def ia354 : NVPTXReg<"%ia354">;
> +def ia355 : NVPTXReg<"%ia355">;
> +def ia356 : NVPTXReg<"%ia356">;
> +def ia357 : NVPTXReg<"%ia357">;
> +def ia358 : NVPTXReg<"%ia358">;
> +def ia359 : NVPTXReg<"%ia359">;
> +def ia360 : NVPTXReg<"%ia360">;
> +def ia361 : NVPTXReg<"%ia361">;
> +def ia362 : NVPTXReg<"%ia362">;
> +def ia363 : NVPTXReg<"%ia363">;
> +def ia364 : NVPTXReg<"%ia364">;
> +def ia365 : NVPTXReg<"%ia365">;
> +def ia366 : NVPTXReg<"%ia366">;
> +def ia367 : NVPTXReg<"%ia367">;
> +def ia368 : NVPTXReg<"%ia368">;
> +def ia369 : NVPTXReg<"%ia369">;
> +def ia370 : NVPTXReg<"%ia370">;
> +def ia371 : NVPTXReg<"%ia371">;
> +def ia372 : NVPTXReg<"%ia372">;
> +def ia373 : NVPTXReg<"%ia373">;
> +def ia374 : NVPTXReg<"%ia374">;
> +def ia375 : NVPTXReg<"%ia375">;
> +def ia376 : NVPTXReg<"%ia376">;
> +def ia377 : NVPTXReg<"%ia377">;
> +def ia378 : NVPTXReg<"%ia378">;
> +def ia379 : NVPTXReg<"%ia379">;
> +def ia380 : NVPTXReg<"%ia380">;
> +def ia381 : NVPTXReg<"%ia381">;
> +def ia382 : NVPTXReg<"%ia382">;
> +def ia383 : NVPTXReg<"%ia383">;
> +def ia384 : NVPTXReg<"%ia384">;
> +def ia385 : NVPTXReg<"%ia385">;
> +def ia386 : NVPTXReg<"%ia386">;
> +def ia387 : NVPTXReg<"%ia387">;
> +def ia388 : NVPTXReg<"%ia388">;
> +def ia389 : NVPTXReg<"%ia389">;
> +def ia390 : NVPTXReg<"%ia390">;
> +def ia391 : NVPTXReg<"%ia391">;
> +def ia392 : NVPTXReg<"%ia392">;
> +def ia393 : NVPTXReg<"%ia393">;
> +def ia394 : NVPTXReg<"%ia394">;
> +def ia395 : NVPTXReg<"%ia395">;
> +def la0 : NVPTXReg<"%la0">;
> +def la1 : NVPTXReg<"%la1">;
> +def la2 : NVPTXReg<"%la2">;
> +def la3 : NVPTXReg<"%la3">;
> +def la4 : NVPTXReg<"%la4">;
> +def la5 : NVPTXReg<"%la5">;
> +def la6 : NVPTXReg<"%la6">;
> +def la7 : NVPTXReg<"%la7">;
> +def la8 : NVPTXReg<"%la8">;
> +def la9 : NVPTXReg<"%la9">;
> +def la10 : NVPTXReg<"%la10">;
> +def la11 : NVPTXReg<"%la11">;
> +def la12 : NVPTXReg<"%la12">;
> +def la13 : NVPTXReg<"%la13">;
> +def la14 : NVPTXReg<"%la14">;
> +def la15 : NVPTXReg<"%la15">;
> +def la16 : NVPTXReg<"%la16">;
> +def la17 : NVPTXReg<"%la17">;
> +def la18 : NVPTXReg<"%la18">;
> +def la19 : NVPTXReg<"%la19">;
> +def la20 : NVPTXReg<"%la20">;
> +def la21 : NVPTXReg<"%la21">;
> +def la22 : NVPTXReg<"%la22">;
> +def la23 : NVPTXReg<"%la23">;
> +def la24 : NVPTXReg<"%la24">;
> +def la25 : NVPTXReg<"%la25">;
> +def la26 : NVPTXReg<"%la26">;
> +def la27 : NVPTXReg<"%la27">;
> +def la28 : NVPTXReg<"%la28">;
> +def la29 : NVPTXReg<"%la29">;
> +def la30 : NVPTXReg<"%la30">;
> +def la31 : NVPTXReg<"%la31">;
> +def la32 : NVPTXReg<"%la32">;
> +def la33 : NVPTXReg<"%la33">;
> +def la34 : NVPTXReg<"%la34">;
> +def la35 : NVPTXReg<"%la35">;
> +def la36 : NVPTXReg<"%la36">;
> +def la37 : NVPTXReg<"%la37">;
> +def la38 : NVPTXReg<"%la38">;
> +def la39 : NVPTXReg<"%la39">;
> +def la40 : NVPTXReg<"%la40">;
> +def la41 : NVPTXReg<"%la41">;
> +def la42 : NVPTXReg<"%la42">;
> +def la43 : NVPTXReg<"%la43">;
> +def la44 : NVPTXReg<"%la44">;
> +def la45 : NVPTXReg<"%la45">;
> +def la46 : NVPTXReg<"%la46">;
> +def la47 : NVPTXReg<"%la47">;
> +def la48 : NVPTXReg<"%la48">;
> +def la49 : NVPTXReg<"%la49">;
> +def la50 : NVPTXReg<"%la50">;
> +def la51 : NVPTXReg<"%la51">;
> +def la52 : NVPTXReg<"%la52">;
> +def la53 : NVPTXReg<"%la53">;
> +def la54 : NVPTXReg<"%la54">;
> +def la55 : NVPTXReg<"%la55">;
> +def la56 : NVPTXReg<"%la56">;
> +def la57 : NVPTXReg<"%la57">;
> +def la58 : NVPTXReg<"%la58">;
> +def la59 : NVPTXReg<"%la59">;
> +def la60 : NVPTXReg<"%la60">;
> +def la61 : NVPTXReg<"%la61">;
> +def la62 : NVPTXReg<"%la62">;
> +def la63 : NVPTXReg<"%la63">;
> +def la64 : NVPTXReg<"%la64">;
> +def la65 : NVPTXReg<"%la65">;
> +def la66 : NVPTXReg<"%la66">;
> +def la67 : NVPTXReg<"%la67">;
> +def la68 : NVPTXReg<"%la68">;
> +def la69 : NVPTXReg<"%la69">;
> +def la70 : NVPTXReg<"%la70">;
> +def la71 : NVPTXReg<"%la71">;
> +def la72 : NVPTXReg<"%la72">;
> +def la73 : NVPTXReg<"%la73">;
> +def la74 : NVPTXReg<"%la74">;
> +def la75 : NVPTXReg<"%la75">;
> +def la76 : NVPTXReg<"%la76">;
> +def la77 : NVPTXReg<"%la77">;
> +def la78 : NVPTXReg<"%la78">;
> +def la79 : NVPTXReg<"%la79">;
> +def la80 : NVPTXReg<"%la80">;
> +def la81 : NVPTXReg<"%la81">;
> +def la82 : NVPTXReg<"%la82">;
> +def la83 : NVPTXReg<"%la83">;
> +def la84 : NVPTXReg<"%la84">;
> +def la85 : NVPTXReg<"%la85">;
> +def la86 : NVPTXReg<"%la86">;
> +def la87 : NVPTXReg<"%la87">;
> +def la88 : NVPTXReg<"%la88">;
> +def la89 : NVPTXReg<"%la89">;
> +def la90 : NVPTXReg<"%la90">;
> +def la91 : NVPTXReg<"%la91">;
> +def la92 : NVPTXReg<"%la92">;
> +def la93 : NVPTXReg<"%la93">;
> +def la94 : NVPTXReg<"%la94">;
> +def la95 : NVPTXReg<"%la95">;
> +def la96 : NVPTXReg<"%la96">;
> +def la97 : NVPTXReg<"%la97">;
> +def la98 : NVPTXReg<"%la98">;
> +def la99 : NVPTXReg<"%la99">;
> +def la100 : NVPTXReg<"%la100">;
> +def la101 : NVPTXReg<"%la101">;
> +def la102 : NVPTXReg<"%la102">;
> +def la103 : NVPTXReg<"%la103">;
> +def la104 : NVPTXReg<"%la104">;
> +def la105 : NVPTXReg<"%la105">;
> +def la106 : NVPTXReg<"%la106">;
> +def la107 : NVPTXReg<"%la107">;
> +def la108 : NVPTXReg<"%la108">;
> +def la109 : NVPTXReg<"%la109">;
> +def la110 : NVPTXReg<"%la110">;
> +def la111 : NVPTXReg<"%la111">;
> +def la112 : NVPTXReg<"%la112">;
> +def la113 : NVPTXReg<"%la113">;
> +def la114 : NVPTXReg<"%la114">;
> +def la115 : NVPTXReg<"%la115">;
> +def la116 : NVPTXReg<"%la116">;
> +def la117 : NVPTXReg<"%la117">;
> +def la118 : NVPTXReg<"%la118">;
> +def la119 : NVPTXReg<"%la119">;
> +def la120 : NVPTXReg<"%la120">;
> +def la121 : NVPTXReg<"%la121">;
> +def la122 : NVPTXReg<"%la122">;
> +def la123 : NVPTXReg<"%la123">;
> +def la124 : NVPTXReg<"%la124">;
> +def la125 : NVPTXReg<"%la125">;
> +def la126 : NVPTXReg<"%la126">;
> +def la127 : NVPTXReg<"%la127">;
> +def la128 : NVPTXReg<"%la128">;
> +def la129 : NVPTXReg<"%la129">;
> +def la130 : NVPTXReg<"%la130">;
> +def la131 : NVPTXReg<"%la131">;
> +def la132 : NVPTXReg<"%la132">;
> +def la133 : NVPTXReg<"%la133">;
> +def la134 : NVPTXReg<"%la134">;
> +def la135 : NVPTXReg<"%la135">;
> +def la136 : NVPTXReg<"%la136">;
> +def la137 : NVPTXReg<"%la137">;
> +def la138 : NVPTXReg<"%la138">;
> +def la139 : NVPTXReg<"%la139">;
> +def la140 : NVPTXReg<"%la140">;
> +def la141 : NVPTXReg<"%la141">;
> +def la142 : NVPTXReg<"%la142">;
> +def la143 : NVPTXReg<"%la143">;
> +def la144 : NVPTXReg<"%la144">;
> +def la145 : NVPTXReg<"%la145">;
> +def la146 : NVPTXReg<"%la146">;
> +def la147 : NVPTXReg<"%la147">;
> +def la148 : NVPTXReg<"%la148">;
> +def la149 : NVPTXReg<"%la149">;
> +def la150 : NVPTXReg<"%la150">;
> +def la151 : NVPTXReg<"%la151">;
> +def la152 : NVPTXReg<"%la152">;
> +def la153 : NVPTXReg<"%la153">;
> +def la154 : NVPTXReg<"%la154">;
> +def la155 : NVPTXReg<"%la155">;
> +def la156 : NVPTXReg<"%la156">;
> +def la157 : NVPTXReg<"%la157">;
> +def la158 : NVPTXReg<"%la158">;
> +def la159 : NVPTXReg<"%la159">;
> +def la160 : NVPTXReg<"%la160">;
> +def la161 : NVPTXReg<"%la161">;
> +def la162 : NVPTXReg<"%la162">;
> +def la163 : NVPTXReg<"%la163">;
> +def la164 : NVPTXReg<"%la164">;
> +def la165 : NVPTXReg<"%la165">;
> +def la166 : NVPTXReg<"%la166">;
> +def la167 : NVPTXReg<"%la167">;
> +def la168 : NVPTXReg<"%la168">;
> +def la169 : NVPTXReg<"%la169">;
> +def la170 : NVPTXReg<"%la170">;
> +def la171 : NVPTXReg<"%la171">;
> +def la172 : NVPTXReg<"%la172">;
> +def la173 : NVPTXReg<"%la173">;
> +def la174 : NVPTXReg<"%la174">;
> +def la175 : NVPTXReg<"%la175">;
> +def la176 : NVPTXReg<"%la176">;
> +def la177 : NVPTXReg<"%la177">;
> +def la178 : NVPTXReg<"%la178">;
> +def la179 : NVPTXReg<"%la179">;
> +def la180 : NVPTXReg<"%la180">;
> +def la181 : NVPTXReg<"%la181">;
> +def la182 : NVPTXReg<"%la182">;
> +def la183 : NVPTXReg<"%la183">;
> +def la184 : NVPTXReg<"%la184">;
> +def la185 : NVPTXReg<"%la185">;
> +def la186 : NVPTXReg<"%la186">;
> +def la187 : NVPTXReg<"%la187">;
> +def la188 : NVPTXReg<"%la188">;
> +def la189 : NVPTXReg<"%la189">;
> +def la190 : NVPTXReg<"%la190">;
> +def la191 : NVPTXReg<"%la191">;
> +def la192 : NVPTXReg<"%la192">;
> +def la193 : NVPTXReg<"%la193">;
> +def la194 : NVPTXReg<"%la194">;
> +def la195 : NVPTXReg<"%la195">;
> +def la196 : NVPTXReg<"%la196">;
> +def la197 : NVPTXReg<"%la197">;
> +def la198 : NVPTXReg<"%la198">;
> +def la199 : NVPTXReg<"%la199">;
> +def la200 : NVPTXReg<"%la200">;
> +def la201 : NVPTXReg<"%la201">;
> +def la202 : NVPTXReg<"%la202">;
> +def la203 : NVPTXReg<"%la203">;
> +def la204 : NVPTXReg<"%la204">;
> +def la205 : NVPTXReg<"%la205">;
> +def la206 : NVPTXReg<"%la206">;
> +def la207 : NVPTXReg<"%la207">;
> +def la208 : NVPTXReg<"%la208">;
> +def la209 : NVPTXReg<"%la209">;
> +def la210 : NVPTXReg<"%la210">;
> +def la211 : NVPTXReg<"%la211">;
> +def la212 : NVPTXReg<"%la212">;
> +def la213 : NVPTXReg<"%la213">;
> +def la214 : NVPTXReg<"%la214">;
> +def la215 : NVPTXReg<"%la215">;
> +def la216 : NVPTXReg<"%la216">;
> +def la217 : NVPTXReg<"%la217">;
> +def la218 : NVPTXReg<"%la218">;
> +def la219 : NVPTXReg<"%la219">;
> +def la220 : NVPTXReg<"%la220">;
> +def la221 : NVPTXReg<"%la221">;
> +def la222 : NVPTXReg<"%la222">;
> +def la223 : NVPTXReg<"%la223">;
> +def la224 : NVPTXReg<"%la224">;
> +def la225 : NVPTXReg<"%la225">;
> +def la226 : NVPTXReg<"%la226">;
> +def la227 : NVPTXReg<"%la227">;
> +def la228 : NVPTXReg<"%la228">;
> +def la229 : NVPTXReg<"%la229">;
> +def la230 : NVPTXReg<"%la230">;
> +def la231 : NVPTXReg<"%la231">;
> +def la232 : NVPTXReg<"%la232">;
> +def la233 : NVPTXReg<"%la233">;
> +def la234 : NVPTXReg<"%la234">;
> +def la235 : NVPTXReg<"%la235">;
> +def la236 : NVPTXReg<"%la236">;
> +def la237 : NVPTXReg<"%la237">;
> +def la238 : NVPTXReg<"%la238">;
> +def la239 : NVPTXReg<"%la239">;
> +def la240 : NVPTXReg<"%la240">;
> +def la241 : NVPTXReg<"%la241">;
> +def la242 : NVPTXReg<"%la242">;
> +def la243 : NVPTXReg<"%la243">;
> +def la244 : NVPTXReg<"%la244">;
> +def la245 : NVPTXReg<"%la245">;
> +def la246 : NVPTXReg<"%la246">;
> +def la247 : NVPTXReg<"%la247">;
> +def la248 : NVPTXReg<"%la248">;
> +def la249 : NVPTXReg<"%la249">;
> +def la250 : NVPTXReg<"%la250">;
> +def la251 : NVPTXReg<"%la251">;
> +def la252 : NVPTXReg<"%la252">;
> +def la253 : NVPTXReg<"%la253">;
> +def la254 : NVPTXReg<"%la254">;
> +def la255 : NVPTXReg<"%la255">;
> +def la256 : NVPTXReg<"%la256">;
> +def la257 : NVPTXReg<"%la257">;
> +def la258 : NVPTXReg<"%la258">;
> +def la259 : NVPTXReg<"%la259">;
> +def la260 : NVPTXReg<"%la260">;
> +def la261 : NVPTXReg<"%la261">;
> +def la262 : NVPTXReg<"%la262">;
> +def la263 : NVPTXReg<"%la263">;
> +def la264 : NVPTXReg<"%la264">;
> +def la265 : NVPTXReg<"%la265">;
> +def la266 : NVPTXReg<"%la266">;
> +def la267 : NVPTXReg<"%la267">;
> +def la268 : NVPTXReg<"%la268">;
> +def la269 : NVPTXReg<"%la269">;
> +def la270 : NVPTXReg<"%la270">;
> +def la271 : NVPTXReg<"%la271">;
> +def la272 : NVPTXReg<"%la272">;
> +def la273 : NVPTXReg<"%la273">;
> +def la274 : NVPTXReg<"%la274">;
> +def la275 : NVPTXReg<"%la275">;
> +def la276 : NVPTXReg<"%la276">;
> +def la277 : NVPTXReg<"%la277">;
> +def la278 : NVPTXReg<"%la278">;
> +def la279 : NVPTXReg<"%la279">;
> +def la280 : NVPTXReg<"%la280">;
> +def la281 : NVPTXReg<"%la281">;
> +def la282 : NVPTXReg<"%la282">;
> +def la283 : NVPTXReg<"%la283">;
> +def la284 : NVPTXReg<"%la284">;
> +def la285 : NVPTXReg<"%la285">;
> +def la286 : NVPTXReg<"%la286">;
> +def la287 : NVPTXReg<"%la287">;
> +def la288 : NVPTXReg<"%la288">;
> +def la289 : NVPTXReg<"%la289">;
> +def la290 : NVPTXReg<"%la290">;
> +def la291 : NVPTXReg<"%la291">;
> +def la292 : NVPTXReg<"%la292">;
> +def la293 : NVPTXReg<"%la293">;
> +def la294 : NVPTXReg<"%la294">;
> +def la295 : NVPTXReg<"%la295">;
> +def la296 : NVPTXReg<"%la296">;
> +def la297 : NVPTXReg<"%la297">;
> +def la298 : NVPTXReg<"%la298">;
> +def la299 : NVPTXReg<"%la299">;
> +def la300 : NVPTXReg<"%la300">;
> +def la301 : NVPTXReg<"%la301">;
> +def la302 : NVPTXReg<"%la302">;
> +def la303 : NVPTXReg<"%la303">;
> +def la304 : NVPTXReg<"%la304">;
> +def la305 : NVPTXReg<"%la305">;
> +def la306 : NVPTXReg<"%la306">;
> +def la307 : NVPTXReg<"%la307">;
> +def la308 : NVPTXReg<"%la308">;
> +def la309 : NVPTXReg<"%la309">;
> +def la310 : NVPTXReg<"%la310">;
> +def la311 : NVPTXReg<"%la311">;
> +def la312 : NVPTXReg<"%la312">;
> +def la313 : NVPTXReg<"%la313">;
> +def la314 : NVPTXReg<"%la314">;
> +def la315 : NVPTXReg<"%la315">;
> +def la316 : NVPTXReg<"%la316">;
> +def la317 : NVPTXReg<"%la317">;
> +def la318 : NVPTXReg<"%la318">;
> +def la319 : NVPTXReg<"%la319">;
> +def la320 : NVPTXReg<"%la320">;
> +def la321 : NVPTXReg<"%la321">;
> +def la322 : NVPTXReg<"%la322">;
> +def la323 : NVPTXReg<"%la323">;
> +def la324 : NVPTXReg<"%la324">;
> +def la325 : NVPTXReg<"%la325">;
> +def la326 : NVPTXReg<"%la326">;
> +def la327 : NVPTXReg<"%la327">;
> +def la328 : NVPTXReg<"%la328">;
> +def la329 : NVPTXReg<"%la329">;
> +def la330 : NVPTXReg<"%la330">;
> +def la331 : NVPTXReg<"%la331">;
> +def la332 : NVPTXReg<"%la332">;
> +def la333 : NVPTXReg<"%la333">;
> +def la334 : NVPTXReg<"%la334">;
> +def la335 : NVPTXReg<"%la335">;
> +def la336 : NVPTXReg<"%la336">;
> +def la337 : NVPTXReg<"%la337">;
> +def la338 : NVPTXReg<"%la338">;
> +def la339 : NVPTXReg<"%la339">;
> +def la340 : NVPTXReg<"%la340">;
> +def la341 : NVPTXReg<"%la341">;
> +def la342 : NVPTXReg<"%la342">;
> +def la343 : NVPTXReg<"%la343">;
> +def la344 : NVPTXReg<"%la344">;
> +def la345 : NVPTXReg<"%la345">;
> +def la346 : NVPTXReg<"%la346">;
> +def la347 : NVPTXReg<"%la347">;
> +def la348 : NVPTXReg<"%la348">;
> +def la349 : NVPTXReg<"%la349">;
> +def la350 : NVPTXReg<"%la350">;
> +def la351 : NVPTXReg<"%la351">;
> +def la352 : NVPTXReg<"%la352">;
> +def la353 : NVPTXReg<"%la353">;
> +def la354 : NVPTXReg<"%la354">;
> +def la355 : NVPTXReg<"%la355">;
> +def la356 : NVPTXReg<"%la356">;
> +def la357 : NVPTXReg<"%la357">;
> +def la358 : NVPTXReg<"%la358">;
> +def la359 : NVPTXReg<"%la359">;
> +def la360 : NVPTXReg<"%la360">;
> +def la361 : NVPTXReg<"%la361">;
> +def la362 : NVPTXReg<"%la362">;
> +def la363 : NVPTXReg<"%la363">;
> +def la364 : NVPTXReg<"%la364">;
> +def la365 : NVPTXReg<"%la365">;
> +def la366 : NVPTXReg<"%la366">;
> +def la367 : NVPTXReg<"%la367">;
> +def la368 : NVPTXReg<"%la368">;
> +def la369 : NVPTXReg<"%la369">;
> +def la370 : NVPTXReg<"%la370">;
> +def la371 : NVPTXReg<"%la371">;
> +def la372 : NVPTXReg<"%la372">;
> +def la373 : NVPTXReg<"%la373">;
> +def la374 : NVPTXReg<"%la374">;
> +def la375 : NVPTXReg<"%la375">;
> +def la376 : NVPTXReg<"%la376">;
> +def la377 : NVPTXReg<"%la377">;
> +def la378 : NVPTXReg<"%la378">;
> +def la379 : NVPTXReg<"%la379">;
> +def la380 : NVPTXReg<"%la380">;
> +def la381 : NVPTXReg<"%la381">;
> +def la382 : NVPTXReg<"%la382">;
> +def la383 : NVPTXReg<"%la383">;
> +def la384 : NVPTXReg<"%la384">;
> +def la385 : NVPTXReg<"%la385">;
> +def la386 : NVPTXReg<"%la386">;
> +def la387 : NVPTXReg<"%la387">;
> +def la388 : NVPTXReg<"%la388">;
> +def la389 : NVPTXReg<"%la389">;
> +def la390 : NVPTXReg<"%la390">;
> +def la391 : NVPTXReg<"%la391">;
> +def la392 : NVPTXReg<"%la392">;
> +def la393 : NVPTXReg<"%la393">;
> +def la394 : NVPTXReg<"%la394">;
> +def la395 : NVPTXReg<"%la395">;
> +def fa0 : NVPTXReg<"%fa0">;
> +def fa1 : NVPTXReg<"%fa1">;
> +def fa2 : NVPTXReg<"%fa2">;
> +def fa3 : NVPTXReg<"%fa3">;
> +def fa4 : NVPTXReg<"%fa4">;
> +def fa5 : NVPTXReg<"%fa5">;
> +def fa6 : NVPTXReg<"%fa6">;
> +def fa7 : NVPTXReg<"%fa7">;
> +def fa8 : NVPTXReg<"%fa8">;
> +def fa9 : NVPTXReg<"%fa9">;
> +def fa10 : NVPTXReg<"%fa10">;
> +def fa11 : NVPTXReg<"%fa11">;
> +def fa12 : NVPTXReg<"%fa12">;
> +def fa13 : NVPTXReg<"%fa13">;
> +def fa14 : NVPTXReg<"%fa14">;
> +def fa15 : NVPTXReg<"%fa15">;
> +def fa16 : NVPTXReg<"%fa16">;
> +def fa17 : NVPTXReg<"%fa17">;
> +def fa18 : NVPTXReg<"%fa18">;
> +def fa19 : NVPTXReg<"%fa19">;
> +def fa20 : NVPTXReg<"%fa20">;
> +def fa21 : NVPTXReg<"%fa21">;
> +def fa22 : NVPTXReg<"%fa22">;
> +def fa23 : NVPTXReg<"%fa23">;
> +def fa24 : NVPTXReg<"%fa24">;
> +def fa25 : NVPTXReg<"%fa25">;
> +def fa26 : NVPTXReg<"%fa26">;
> +def fa27 : NVPTXReg<"%fa27">;
> +def fa28 : NVPTXReg<"%fa28">;
> +def fa29 : NVPTXReg<"%fa29">;
> +def fa30 : NVPTXReg<"%fa30">;
> +def fa31 : NVPTXReg<"%fa31">;
> +def fa32 : NVPTXReg<"%fa32">;
> +def fa33 : NVPTXReg<"%fa33">;
> +def fa34 : NVPTXReg<"%fa34">;
> +def fa35 : NVPTXReg<"%fa35">;
> +def fa36 : NVPTXReg<"%fa36">;
> +def fa37 : NVPTXReg<"%fa37">;
> +def fa38 : NVPTXReg<"%fa38">;
> +def fa39 : NVPTXReg<"%fa39">;
> +def fa40 : NVPTXReg<"%fa40">;
> +def fa41 : NVPTXReg<"%fa41">;
> +def fa42 : NVPTXReg<"%fa42">;
> +def fa43 : NVPTXReg<"%fa43">;
> +def fa44 : NVPTXReg<"%fa44">;
> +def fa45 : NVPTXReg<"%fa45">;
> +def fa46 : NVPTXReg<"%fa46">;
> +def fa47 : NVPTXReg<"%fa47">;
> +def fa48 : NVPTXReg<"%fa48">;
> +def fa49 : NVPTXReg<"%fa49">;
> +def fa50 : NVPTXReg<"%fa50">;
> +def fa51 : NVPTXReg<"%fa51">;
> +def fa52 : NVPTXReg<"%fa52">;
> +def fa53 : NVPTXReg<"%fa53">;
> +def fa54 : NVPTXReg<"%fa54">;
> +def fa55 : NVPTXReg<"%fa55">;
> +def fa56 : NVPTXReg<"%fa56">;
> +def fa57 : NVPTXReg<"%fa57">;
> +def fa58 : NVPTXReg<"%fa58">;
> +def fa59 : NVPTXReg<"%fa59">;
> +def fa60 : NVPTXReg<"%fa60">;
> +def fa61 : NVPTXReg<"%fa61">;
> +def fa62 : NVPTXReg<"%fa62">;
> +def fa63 : NVPTXReg<"%fa63">;
> +def fa64 : NVPTXReg<"%fa64">;
> +def fa65 : NVPTXReg<"%fa65">;
> +def fa66 : NVPTXReg<"%fa66">;
> +def fa67 : NVPTXReg<"%fa67">;
> +def fa68 : NVPTXReg<"%fa68">;
> +def fa69 : NVPTXReg<"%fa69">;
> +def fa70 : NVPTXReg<"%fa70">;
> +def fa71 : NVPTXReg<"%fa71">;
> +def fa72 : NVPTXReg<"%fa72">;
> +def fa73 : NVPTXReg<"%fa73">;
> +def fa74 : NVPTXReg<"%fa74">;
> +def fa75 : NVPTXReg<"%fa75">;
> +def fa76 : NVPTXReg<"%fa76">;
> +def fa77 : NVPTXReg<"%fa77">;
> +def fa78 : NVPTXReg<"%fa78">;
> +def fa79 : NVPTXReg<"%fa79">;
> +def fa80 : NVPTXReg<"%fa80">;
> +def fa81 : NVPTXReg<"%fa81">;
> +def fa82 : NVPTXReg<"%fa82">;
> +def fa83 : NVPTXReg<"%fa83">;
> +def fa84 : NVPTXReg<"%fa84">;
> +def fa85 : NVPTXReg<"%fa85">;
> +def fa86 : NVPTXReg<"%fa86">;
> +def fa87 : NVPTXReg<"%fa87">;
> +def fa88 : NVPTXReg<"%fa88">;
> +def fa89 : NVPTXReg<"%fa89">;
> +def fa90 : NVPTXReg<"%fa90">;
> +def fa91 : NVPTXReg<"%fa91">;
> +def fa92 : NVPTXReg<"%fa92">;
> +def fa93 : NVPTXReg<"%fa93">;
> +def fa94 : NVPTXReg<"%fa94">;
> +def fa95 : NVPTXReg<"%fa95">;
> +def fa96 : NVPTXReg<"%fa96">;
> +def fa97 : NVPTXReg<"%fa97">;
> +def fa98 : NVPTXReg<"%fa98">;
> +def fa99 : NVPTXReg<"%fa99">;
> +def fa100 : NVPTXReg<"%fa100">;
> +def fa101 : NVPTXReg<"%fa101">;
> +def fa102 : NVPTXReg<"%fa102">;
> +def fa103 : NVPTXReg<"%fa103">;
> +def fa104 : NVPTXReg<"%fa104">;
> +def fa105 : NVPTXReg<"%fa105">;
> +def fa106 : NVPTXReg<"%fa106">;
> +def fa107 : NVPTXReg<"%fa107">;
> +def fa108 : NVPTXReg<"%fa108">;
> +def fa109 : NVPTXReg<"%fa109">;
> +def fa110 : NVPTXReg<"%fa110">;
> +def fa111 : NVPTXReg<"%fa111">;
> +def fa112 : NVPTXReg<"%fa112">;
> +def fa113 : NVPTXReg<"%fa113">;
> +def fa114 : NVPTXReg<"%fa114">;
> +def fa115 : NVPTXReg<"%fa115">;
> +def fa116 : NVPTXReg<"%fa116">;
> +def fa117 : NVPTXReg<"%fa117">;
> +def fa118 : NVPTXReg<"%fa118">;
> +def fa119 : NVPTXReg<"%fa119">;
> +def fa120 : NVPTXReg<"%fa120">;
> +def fa121 : NVPTXReg<"%fa121">;
> +def fa122 : NVPTXReg<"%fa122">;
> +def fa123 : NVPTXReg<"%fa123">;
> +def fa124 : NVPTXReg<"%fa124">;
> +def fa125 : NVPTXReg<"%fa125">;
> +def fa126 : NVPTXReg<"%fa126">;
> +def fa127 : NVPTXReg<"%fa127">;
> +def fa128 : NVPTXReg<"%fa128">;
> +def fa129 : NVPTXReg<"%fa129">;
> +def fa130 : NVPTXReg<"%fa130">;
> +def fa131 : NVPTXReg<"%fa131">;
> +def fa132 : NVPTXReg<"%fa132">;
> +def fa133 : NVPTXReg<"%fa133">;
> +def fa134 : NVPTXReg<"%fa134">;
> +def fa135 : NVPTXReg<"%fa135">;
> +def fa136 : NVPTXReg<"%fa136">;
> +def fa137 : NVPTXReg<"%fa137">;
> +def fa138 : NVPTXReg<"%fa138">;
> +def fa139 : NVPTXReg<"%fa139">;
> +def fa140 : NVPTXReg<"%fa140">;
> +def fa141 : NVPTXReg<"%fa141">;
> +def fa142 : NVPTXReg<"%fa142">;
> +def fa143 : NVPTXReg<"%fa143">;
> +def fa144 : NVPTXReg<"%fa144">;
> +def fa145 : NVPTXReg<"%fa145">;
> +def fa146 : NVPTXReg<"%fa146">;
> +def fa147 : NVPTXReg<"%fa147">;
> +def fa148 : NVPTXReg<"%fa148">;
> +def fa149 : NVPTXReg<"%fa149">;
> +def fa150 : NVPTXReg<"%fa150">;
> +def fa151 : NVPTXReg<"%fa151">;
> +def fa152 : NVPTXReg<"%fa152">;
> +def fa153 : NVPTXReg<"%fa153">;
> +def fa154 : NVPTXReg<"%fa154">;
> +def fa155 : NVPTXReg<"%fa155">;
> +def fa156 : NVPTXReg<"%fa156">;
> +def fa157 : NVPTXReg<"%fa157">;
> +def fa158 : NVPTXReg<"%fa158">;
> +def fa159 : NVPTXReg<"%fa159">;
> +def fa160 : NVPTXReg<"%fa160">;
> +def fa161 : NVPTXReg<"%fa161">;
> +def fa162 : NVPTXReg<"%fa162">;
> +def fa163 : NVPTXReg<"%fa163">;
> +def fa164 : NVPTXReg<"%fa164">;
> +def fa165 : NVPTXReg<"%fa165">;
> +def fa166 : NVPTXReg<"%fa166">;
> +def fa167 : NVPTXReg<"%fa167">;
> +def fa168 : NVPTXReg<"%fa168">;
> +def fa169 : NVPTXReg<"%fa169">;
> +def fa170 : NVPTXReg<"%fa170">;
> +def fa171 : NVPTXReg<"%fa171">;
> +def fa172 : NVPTXReg<"%fa172">;
> +def fa173 : NVPTXReg<"%fa173">;
> +def fa174 : NVPTXReg<"%fa174">;
> +def fa175 : NVPTXReg<"%fa175">;
> +def fa176 : NVPTXReg<"%fa176">;
> +def fa177 : NVPTXReg<"%fa177">;
> +def fa178 : NVPTXReg<"%fa178">;
> +def fa179 : NVPTXReg<"%fa179">;
> +def fa180 : NVPTXReg<"%fa180">;
> +def fa181 : NVPTXReg<"%fa181">;
> +def fa182 : NVPTXReg<"%fa182">;
> +def fa183 : NVPTXReg<"%fa183">;
> +def fa184 : NVPTXReg<"%fa184">;
> +def fa185 : NVPTXReg<"%fa185">;
> +def fa186 : NVPTXReg<"%fa186">;
> +def fa187 : NVPTXReg<"%fa187">;
> +def fa188 : NVPTXReg<"%fa188">;
> +def fa189 : NVPTXReg<"%fa189">;
> +def fa190 : NVPTXReg<"%fa190">;
> +def fa191 : NVPTXReg<"%fa191">;
> +def fa192 : NVPTXReg<"%fa192">;
> +def fa193 : NVPTXReg<"%fa193">;
> +def fa194 : NVPTXReg<"%fa194">;
> +def fa195 : NVPTXReg<"%fa195">;
> +def fa196 : NVPTXReg<"%fa196">;
> +def fa197 : NVPTXReg<"%fa197">;
> +def fa198 : NVPTXReg<"%fa198">;
> +def fa199 : NVPTXReg<"%fa199">;
> +def fa200 : NVPTXReg<"%fa200">;
> +def fa201 : NVPTXReg<"%fa201">;
> +def fa202 : NVPTXReg<"%fa202">;
> +def fa203 : NVPTXReg<"%fa203">;
> +def fa204 : NVPTXReg<"%fa204">;
> +def fa205 : NVPTXReg<"%fa205">;
> +def fa206 : NVPTXReg<"%fa206">;
> +def fa207 : NVPTXReg<"%fa207">;
> +def fa208 : NVPTXReg<"%fa208">;
> +def fa209 : NVPTXReg<"%fa209">;
> +def fa210 : NVPTXReg<"%fa210">;
> +def fa211 : NVPTXReg<"%fa211">;
> +def fa212 : NVPTXReg<"%fa212">;
> +def fa213 : NVPTXReg<"%fa213">;
> +def fa214 : NVPTXReg<"%fa214">;
> +def fa215 : NVPTXReg<"%fa215">;
> +def fa216 : NVPTXReg<"%fa216">;
> +def fa217 : NVPTXReg<"%fa217">;
> +def fa218 : NVPTXReg<"%fa218">;
> +def fa219 : NVPTXReg<"%fa219">;
> +def fa220 : NVPTXReg<"%fa220">;
> +def fa221 : NVPTXReg<"%fa221">;
> +def fa222 : NVPTXReg<"%fa222">;
> +def fa223 : NVPTXReg<"%fa223">;
> +def fa224 : NVPTXReg<"%fa224">;
> +def fa225 : NVPTXReg<"%fa225">;
> +def fa226 : NVPTXReg<"%fa226">;
> +def fa227 : NVPTXReg<"%fa227">;
> +def fa228 : NVPTXReg<"%fa228">;
> +def fa229 : NVPTXReg<"%fa229">;
> +def fa230 : NVPTXReg<"%fa230">;
> +def fa231 : NVPTXReg<"%fa231">;
> +def fa232 : NVPTXReg<"%fa232">;
> +def fa233 : NVPTXReg<"%fa233">;
> +def fa234 : NVPTXReg<"%fa234">;
> +def fa235 : NVPTXReg<"%fa235">;
> +def fa236 : NVPTXReg<"%fa236">;
> +def fa237 : NVPTXReg<"%fa237">;
> +def fa238 : NVPTXReg<"%fa238">;
> +def fa239 : NVPTXReg<"%fa239">;
> +def fa240 : NVPTXReg<"%fa240">;
> +def fa241 : NVPTXReg<"%fa241">;
> +def fa242 : NVPTXReg<"%fa242">;
> +def fa243 : NVPTXReg<"%fa243">;
> +def fa244 : NVPTXReg<"%fa244">;
> +def fa245 : NVPTXReg<"%fa245">;
> +def fa246 : NVPTXReg<"%fa246">;
> +def fa247 : NVPTXReg<"%fa247">;
> +def fa248 : NVPTXReg<"%fa248">;
> +def fa249 : NVPTXReg<"%fa249">;
> +def fa250 : NVPTXReg<"%fa250">;
> +def fa251 : NVPTXReg<"%fa251">;
> +def fa252 : NVPTXReg<"%fa252">;
> +def fa253 : NVPTXReg<"%fa253">;
> +def fa254 : NVPTXReg<"%fa254">;
> +def fa255 : NVPTXReg<"%fa255">;
> +def fa256 : NVPTXReg<"%fa256">;
> +def fa257 : NVPTXReg<"%fa257">;
> +def fa258 : NVPTXReg<"%fa258">;
> +def fa259 : NVPTXReg<"%fa259">;
> +def fa260 : NVPTXReg<"%fa260">;
> +def fa261 : NVPTXReg<"%fa261">;
> +def fa262 : NVPTXReg<"%fa262">;
> +def fa263 : NVPTXReg<"%fa263">;
> +def fa264 : NVPTXReg<"%fa264">;
> +def fa265 : NVPTXReg<"%fa265">;
> +def fa266 : NVPTXReg<"%fa266">;
> +def fa267 : NVPTXReg<"%fa267">;
> +def fa268 : NVPTXReg<"%fa268">;
> +def fa269 : NVPTXReg<"%fa269">;
> +def fa270 : NVPTXReg<"%fa270">;
> +def fa271 : NVPTXReg<"%fa271">;
> +def fa272 : NVPTXReg<"%fa272">;
> +def fa273 : NVPTXReg<"%fa273">;
> +def fa274 : NVPTXReg<"%fa274">;
> +def fa275 : NVPTXReg<"%fa275">;
> +def fa276 : NVPTXReg<"%fa276">;
> +def fa277 : NVPTXReg<"%fa277">;
> +def fa278 : NVPTXReg<"%fa278">;
> +def fa279 : NVPTXReg<"%fa279">;
> +def fa280 : NVPTXReg<"%fa280">;
> +def fa281 : NVPTXReg<"%fa281">;
> +def fa282 : NVPTXReg<"%fa282">;
> +def fa283 : NVPTXReg<"%fa283">;
> +def fa284 : NVPTXReg<"%fa284">;
> +def fa285 : NVPTXReg<"%fa285">;
> +def fa286 : NVPTXReg<"%fa286">;
> +def fa287 : NVPTXReg<"%fa287">;
> +def fa288 : NVPTXReg<"%fa288">;
> +def fa289 : NVPTXReg<"%fa289">;
> +def fa290 : NVPTXReg<"%fa290">;
> +def fa291 : NVPTXReg<"%fa291">;
> +def fa292 : NVPTXReg<"%fa292">;
> +def fa293 : NVPTXReg<"%fa293">;
> +def fa294 : NVPTXReg<"%fa294">;
> +def fa295 : NVPTXReg<"%fa295">;
> +def fa296 : NVPTXReg<"%fa296">;
> +def fa297 : NVPTXReg<"%fa297">;
> +def fa298 : NVPTXReg<"%fa298">;
> +def fa299 : NVPTXReg<"%fa299">;
> +def fa300 : NVPTXReg<"%fa300">;
> +def fa301 : NVPTXReg<"%fa301">;
> +def fa302 : NVPTXReg<"%fa302">;
> +def fa303 : NVPTXReg<"%fa303">;
> +def fa304 : NVPTXReg<"%fa304">;
> +def fa305 : NVPTXReg<"%fa305">;
> +def fa306 : NVPTXReg<"%fa306">;
> +def fa307 : NVPTXReg<"%fa307">;
> +def fa308 : NVPTXReg<"%fa308">;
> +def fa309 : NVPTXReg<"%fa309">;
> +def fa310 : NVPTXReg<"%fa310">;
> +def fa311 : NVPTXReg<"%fa311">;
> +def fa312 : NVPTXReg<"%fa312">;
> +def fa313 : NVPTXReg<"%fa313">;
> +def fa314 : NVPTXReg<"%fa314">;
> +def fa315 : NVPTXReg<"%fa315">;
> +def fa316 : NVPTXReg<"%fa316">;
> +def fa317 : NVPTXReg<"%fa317">;
> +def fa318 : NVPTXReg<"%fa318">;
> +def fa319 : NVPTXReg<"%fa319">;
> +def fa320 : NVPTXReg<"%fa320">;
> +def fa321 : NVPTXReg<"%fa321">;
> +def fa322 : NVPTXReg<"%fa322">;
> +def fa323 : NVPTXReg<"%fa323">;
> +def fa324 : NVPTXReg<"%fa324">;
> +def fa325 : NVPTXReg<"%fa325">;
> +def fa326 : NVPTXReg<"%fa326">;
> +def fa327 : NVPTXReg<"%fa327">;
> +def fa328 : NVPTXReg<"%fa328">;
> +def fa329 : NVPTXReg<"%fa329">;
> +def fa330 : NVPTXReg<"%fa330">;
> +def fa331 : NVPTXReg<"%fa331">;
> +def fa332 : NVPTXReg<"%fa332">;
> +def fa333 : NVPTXReg<"%fa333">;
> +def fa334 : NVPTXReg<"%fa334">;
> +def fa335 : NVPTXReg<"%fa335">;
> +def fa336 : NVPTXReg<"%fa336">;
> +def fa337 : NVPTXReg<"%fa337">;
> +def fa338 : NVPTXReg<"%fa338">;
> +def fa339 : NVPTXReg<"%fa339">;
> +def fa340 : NVPTXReg<"%fa340">;
> +def fa341 : NVPTXReg<"%fa341">;
> +def fa342 : NVPTXReg<"%fa342">;
> +def fa343 : NVPTXReg<"%fa343">;
> +def fa344 : NVPTXReg<"%fa344">;
> +def fa345 : NVPTXReg<"%fa345">;
> +def fa346 : NVPTXReg<"%fa346">;
> +def fa347 : NVPTXReg<"%fa347">;
> +def fa348 : NVPTXReg<"%fa348">;
> +def fa349 : NVPTXReg<"%fa349">;
> +def fa350 : NVPTXReg<"%fa350">;
> +def fa351 : NVPTXReg<"%fa351">;
> +def fa352 : NVPTXReg<"%fa352">;
> +def fa353 : NVPTXReg<"%fa353">;
> +def fa354 : NVPTXReg<"%fa354">;
> +def fa355 : NVPTXReg<"%fa355">;
> +def fa356 : NVPTXReg<"%fa356">;
> +def fa357 : NVPTXReg<"%fa357">;
> +def fa358 : NVPTXReg<"%fa358">;
> +def fa359 : NVPTXReg<"%fa359">;
> +def fa360 : NVPTXReg<"%fa360">;
> +def fa361 : NVPTXReg<"%fa361">;
> +def fa362 : NVPTXReg<"%fa362">;
> +def fa363 : NVPTXReg<"%fa363">;
> +def fa364 : NVPTXReg<"%fa364">;
> +def fa365 : NVPTXReg<"%fa365">;
> +def fa366 : NVPTXReg<"%fa366">;
> +def fa367 : NVPTXReg<"%fa367">;
> +def fa368 : NVPTXReg<"%fa368">;
> +def fa369 : NVPTXReg<"%fa369">;
> +def fa370 : NVPTXReg<"%fa370">;
> +def fa371 : NVPTXReg<"%fa371">;
> +def fa372 : NVPTXReg<"%fa372">;
> +def fa373 : NVPTXReg<"%fa373">;
> +def fa374 : NVPTXReg<"%fa374">;
> +def fa375 : NVPTXReg<"%fa375">;
> +def fa376 : NVPTXReg<"%fa376">;
> +def fa377 : NVPTXReg<"%fa377">;
> +def fa378 : NVPTXReg<"%fa378">;
> +def fa379 : NVPTXReg<"%fa379">;
> +def fa380 : NVPTXReg<"%fa380">;
> +def fa381 : NVPTXReg<"%fa381">;
> +def fa382 : NVPTXReg<"%fa382">;
> +def fa383 : NVPTXReg<"%fa383">;
> +def fa384 : NVPTXReg<"%fa384">;
> +def fa385 : NVPTXReg<"%fa385">;
> +def fa386 : NVPTXReg<"%fa386">;
> +def fa387 : NVPTXReg<"%fa387">;
> +def fa388 : NVPTXReg<"%fa388">;
> +def fa389 : NVPTXReg<"%fa389">;
> +def fa390 : NVPTXReg<"%fa390">;
> +def fa391 : NVPTXReg<"%fa391">;
> +def fa392 : NVPTXReg<"%fa392">;
> +def fa393 : NVPTXReg<"%fa393">;
> +def fa394 : NVPTXReg<"%fa394">;
> +def fa395 : NVPTXReg<"%fa395">;
> +def da0 : NVPTXReg<"%da0">;
> +def da1 : NVPTXReg<"%da1">;
> +def da2 : NVPTXReg<"%da2">;
> +def da3 : NVPTXReg<"%da3">;
> +def da4 : NVPTXReg<"%da4">;
> +def da5 : NVPTXReg<"%da5">;
> +def da6 : NVPTXReg<"%da6">;
> +def da7 : NVPTXReg<"%da7">;
> +def da8 : NVPTXReg<"%da8">;
> +def da9 : NVPTXReg<"%da9">;
> +def da10 : NVPTXReg<"%da10">;
> +def da11 : NVPTXReg<"%da11">;
> +def da12 : NVPTXReg<"%da12">;
> +def da13 : NVPTXReg<"%da13">;
> +def da14 : NVPTXReg<"%da14">;
> +def da15 : NVPTXReg<"%da15">;
> +def da16 : NVPTXReg<"%da16">;
> +def da17 : NVPTXReg<"%da17">;
> +def da18 : NVPTXReg<"%da18">;
> +def da19 : NVPTXReg<"%da19">;
> +def da20 : NVPTXReg<"%da20">;
> +def da21 : NVPTXReg<"%da21">;
> +def da22 : NVPTXReg<"%da22">;
> +def da23 : NVPTXReg<"%da23">;
> +def da24 : NVPTXReg<"%da24">;
> +def da25 : NVPTXReg<"%da25">;
> +def da26 : NVPTXReg<"%da26">;
> +def da27 : NVPTXReg<"%da27">;
> +def da28 : NVPTXReg<"%da28">;
> +def da29 : NVPTXReg<"%da29">;
> +def da30 : NVPTXReg<"%da30">;
> +def da31 : NVPTXReg<"%da31">;
> +def da32 : NVPTXReg<"%da32">;
> +def da33 : NVPTXReg<"%da33">;
> +def da34 : NVPTXReg<"%da34">;
> +def da35 : NVPTXReg<"%da35">;
> +def da36 : NVPTXReg<"%da36">;
> +def da37 : NVPTXReg<"%da37">;
> +def da38 : NVPTXReg<"%da38">;
> +def da39 : NVPTXReg<"%da39">;
> +def da40 : NVPTXReg<"%da40">;
> +def da41 : NVPTXReg<"%da41">;
> +def da42 : NVPTXReg<"%da42">;
> +def da43 : NVPTXReg<"%da43">;
> +def da44 : NVPTXReg<"%da44">;
> +def da45 : NVPTXReg<"%da45">;
> +def da46 : NVPTXReg<"%da46">;
> +def da47 : NVPTXReg<"%da47">;
> +def da48 : NVPTXReg<"%da48">;
> +def da49 : NVPTXReg<"%da49">;
> +def da50 : NVPTXReg<"%da50">;
> +def da51 : NVPTXReg<"%da51">;
> +def da52 : NVPTXReg<"%da52">;
> +def da53 : NVPTXReg<"%da53">;
> +def da54 : NVPTXReg<"%da54">;
> +def da55 : NVPTXReg<"%da55">;
> +def da56 : NVPTXReg<"%da56">;
> +def da57 : NVPTXReg<"%da57">;
> +def da58 : NVPTXReg<"%da58">;
> +def da59 : NVPTXReg<"%da59">;
> +def da60 : NVPTXReg<"%da60">;
> +def da61 : NVPTXReg<"%da61">;
> +def da62 : NVPTXReg<"%da62">;
> +def da63 : NVPTXReg<"%da63">;
> +def da64 : NVPTXReg<"%da64">;
> +def da65 : NVPTXReg<"%da65">;
> +def da66 : NVPTXReg<"%da66">;
> +def da67 : NVPTXReg<"%da67">;
> +def da68 : NVPTXReg<"%da68">;
> +def da69 : NVPTXReg<"%da69">;
> +def da70 : NVPTXReg<"%da70">;
> +def da71 : NVPTXReg<"%da71">;
> +def da72 : NVPTXReg<"%da72">;
> +def da73 : NVPTXReg<"%da73">;
> +def da74 : NVPTXReg<"%da74">;
> +def da75 : NVPTXReg<"%da75">;
> +def da76 : NVPTXReg<"%da76">;
> +def da77 : NVPTXReg<"%da77">;
> +def da78 : NVPTXReg<"%da78">;
> +def da79 : NVPTXReg<"%da79">;
> +def da80 : NVPTXReg<"%da80">;
> +def da81 : NVPTXReg<"%da81">;
> +def da82 : NVPTXReg<"%da82">;
> +def da83 : NVPTXReg<"%da83">;
> +def da84 : NVPTXReg<"%da84">;
> +def da85 : NVPTXReg<"%da85">;
> +def da86 : NVPTXReg<"%da86">;
> +def da87 : NVPTXReg<"%da87">;
> +def da88 : NVPTXReg<"%da88">;
> +def da89 : NVPTXReg<"%da89">;
> +def da90 : NVPTXReg<"%da90">;
> +def da91 : NVPTXReg<"%da91">;
> +def da92 : NVPTXReg<"%da92">;
> +def da93 : NVPTXReg<"%da93">;
> +def da94 : NVPTXReg<"%da94">;
> +def da95 : NVPTXReg<"%da95">;
> +def da96 : NVPTXReg<"%da96">;
> +def da97 : NVPTXReg<"%da97">;
> +def da98 : NVPTXReg<"%da98">;
> +def da99 : NVPTXReg<"%da99">;
> +def da100 : NVPTXReg<"%da100">;
> +def da101 : NVPTXReg<"%da101">;
> +def da102 : NVPTXReg<"%da102">;
> +def da103 : NVPTXReg<"%da103">;
> +def da104 : NVPTXReg<"%da104">;
> +def da105 : NVPTXReg<"%da105">;
> +def da106 : NVPTXReg<"%da106">;
> +def da107 : NVPTXReg<"%da107">;
> +def da108 : NVPTXReg<"%da108">;
> +def da109 : NVPTXReg<"%da109">;
> +def da110 : NVPTXReg<"%da110">;
> +def da111 : NVPTXReg<"%da111">;
> +def da112 : NVPTXReg<"%da112">;
> +def da113 : NVPTXReg<"%da113">;
> +def da114 : NVPTXReg<"%da114">;
> +def da115 : NVPTXReg<"%da115">;
> +def da116 : NVPTXReg<"%da116">;
> +def da117 : NVPTXReg<"%da117">;
> +def da118 : NVPTXReg<"%da118">;
> +def da119 : NVPTXReg<"%da119">;
> +def da120 : NVPTXReg<"%da120">;
> +def da121 : NVPTXReg<"%da121">;
> +def da122 : NVPTXReg<"%da122">;
> +def da123 : NVPTXReg<"%da123">;
> +def da124 : NVPTXReg<"%da124">;
> +def da125 : NVPTXReg<"%da125">;
> +def da126 : NVPTXReg<"%da126">;
> +def da127 : NVPTXReg<"%da127">;
> +def da128 : NVPTXReg<"%da128">;
> +def da129 : NVPTXReg<"%da129">;
> +def da130 : NVPTXReg<"%da130">;
> +def da131 : NVPTXReg<"%da131">;
> +def da132 : NVPTXReg<"%da132">;
> +def da133 : NVPTXReg<"%da133">;
> +def da134 : NVPTXReg<"%da134">;
> +def da135 : NVPTXReg<"%da135">;
> +def da136 : NVPTXReg<"%da136">;
> +def da137 : NVPTXReg<"%da137">;
> +def da138 : NVPTXReg<"%da138">;
> +def da139 : NVPTXReg<"%da139">;
> +def da140 : NVPTXReg<"%da140">;
> +def da141 : NVPTXReg<"%da141">;
> +def da142 : NVPTXReg<"%da142">;
> +def da143 : NVPTXReg<"%da143">;
> +def da144 : NVPTXReg<"%da144">;
> +def da145 : NVPTXReg<"%da145">;
> +def da146 : NVPTXReg<"%da146">;
> +def da147 : NVPTXReg<"%da147">;
> +def da148 : NVPTXReg<"%da148">;
> +def da149 : NVPTXReg<"%da149">;
> +def da150 : NVPTXReg<"%da150">;
> +def da151 : NVPTXReg<"%da151">;
> +def da152 : NVPTXReg<"%da152">;
> +def da153 : NVPTXReg<"%da153">;
> +def da154 : NVPTXReg<"%da154">;
> +def da155 : NVPTXReg<"%da155">;
> +def da156 : NVPTXReg<"%da156">;
> +def da157 : NVPTXReg<"%da157">;
> +def da158 : NVPTXReg<"%da158">;
> +def da159 : NVPTXReg<"%da159">;
> +def da160 : NVPTXReg<"%da160">;
> +def da161 : NVPTXReg<"%da161">;
> +def da162 : NVPTXReg<"%da162">;
> +def da163 : NVPTXReg<"%da163">;
> +def da164 : NVPTXReg<"%da164">;
> +def da165 : NVPTXReg<"%da165">;
> +def da166 : NVPTXReg<"%da166">;
> +def da167 : NVPTXReg<"%da167">;
> +def da168 : NVPTXReg<"%da168">;
> +def da169 : NVPTXReg<"%da169">;
> +def da170 : NVPTXReg<"%da170">;
> +def da171 : NVPTXReg<"%da171">;
> +def da172 : NVPTXReg<"%da172">;
> +def da173 : NVPTXReg<"%da173">;
> +def da174 : NVPTXReg<"%da174">;
> +def da175 : NVPTXReg<"%da175">;
> +def da176 : NVPTXReg<"%da176">;
> +def da177 : NVPTXReg<"%da177">;
> +def da178 : NVPTXReg<"%da178">;
> +def da179 : NVPTXReg<"%da179">;
> +def da180 : NVPTXReg<"%da180">;
> +def da181 : NVPTXReg<"%da181">;
> +def da182 : NVPTXReg<"%da182">;
> +def da183 : NVPTXReg<"%da183">;
> +def da184 : NVPTXReg<"%da184">;
> +def da185 : NVPTXReg<"%da185">;
> +def da186 : NVPTXReg<"%da186">;
> +def da187 : NVPTXReg<"%da187">;
> +def da188 : NVPTXReg<"%da188">;
> +def da189 : NVPTXReg<"%da189">;
> +def da190 : NVPTXReg<"%da190">;
> +def da191 : NVPTXReg<"%da191">;
> +def da192 : NVPTXReg<"%da192">;
> +def da193 : NVPTXReg<"%da193">;
> +def da194 : NVPTXReg<"%da194">;
> +def da195 : NVPTXReg<"%da195">;
> +def da196 : NVPTXReg<"%da196">;
> +def da197 : NVPTXReg<"%da197">;
> +def da198 : NVPTXReg<"%da198">;
> +def da199 : NVPTXReg<"%da199">;
> +def da200 : NVPTXReg<"%da200">;
> +def da201 : NVPTXReg<"%da201">;
> +def da202 : NVPTXReg<"%da202">;
> +def da203 : NVPTXReg<"%da203">;
> +def da204 : NVPTXReg<"%da204">;
> +def da205 : NVPTXReg<"%da205">;
> +def da206 : NVPTXReg<"%da206">;
> +def da207 : NVPTXReg<"%da207">;
> +def da208 : NVPTXReg<"%da208">;
> +def da209 : NVPTXReg<"%da209">;
> +def da210 : NVPTXReg<"%da210">;
> +def da211 : NVPTXReg<"%da211">;
> +def da212 : NVPTXReg<"%da212">;
> +def da213 : NVPTXReg<"%da213">;
> +def da214 : NVPTXReg<"%da214">;
> +def da215 : NVPTXReg<"%da215">;
> +def da216 : NVPTXReg<"%da216">;
> +def da217 : NVPTXReg<"%da217">;
> +def da218 : NVPTXReg<"%da218">;
> +def da219 : NVPTXReg<"%da219">;
> +def da220 : NVPTXReg<"%da220">;
> +def da221 : NVPTXReg<"%da221">;
> +def da222 : NVPTXReg<"%da222">;
> +def da223 : NVPTXReg<"%da223">;
> +def da224 : NVPTXReg<"%da224">;
> +def da225 : NVPTXReg<"%da225">;
> +def da226 : NVPTXReg<"%da226">;
> +def da227 : NVPTXReg<"%da227">;
> +def da228 : NVPTXReg<"%da228">;
> +def da229 : NVPTXReg<"%da229">;
> +def da230 : NVPTXReg<"%da230">;
> +def da231 : NVPTXReg<"%da231">;
> +def da232 : NVPTXReg<"%da232">;
> +def da233 : NVPTXReg<"%da233">;
> +def da234 : NVPTXReg<"%da234">;
> +def da235 : NVPTXReg<"%da235">;
> +def da236 : NVPTXReg<"%da236">;
> +def da237 : NVPTXReg<"%da237">;
> +def da238 : NVPTXReg<"%da238">;
> +def da239 : NVPTXReg<"%da239">;
> +def da240 : NVPTXReg<"%da240">;
> +def da241 : NVPTXReg<"%da241">;
> +def da242 : NVPTXReg<"%da242">;
> +def da243 : NVPTXReg<"%da243">;
> +def da244 : NVPTXReg<"%da244">;
> +def da245 : NVPTXReg<"%da245">;
> +def da246 : NVPTXReg<"%da246">;
> +def da247 : NVPTXReg<"%da247">;
> +def da248 : NVPTXReg<"%da248">;
> +def da249 : NVPTXReg<"%da249">;
> +def da250 : NVPTXReg<"%da250">;
> +def da251 : NVPTXReg<"%da251">;
> +def da252 : NVPTXReg<"%da252">;
> +def da253 : NVPTXReg<"%da253">;
> +def da254 : NVPTXReg<"%da254">;
> +def da255 : NVPTXReg<"%da255">;
> +def da256 : NVPTXReg<"%da256">;
> +def da257 : NVPTXReg<"%da257">;
> +def da258 : NVPTXReg<"%da258">;
> +def da259 : NVPTXReg<"%da259">;
> +def da260 : NVPTXReg<"%da260">;
> +def da261 : NVPTXReg<"%da261">;
> +def da262 : NVPTXReg<"%da262">;
> +def da263 : NVPTXReg<"%da263">;
> +def da264 : NVPTXReg<"%da264">;
> +def da265 : NVPTXReg<"%da265">;
> +def da266 : NVPTXReg<"%da266">;
> +def da267 : NVPTXReg<"%da267">;
> +def da268 : NVPTXReg<"%da268">;
> +def da269 : NVPTXReg<"%da269">;
> +def da270 : NVPTXReg<"%da270">;
> +def da271 : NVPTXReg<"%da271">;
> +def da272 : NVPTXReg<"%da272">;
> +def da273 : NVPTXReg<"%da273">;
> +def da274 : NVPTXReg<"%da274">;
> +def da275 : NVPTXReg<"%da275">;
> +def da276 : NVPTXReg<"%da276">;
> +def da277 : NVPTXReg<"%da277">;
> +def da278 : NVPTXReg<"%da278">;
> +def da279 : NVPTXReg<"%da279">;
> +def da280 : NVPTXReg<"%da280">;
> +def da281 : NVPTXReg<"%da281">;
> +def da282 : NVPTXReg<"%da282">;
> +def da283 : NVPTXReg<"%da283">;
> +def da284 : NVPTXReg<"%da284">;
> +def da285 : NVPTXReg<"%da285">;
> +def da286 : NVPTXReg<"%da286">;
> +def da287 : NVPTXReg<"%da287">;
> +def da288 : NVPTXReg<"%da288">;
> +def da289 : NVPTXReg<"%da289">;
> +def da290 : NVPTXReg<"%da290">;
> +def da291 : NVPTXReg<"%da291">;
> +def da292 : NVPTXReg<"%da292">;
> +def da293 : NVPTXReg<"%da293">;
> +def da294 : NVPTXReg<"%da294">;
> +def da295 : NVPTXReg<"%da295">;
> +def da296 : NVPTXReg<"%da296">;
> +def da297 : NVPTXReg<"%da297">;
> +def da298 : NVPTXReg<"%da298">;
> +def da299 : NVPTXReg<"%da299">;
> +def da300 : NVPTXReg<"%da300">;
> +def da301 : NVPTXReg<"%da301">;
> +def da302 : NVPTXReg<"%da302">;
> +def da303 : NVPTXReg<"%da303">;
> +def da304 : NVPTXReg<"%da304">;
> +def da305 : NVPTXReg<"%da305">;
> +def da306 : NVPTXReg<"%da306">;
> +def da307 : NVPTXReg<"%da307">;
> +def da308 : NVPTXReg<"%da308">;
> +def da309 : NVPTXReg<"%da309">;
> +def da310 : NVPTXReg<"%da310">;
> +def da311 : NVPTXReg<"%da311">;
> +def da312 : NVPTXReg<"%da312">;
> +def da313 : NVPTXReg<"%da313">;
> +def da314 : NVPTXReg<"%da314">;
> +def da315 : NVPTXReg<"%da315">;
> +def da316 : NVPTXReg<"%da316">;
> +def da317 : NVPTXReg<"%da317">;
> +def da318 : NVPTXReg<"%da318">;
> +def da319 : NVPTXReg<"%da319">;
> +def da320 : NVPTXReg<"%da320">;
> +def da321 : NVPTXReg<"%da321">;
> +def da322 : NVPTXReg<"%da322">;
> +def da323 : NVPTXReg<"%da323">;
> +def da324 : NVPTXReg<"%da324">;
> +def da325 : NVPTXReg<"%da325">;
> +def da326 : NVPTXReg<"%da326">;
> +def da327 : NVPTXReg<"%da327">;
> +def da328 : NVPTXReg<"%da328">;
> +def da329 : NVPTXReg<"%da329">;
> +def da330 : NVPTXReg<"%da330">;
> +def da331 : NVPTXReg<"%da331">;
> +def da332 : NVPTXReg<"%da332">;
> +def da333 : NVPTXReg<"%da333">;
> +def da334 : NVPTXReg<"%da334">;
> +def da335 : NVPTXReg<"%da335">;
> +def da336 : NVPTXReg<"%da336">;
> +def da337 : NVPTXReg<"%da337">;
> +def da338 : NVPTXReg<"%da338">;
> +def da339 : NVPTXReg<"%da339">;
> +def da340 : NVPTXReg<"%da340">;
> +def da341 : NVPTXReg<"%da341">;
> +def da342 : NVPTXReg<"%da342">;
> +def da343 : NVPTXReg<"%da343">;
> +def da344 : NVPTXReg<"%da344">;
> +def da345 : NVPTXReg<"%da345">;
> +def da346 : NVPTXReg<"%da346">;
> +def da347 : NVPTXReg<"%da347">;
> +def da348 : NVPTXReg<"%da348">;
> +def da349 : NVPTXReg<"%da349">;
> +def da350 : NVPTXReg<"%da350">;
> +def da351 : NVPTXReg<"%da351">;
> +def da352 : NVPTXReg<"%da352">;
> +def da353 : NVPTXReg<"%da353">;
> +def da354 : NVPTXReg<"%da354">;
> +def da355 : NVPTXReg<"%da355">;
> +def da356 : NVPTXReg<"%da356">;
> +def da357 : NVPTXReg<"%da357">;
> +def da358 : NVPTXReg<"%da358">;
> +def da359 : NVPTXReg<"%da359">;
> +def da360 : NVPTXReg<"%da360">;
> +def da361 : NVPTXReg<"%da361">;
> +def da362 : NVPTXReg<"%da362">;
> +def da363 : NVPTXReg<"%da363">;
> +def da364 : NVPTXReg<"%da364">;
> +def da365 : NVPTXReg<"%da365">;
> +def da366 : NVPTXReg<"%da366">;
> +def da367 : NVPTXReg<"%da367">;
> +def da368 : NVPTXReg<"%da368">;
> +def da369 : NVPTXReg<"%da369">;
> +def da370 : NVPTXReg<"%da370">;
> +def da371 : NVPTXReg<"%da371">;
> +def da372 : NVPTXReg<"%da372">;
> +def da373 : NVPTXReg<"%da373">;
> +def da374 : NVPTXReg<"%da374">;
> +def da375 : NVPTXReg<"%da375">;
> +def da376 : NVPTXReg<"%da376">;
> +def da377 : NVPTXReg<"%da377">;
> +def da378 : NVPTXReg<"%da378">;
> +def da379 : NVPTXReg<"%da379">;
> +def da380 : NVPTXReg<"%da380">;
> +def da381 : NVPTXReg<"%da381">;
> +def da382 : NVPTXReg<"%da382">;
> +def da383 : NVPTXReg<"%da383">;
> +def da384 : NVPTXReg<"%da384">;
> +def da385 : NVPTXReg<"%da385">;
> +def da386 : NVPTXReg<"%da386">;
> +def da387 : NVPTXReg<"%da387">;
> +def da388 : NVPTXReg<"%da388">;
> +def da389 : NVPTXReg<"%da389">;
> +def da390 : NVPTXReg<"%da390">;
> +def da391 : NVPTXReg<"%da391">;
> +def da392 : NVPTXReg<"%da392">;
> +def da393 : NVPTXReg<"%da393">;
> +def da394 : NVPTXReg<"%da394">;
> +def da395 : NVPTXReg<"%da395">;
> +
> +//===----------------------------------------------------------------------===//
> +//  Register classes
> +//===----------------------------------------------------------------------===//
> +def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 395))>;
> +def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%u", 0, 395))>;
> +def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 395))>;
> +def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 395))>;
> +def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 395))>;
> +def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 395))>;
> +def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 395))>;
> +def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 395))>;
> +def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 395))>;
> +def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 395))>;
> +def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;
> +
> +// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
> +def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
> +
> +class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
> +                       NVPTXRegClass sClass,
> +                       int e,
> +                       string n>
> +  : NVPTXRegClass<regTypes, alignment, regList>
> +{
> +  NVPTXRegClass scalarClass = sClass;
> +  int elems = e;
> +  string name = n;
> +}
> +def V2F32Regs
> +  : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
> +    Float32Regs, 2, ".v2.f32">;
> +def V4F32Regs
> +  : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
> +    Float32Regs, 4, ".v4.f32">;
> +def V2I32Regs
> +  : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
> +    Int32Regs, 2, ".v2.u32">;
> +def V4I32Regs
> +  : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
> +    Int32Regs, 4, ".v4.u32">;
> +def V2F64Regs
> +  : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
> +    Float64Regs, 2, ".v2.f64">;
> +def V2I64Regs
> +  : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
> +    Int64Regs, 2, ".v2.u64">;
> +def V2I16Regs
> +  : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
> +    Int16Regs, 2, ".v2.u16">;
> +def V4I16Regs
> +  : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
> +    Int16Regs, 4, ".v4.u16">;
> +def V2I8Regs
> +  : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
> +    Int8Regs, 2, ".v2.u8">;
> +def V4I8Regs
> +  : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
> +    Int8Regs, 4, ".v4.u8">;
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXSection.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXSection.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXSection.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXSection.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,45 @@
> +//===- NVPTXSection.h - NVPTX-specific section representation -*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file declares the NVPTXSection class.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_NVPTXSECTION_H
> +#define LLVM_NVPTXSECTION_H
> +
> +#include "llvm/MC/MCSection.h"
> +#include "llvm/GlobalVariable.h"
> +#include <vector>
> +
> +namespace llvm {
> +/// NVPTXSection - Represents a section in PTX.
> +/// PTX does not have sections, but we create this class in order to use
> +/// the AsmPrinter interface.
> +///
> +class NVPTXSection : public MCSection {
> +
> +public:
> +  NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
> +  ~NVPTXSection() {}
> +
> +  /// Override this, as NVPTX has its own way of printing a switch
> +  /// to a section.
> +  virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
> +                                    raw_ostream &OS) const {}
> +
> +  /// Base address of PTX sections is zero.
> +  virtual bool isBaseAddressKnownZero() const { return true; }
> +  virtual bool UseCodeAlign() const { return false; }
> +  virtual bool isVirtualSection() const { return false; }
> +};
> +
> +} // end namespace llvm
> +
> +#endif
>
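
Since PTX has no notion of sections, the override really is a no-op: a
section "switch" emits nothing. A minimal sketch of what that means in
practice (MAI here is assumed to be any available MCAsmInfo):

  NVPTXSection Sec(MCSection::SV_ELF, SectionKind::getText());
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  Sec.PrintSwitchToSection(MAI, OS);  // prints nothing for PTX
  assert(OS.str().empty() && "PTX emits no section directives");
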
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,77 @@
> +//===- NVPTXSplitBBatBar.cpp - Split BB at Barrier ---------------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +// Split basic blocks so that a basic block that contains a barrier instruction
> +// only contains the barrier instruction.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/Function.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/Intrinsics.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Support/InstIterator.h"
> +#include "NVPTXUtilities.h"
> +#include "NVPTXSplitBBatBar.h"
> +
> +using namespace llvm;
> +
> +namespace llvm {
> +FunctionPass *createSplitBBatBarPass();
> +}
> +
> +char NVPTXSplitBBatBar::ID = 0;
> +
> +bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
> +
> +  SmallVector<Instruction *, 4> SplitPoints;
> +  bool changed = false;
> +
> +  // Collect all the split points in SplitPoints
> +  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
> +    BasicBlock::iterator IB = BI->begin();
> +    BasicBlock::iterator II = IB;
> +    BasicBlock::iterator IE = BI->end();
> +
> +    // Skip the first instruction. No splitting is needed at this
> +    // point even if it is a barrier.
> +    while (II != IE) {
> +      if (IntrinsicInst *inst = dyn_cast<IntrinsicInst>(II)) {
> +        Intrinsic::ID id = inst->getIntrinsicID();
> +        // If this is a barrier, split at this instruction
> +        // and the next instruction.
> +        if (llvm::isBarrierIntrinsic(id)) {
> +          if (II != IB)
> +            SplitPoints.push_back(II);
> +          II++;
> +          if ((II != IE) && (!II->isTerminator())) {
> +            SplitPoints.push_back(II);
> +            II++;
> +          }
> +          continue;
> +        }
> +      }
> +      II++;
> +    }
> +  }
> +
> +  for (unsigned i = 0; i != SplitPoints.size(); i++) {
> +    changed = true;
> +    Instruction *inst = SplitPoints[i];
> +    inst->getParent()->splitBasicBlock(inst, "bar_split");
> +  }
> +
> +  return changed;
> +}
> +
> +// This interface will most likely not be necessary, because this pass will
> +// not be invoked by the driver, but will be used as a prerequisite to
> +// another pass.
> +FunctionPass *llvm::createSplitBBatBarPass() {
> +  return new NVPTXSplitBBatBar();
> +}
>
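
For anyone wanting to see the transformation in isolation, a minimal
sketch of driving this pass standalone over a module (using the legacy
PassManager; the pass is function-level, so a FunctionPassManager would
work just as well):

  #include "llvm/PassManager.h"
  #include "NVPTXSplitBBatBar.h"

  void splitAllBarriers(llvm::Module &M) {
    llvm::PassManager PM;
    PM.add(llvm::createSplitBBatBarPass());
    PM.run(M);  // afterwards, every barrier sits alone in its own block
  }
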
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXSplitBBatBar.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXSplitBBatBar.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXSplitBBatBar.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXSplitBBatBar.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,41 @@
> +//===-- llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h ---------------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the NVIDIA-specific declarations for splitting
> +// basic blocks at barrier instructions.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTX_SPLIT_BB_AT_BAR_H
> +#define NVPTX_SPLIT_BB_AT_BAR_H
> +
> +#include "llvm/Pass.h"
> +#include "llvm/CodeGen/MachineFunctionAnalysis.h"
> +
> +namespace llvm {
> +
> +// The actual analysis class, which is a FunctionPass.
> +struct NVPTXSplitBBatBar : public FunctionPass {
> +  static char ID;
> +
> +  NVPTXSplitBBatBar() : FunctionPass(ID) {}
> +  void getAnalysisUsage(AnalysisUsage &AU) const {
> +    AU.addPreserved<MachineFunctionAnalysis>();
> +  }
> +  virtual bool runOnFunction(Function &F);
> +
> +  virtual const char *getPassName() const {
> +    return "Split basic blocks at barrier";
> +  }
> +};
> +
> +extern FunctionPass *createSplitBBatBarPass();
> +}
> +
> +#endif //NVPTX_SPLIT_BB_AT_BAR_H
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXSubtarget.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXSubtarget.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXSubtarget.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXSubtarget.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,57 @@
> +//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file implements the NVPTX specific subclass of TargetSubtarget.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTXSubtarget.h"
> +#define GET_SUBTARGETINFO_ENUM
> +#define GET_SUBTARGETINFO_TARGET_DESC
> +#define GET_SUBTARGETINFO_CTOR
> +#include "NVPTXGenSubtargetInfo.inc"
> +
> +using namespace llvm;
> +
> +// Select Driver Interface
> +#include "llvm/Support/CommandLine.h"
> +namespace {
> +cl::opt<NVPTX::DrvInterface>
> +DriverInterface(cl::desc("Choose driver interface:"),
> +                cl::values(
> +                    clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
> +                    clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
> +                    clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
> +                    clEnumValEnd),
> +                    cl::init(NVPTX::NVCL));
> +}
> +
> +NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
> +                               const std::string &FS, bool is64Bit)
> +:NVPTXGenSubtargetInfo(TT, "", FS), // Don't pass CPU to subtarget,
> + // because we don't register all
> + // nvptx targets.
> + Is64Bit(is64Bit) {
> +
> +  drvInterface = DriverInterface;
> +
> +  // Provide the default CPU if none was specified.
> +  std::string defCPU = "sm_10";
> +
> +  // Get the TargetName from the FS if available
> +  if (FS.empty() && CPU.empty())
> +    TargetName = defCPU;
> +  else if (!CPU.empty())
> +    TargetName = CPU;
> +  else
> +    llvm_unreachable("we are not using FeatureStr");
> +
> +  // Set up the SmVersion
> +  SmVersion = atoi(TargetName.c_str()+3);
> +}
>
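
Nit: SmVersion = atoi(TargetName.c_str()+3) quietly assumes the CPU name
starts with "sm_" and is undefined for anything shorter. A slightly more
defensive sketch with the same result for well-formed names (the helper
name is just for illustration):

  #include "llvm/ADT/StringRef.h"

  static unsigned parseSmVersion(llvm::StringRef CPU) {
    unsigned V;
    if (CPU.startswith("sm_") && !CPU.substr(3).getAsInteger(10, V))
      return V;  // e.g. "sm_20" -> 20
    return 0;    // unknown or malformed CPU name
  }
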
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXSubtarget.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXSubtarget.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXSubtarget.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXSubtarget.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,92 @@
> +//===-- NVPTXSubtarget.h - Define Subtarget for the NVPTX -------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file declares the NVPTX specific subclass of TargetSubtarget.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTXSUBTARGET_H
> +#define NVPTXSUBTARGET_H
> +
> +#include "llvm/Target/TargetSubtargetInfo.h"
> +#include "NVPTX.h"
> +
> +#define GET_SUBTARGETINFO_HEADER
> +#include "NVPTXGenSubtargetInfo.inc"
> +
> +#include <string>
> +
> +namespace llvm {
> +
> +class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
> +
> +  unsigned int SmVersion;
> +  std::string TargetName;
> +  NVPTX::DrvInterface drvInterface;
> +  bool dummy; // For the 'dummy' feature, see NVPTX.td
> +  bool Is64Bit;
> +
> +public:
> +  /// This constructor initializes the data members to match those
> +  /// of the specified module.
> +  ///
> +  NVPTXSubtarget(const std::string &TT, const std::string &CPU,
> +                 const std::string &FS, bool is64Bit);
> +
> +  bool hasBrkPt() const { return SmVersion >= 11; }
> +  bool hasAtomRedG32() const { return SmVersion >= 11; }
> +  bool hasAtomRedS32() const { return SmVersion >= 12; }
> +  bool hasAtomRedG64() const { return SmVersion >= 12; }
> +  bool hasAtomRedS64() const { return SmVersion >= 20; }
> +  bool hasAtomRedGen32() const { return SmVersion >= 20; }
> +  bool hasAtomRedGen64() const { return SmVersion >= 20; }
> +  bool hasAtomAddF32() const { return SmVersion >= 20; }
> +  bool hasVote() const { return SmVersion >= 12; }
> +  bool hasDouble() const { return SmVersion >= 13; }
> +  bool reqPTX20() const { return SmVersion >= 20; }
> +  bool hasF32FTZ() const { return SmVersion >= 20; }
> +  bool hasFMAF32() const { return SmVersion >= 20; }
> +  bool hasFMAF64() const { return SmVersion >= 13; }
> +  bool hasLDU() const { return SmVersion >= 20; }
> +  bool hasGenericLdSt() const { return SmVersion >= 20; }
> +  inline bool hasHWROT32() const { return false; }
> +  inline bool hasSWROT32() const { return true; }
> +  inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
> +  inline bool hasROT64() const { return SmVersion >= 20; }
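
These predicates make the SM-version gating nicely greppable. For
reference, this is the sort of guard I'd expect in the lowering code
(emitAtomAddF32/emitCASLoop are hypothetical helpers, purely to
illustrate the intended use):

  // Sketch: pick an atomic float-add lowering based on the subtarget.
  SDValue lowerAtomicFAdd(SDValue Op, const NVPTXSubtarget &ST) {
    if (ST.hasAtomAddF32())
      return emitAtomAddF32(Op);  // native atom.add.f32 on sm_20+
    return emitCASLoop(Op);       // fall back to a compare-and-swap loop
  }
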
> +
> +  bool is64Bit() const { return Is64Bit; }
> +
> +  unsigned int getSmVersion() const { return SmVersion; }
> +  NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
> +  std::string getTargetName() const { return TargetName; }
> +
> +  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
> +
> +  std::string getDataLayout() const {
> +    const char *p;
> +    if (is64Bit())
> +      p = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
> +          "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
> +          "n16:32:64";
> +    else
> +      p = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
> +          "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
> +          "n16:32:64";
> +
> +    return std::string(p);
> +  }
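
The two strings differ only in the pointer spec; everything else matches
the usual defaults. Quick sanity check of the pointer width via
TargetData, as a sketch:

  #include "llvm/Target/TargetData.h"

  NVPTXSubtarget ST("nvptx64", "sm_20", "", /*is64Bit=*/true);
  llvm::TargetData TD(ST.getDataLayout());
  assert(TD.getPointerSize() == 8 && "64-bit PTX uses 8-byte pointers");
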
> +
> +};
> +
> +} // End llvm namespace
> +
> +#endif  // NVPTXSUBTARGET_H
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,133 @@
> +//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Top-level implementation for the NVPTX target.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTXTargetMachine.h"
> +#include "NVPTX.h"
> +#include "NVPTXSplitBBatBar.h"
> +#include "NVPTXLowerAggrCopies.h"
> +#include "MCTargetDesc/NVPTXMCAsmInfo.h"
> +#include "NVPTXAllocaHoisting.h"
> +#include "llvm/PassManager.h"
> +#include "llvm/Analysis/Passes.h"
> +#include "llvm/Analysis/Verifier.h"
> +#include "llvm/Assembly/PrintModulePass.h"
> +#include "llvm/ADT/OwningPtr.h"
> +#include "llvm/CodeGen/AsmPrinter.h"
> +#include "llvm/CodeGen/MachineFunctionAnalysis.h"
> +#include "llvm/CodeGen/MachineModuleInfo.h"
> +#include "llvm/CodeGen/Passes.h"
> +#include "llvm/MC/MCAsmInfo.h"
> +#include "llvm/MC/MCInstrInfo.h"
> +#include "llvm/MC/MCStreamer.h"
> +#include "llvm/MC/MCSubtargetInfo.h"
> +#include "llvm/Support/TargetRegistry.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/Target/TargetData.h"
> +#include "llvm/Target/TargetInstrInfo.h"
> +#include "llvm/Target/TargetLowering.h"
> +#include "llvm/Target/TargetLoweringObjectFile.h"
> +#include "llvm/Target/TargetMachine.h"
> +#include "llvm/Target/TargetOptions.h"
> +#include "llvm/Target/TargetRegisterInfo.h"
> +#include "llvm/Target/TargetSubtargetInfo.h"
> +#include "llvm/Transforms/Scalar.h"
> +#include "llvm/Support/CommandLine.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/FormattedStream.h"
> +#include "llvm/Support/TargetRegistry.h"
> +
> +
> +using namespace llvm;
> +
> +
> +extern "C" void LLVMInitializeNVPTXTarget() {
> +  // Register the target.
> +  RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
> +  RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
> +
> +  RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
> +  RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
> +
> +}
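
Once the TargetInfo library registers TheNVPTXTarget32/64 (presumably via
LLVMInitializeNVPTXTargetInfo, as for the other backends), tools should be
able to pick the target up the usual way. A minimal sketch, assuming the
triple support added elsewhere in this patch:

  #include "llvm/Support/TargetRegistry.h"

  LLVMInitializeNVPTXTargetInfo();  // assumed: registers the Target objects
  LLVMInitializeNVPTXTarget();      // registers the target machines above
  std::string Err;
  const llvm::Target *T = llvm::TargetRegistry::lookupTarget("nvptx64", Err);
  // T then builds an NVPTXTargetMachine64 via createTargetMachine(...).
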
> +
> +NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
> +                                       StringRef TT,
> +                                       StringRef CPU,
> +                                       StringRef FS,
> +                                       const TargetOptions& Options,
> +                                       Reloc::Model RM,
> +                                       CodeModel::Model CM,
> +                                       CodeGenOpt::Level OL,
> +                                       bool is64bit)
> +: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
> +  Subtarget(TT, CPU, FS, is64bit),
> +  DataLayout(Subtarget.getDataLayout()),
> +  InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
> +/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
> +}
> +
> +
> +
> +void NVPTXTargetMachine32::anchor() {}
> +
> +NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
> +                                           StringRef CPU, StringRef FS,
> +                                           const TargetOptions &Options,
> +                                           Reloc::Model RM, CodeModel::Model CM,
> +                                           CodeGenOpt::Level OL)
> +: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
> +}
> +
> +void NVPTXTargetMachine64::anchor() {}
> +
> +NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
> +                                           StringRef CPU, StringRef FS,
> +                                           const TargetOptions &Options,
> +                                           Reloc::Model RM, CodeModel::Model CM,
> +                                           CodeGenOpt::Level OL)
> +: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
> +}
> +
> +
> +namespace llvm {
> +class NVPTXPassConfig : public TargetPassConfig {
> +public:
> +  NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
> +  : TargetPassConfig(TM, PM) {}
> +
> +  NVPTXTargetMachine &getNVPTXTargetMachine() const {
> +    return getTM<NVPTXTargetMachine>();
> +  }
> +
> +  virtual bool addInstSelector();
> +  virtual bool addPreRegAlloc();
> +};
> +}
> +
> +TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
> +  NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
> +  return PassConfig;
> +}
> +
> +bool NVPTXPassConfig::addInstSelector() {
> +  PM->add(createLowerAggrCopies());
> +  PM->add(createSplitBBatBarPass());
> +  PM->add(createAllocaHoisting());
> +  PM->add(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
> +  PM->add(createVectorElementizePass(getNVPTXTargetMachine()));
> +  return false;
> +}
> +
> +bool NVPTXPassConfig::addPreRegAlloc() {
> +  return false;
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,131 @@
> +//===-- NVPTXTargetMachine.h - Define TargetMachine for NVPTX ---*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file declares the NVPTX specific subclass of TargetMachine.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +
> +#ifndef NVPTX_TARGETMACHINE_H
> +#define NVPTX_TARGETMACHINE_H
> +
> +#include "NVPTXInstrInfo.h"
> +#include "NVPTXISelLowering.h"
> +#include "NVPTXRegisterInfo.h"
> +#include "NVPTXSubtarget.h"
> +#include "NVPTXFrameLowering.h"
> +#include "ManagedStringPool.h"
> +#include "llvm/Target/TargetData.h"
> +#include "llvm/Target/TargetFrameLowering.h"
> +#include "llvm/Target/TargetMachine.h"
> +#include "llvm/Target/TargetSelectionDAGInfo.h"
> +
> +namespace llvm {
> +
> +/// NVPTXTargetMachine
> +///
> +class NVPTXTargetMachine : public LLVMTargetMachine {
> +  NVPTXSubtarget        Subtarget;
> +  const TargetData      DataLayout;       // Calculates type size & alignment
> +  NVPTXInstrInfo        InstrInfo;
> +  NVPTXTargetLowering   TLInfo;
> +  TargetSelectionDAGInfo   TSInfo;
> +
> +  // NVPTX does not have a call stack frame, but it needs an NVPTX-specific
> +  // FrameLowering class because TargetFrameLowering is abstract.
> +  NVPTXFrameLowering       FrameLowering;
> +
> +  // Holds strings that can be freed together with the NVPTXTargetMachine.
> +  ManagedStringPool     ManagedStrPool;
> +
> +  //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
> +  //                            bool DisableVerify, MCContext *&OutCtx);
> +
> +public:
> +  //virtual bool addPassesToEmitFile(PassManagerBase &PM,
> +  //                                 formatted_raw_ostream &Out,
> +  //                                 CodeGenFileType FileType,
> +  //                                 CodeGenOpt::Level OptLevel,
> +  //                                 bool DisableVerify = true) ;
> +
> +  NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
> +                     StringRef FS, const TargetOptions &Options,
> +                     Reloc::Model RM, CodeModel::Model CM,
> +                     CodeGenOpt::Level OP,
> +                     bool is64bit);
> +
> +  virtual const TargetFrameLowering *getFrameLowering() const {
> +    return &FrameLowering;
> +  }
> +  virtual const NVPTXInstrInfo *getInstrInfo() const  { return &InstrInfo; }
> +  virtual const TargetData *getTargetData() const     { return &DataLayout;}
> +  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
> +
> +  virtual const NVPTXRegisterInfo *getRegisterInfo() const {
> +    return &(InstrInfo.getRegisterInfo());
> +  }
> +
> +  virtual NVPTXTargetLowering *getTargetLowering() const {
> +    return const_cast<NVPTXTargetLowering*>(&TLInfo);
> +  }
> +
> +  virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
> +    return &TSInfo;
> +  }
> +
> +  //virtual bool addInstSelector(PassManagerBase &PM,
> +  //                             CodeGenOpt::Level OptLevel);
> +
> +  //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
> +
> +  ManagedStringPool *getManagedStrPool() const {
> +    return const_cast<ManagedStringPool*>(&ManagedStrPool);
> +  }
> +
> +  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
> +
> +  // Emission of machine code through JITCodeEmitter is not supported.
> +  virtual bool addPassesToEmitMachineCode(PassManagerBase &,
> +                                          JITCodeEmitter &,
> +                                          bool = true) {
> +    return true;
> +  }
> +
> +  // Emission of machine code through MCJIT is not supported.
> +  virtual bool addPassesToEmitMC(PassManagerBase &,
> +                                 MCContext *&,
> +                                 raw_ostream &,
> +                                 bool = true) {
> +    return true;
> +  }
> +
> +}; // NVPTXTargetMachine.
> +
> +class NVPTXTargetMachine32 : public NVPTXTargetMachine {
> +  virtual void anchor();
> +public:
> +  NVPTXTargetMachine32(const Target &T, StringRef TT, StringRef CPU,
> +                       StringRef FS, const TargetOptions &Options,
> +                       Reloc::Model RM, CodeModel::Model CM,
> +                       CodeGenOpt::Level OL);
> +};
> +
> +class NVPTXTargetMachine64 : public NVPTXTargetMachine {
> +  virtual void anchor();
> +public:
> +  NVPTXTargetMachine64(const Target &T, StringRef TT, StringRef CPU,
> +                       StringRef FS, const TargetOptions &Options,
> +                       Reloc::Model RM, CodeModel::Model CM,
> +                       CodeGenOpt::Level OL);
> +};
> +
> +
> +} // end namespace llvm
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXTargetObjectFile.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetObjectFile.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXTargetObjectFile.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetObjectFile.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,105 @@
> +//===-- NVPTXTargetObjectFile.h - NVPTX Object Info -------------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
> +#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
> +
> +#include "NVPTXSection.h"
> +#include "llvm/Target/TargetLoweringObjectFile.h"
> +#include <string>
> +
> +namespace llvm {
> +class GlobalVariable;
> +class Module;
> +
> +class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
> +
> +public:
> +  NVPTXTargetObjectFile() {}
> +  ~NVPTXTargetObjectFile() {
> +    delete TextSection;
> +    delete DataSection;
> +    delete BSSSection;
> +    delete ReadOnlySection;
> +
> +    delete StaticCtorSection;
> +    delete StaticDtorSection;
> +    delete LSDASection;
> +    delete EHFrameSection;
> +    delete DwarfAbbrevSection;
> +    delete DwarfInfoSection;
> +    delete DwarfLineSection;
> +    delete DwarfFrameSection;
> +    delete DwarfPubTypesSection;
> +    delete DwarfDebugInlineSection;
> +    delete DwarfStrSection;
> +    delete DwarfLocSection;
> +    delete DwarfARangesSection;
> +    delete DwarfRangesSection;
> +    delete DwarfMacroInfoSection;
> +  }
> +
> +  virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
> +    TextSection = new NVPTXSection(MCSection::SV_ELF,
> +                                   SectionKind::getText());
> +    DataSection = new NVPTXSection(MCSection::SV_ELF,
> +                                   SectionKind::getDataRel());
> +    BSSSection = new NVPTXSection(MCSection::SV_ELF,
> +                                  SectionKind::getBSS());
> +    ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
> +                                       SectionKind::getReadOnly());
> +
> +    StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
> +                                         SectionKind::getMetadata());
> +    StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
> +                                         SectionKind::getMetadata());
> +    LSDASection = new NVPTXSection(MCSection::SV_ELF,
> +                                   SectionKind::getMetadata());
> +    EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
> +                                      SectionKind::getMetadata());
> +    DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
> +                                          SectionKind::getMetadata());
> +    DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
> +                                        SectionKind::getMetadata());
> +    DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
> +                                        SectionKind::getMetadata());
> +    DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
> +                                         SectionKind::getMetadata());
> +    DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
> +                                            SectionKind::getMetadata());
> +    DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
> +                                               SectionKind::getMetadata());
> +    DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
> +                                       SectionKind::getMetadata());
> +    DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
> +                                       SectionKind::getMetadata());
> +    DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
> +                                           SectionKind::getMetadata());
> +    DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
> +                                          SectionKind::getMetadata());
> +    DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
> +                                             SectionKind::getMetadata());
> +  }
> +
> +  virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
> +    return ReadOnlySection;
> +  }
> +
> +  virtual const MCSection *
> +  getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
> +                           Mangler *Mang,
> +                           const TargetMachine &TM) const {
> +    return DataSection;
> +  }
> +
> +};
> +
> +} // end namespace llvm
> +
> +#endif
>
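
Usage-wise this behaves like any other TargetLoweringObjectFile, just with
inert sections; for instance, every constant is funneled to the single
read-only section. A sketch (Ctx and TM assumed to be an existing
MCContext and TargetMachine):

  NVPTXTargetObjectFile TLOF;
  TLOF.Initialize(Ctx, TM);
  const llvm::MCSection *S =
      TLOF.getSectionForConstant(llvm::SectionKind::getMergeableConst());
  // S is TLOF's ReadOnlySection; printing a switch to it emits nothing.
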
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXUtilities.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXUtilities.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXUtilities.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXUtilities.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,514 @@
> +//===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains miscellaneous utility functions.
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTXUtilities.h"
> +#include "NVPTX.h"
> +#include "llvm/GlobalVariable.h"
> +#include "llvm/Function.h"
> +#include "llvm/Module.h"
> +#include "llvm/Constants.h"
> +#include "llvm/Operator.h"
> +#include <algorithm>
> +#include <cstring>
> +#include <map>
> +#include <string>
> +#include <vector>
> +#include "llvm/Support/ManagedStatic.h"
> +#include "llvm/Support/InstIterator.h"
> +
> +using namespace llvm;
> +
> +typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
> +typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
> +typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
> +
> +ManagedStatic<per_module_annot_t> annotationCache;
> +
> +
> +static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
> +  assert(md && "Invalid mdnode for annotation");
> +  assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
> +  // Start at index 1 to skip the global variable key; step by 2 so each
> +  // iteration lands on the property of the next property-value pair.
> +  for (unsigned i = 1, e = md->getNumOperands(); i != e; i += 2) {
> +    // property
> +    const MDString *prop = dyn_cast<MDString>(md->getOperand(i));
> +    assert(prop && "Annotation property not a string");
> +
> +    // value
> +    ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
> +    assert(Val && "Value operand not a constant int");
> +
> +    std::string keyname = prop->getString().str();
> +    if (retval.find(keyname) != retval.end())
> +      retval[keyname].push_back(Val->getZExtValue());
> +    else {
> +      std::vector<unsigned> tmp;
> +      tmp.push_back(Val->getZExtValue());
> +      retval[keyname] = tmp;
> +    }
> +  }
> +}
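
For readers following along: the asserts above imply each annotation node has
the shape {entity, "prop0", val0, "prop1", val1, ...}, hence the odd operand
count. A minimal producer sketch, assuming the current metadata API (KernelFn
and "kernel" are illustrative names, not taken from this patch):

  LLVMContext &Ctx = M->getContext();
  Value *Ops[] = { KernelFn,                                     // the annotated entity
                   MDString::get(Ctx, "kernel"),                 // property name
                   ConstantInt::get(Type::getInt32Ty(Ctx), 1) }; // property value
  NamedMDNode *NMD = M->getOrInsertNamedMetadata(llvm::NamedMDForAnnotations);
  NMD->addOperand(MDNode::get(Ctx, Ops));
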
> +
> +static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
> +  NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
> +  if (!NMD)
> +    return;
> +  key_val_pair_t tmp;
> +  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
> +    const MDNode *elem = NMD->getOperand(i);
> +
> +    Value *entity = elem->getOperand(0);
> +    // entity may be null due to DCE
> +    if (!entity)
> +      continue;
> +    if (entity != gv)
> +      continue;
> +
> +    // accumulate annotations for entity in tmp
> +    cacheAnnotationFromMD(elem, tmp);
> +  }
> +
> +  if (tmp.empty()) // no annotations for this gv
> +    return;
> +
> +  if ((*annotationCache).find(m) != (*annotationCache).end())
> +    (*annotationCache)[m][gv] = tmp;
> +  else {
> +    global_val_annot_t tmp1;
> +    tmp1[gv] = tmp;
> +    (*annotationCache)[m] = tmp1;
> +  }
> +}
> +
> +bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
> +                                 unsigned &retval) {
> +  const Module *m = gv->getParent();
> +  if ((*annotationCache).find(m) == (*annotationCache).end())
> +    cacheAnnotationFromMD(m, gv);
> +  else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
> +    cacheAnnotationFromMD(m, gv);
> +  if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
> +    return false;
> +  retval = (*annotationCache)[m][gv][prop][0];
> +  return true;
> +}
> +
> +bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
> +                                 std::vector<unsigned> &retval) {
> +  const Module *m = gv->getParent();
> +  if ((*annotationCache).find(m) == (*annotationCache).end())
> +    cacheAnnotationFromMD(m, gv);
> +  else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
> +    cacheAnnotationFromMD(m, gv);
> +  if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
> +    return false;
> +  retval = (*annotationCache)[m][gv][prop];
> +  return true;
> +}
> +
> +bool llvm::isTexture(const llvm::Value &val) {
> +  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
> +    unsigned annot;
> +    if (llvm::findOneNVVMAnnotation(gv,
> +                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
> +                                   annot)) {
> +      assert((annot == 1) && "Unexpected annotation on a texture symbol");
> +      return true;
> +    }
> +  }
> +  return false;
> +}
> +
> +bool llvm::isSurface(const llvm::Value &val) {
> +  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
> +    unsigned annot;
> +    if (llvm::findOneNVVMAnnotation(gv,
> +                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
> +                                   annot)) {
> +      assert((annot == 1) && "Unexpected annotation on a surface symbol");
> +      return true;
> +    }
> +  }
> +  return false;
> +}
> +
> +bool llvm::isSampler(const llvm::Value &val) {
> +  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
> +    unsigned annot;
> +    if (llvm::findOneNVVMAnnotation(gv,
> +                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
> +                                   annot)) {
> +      assert((annot == 1) && "Unexpected annotation on a sampler symbol");
> +      return true;
> +    }
> +  }
> +  if (const Argument *arg = dyn_cast<Argument>(&val)) {
> +    const Function *func = arg->getParent();
> +    std::vector<unsigned> annot;
> +    if (llvm::findAllNVVMAnnotation(func,
> +                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
> +                                   annot)) {
> +      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
> +        return true;
> +    }
> +  }
> +  return false;
> +}
> +
> +bool llvm::isImageReadOnly(const llvm::Value &val) {
> +  if (const Argument *arg = dyn_cast<Argument>(&val)) {
> +    const Function *func = arg->getParent();
> +    std::vector<unsigned> annot;
> +    if (llvm::findAllNVVMAnnotation(func,
> +          llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
> +                                   annot)) {
> +      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
> +        return true;
> +    }
> +  }
> +  return false;
> +}
> +
> +bool llvm::isImageWriteOnly(const llvm::Value &val) {
> +  if (const Argument *arg = dyn_cast<Argument>(&val)) {
> +    const Function *func = arg->getParent();
> +    std::vector<unsigned> annot;
> +    if (llvm::findAllNVVMAnnotation(func,
> +         llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
> +                                   annot)) {
> +      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
> +        return true;
> +    }
> +  }
> +  return false;
> +}
> +
> +bool llvm::isImage(const llvm::Value &val) {
> +  return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val);
> +}
> +
> +std::string llvm::getTextureName(const llvm::Value &val) {
> +  assert(val.hasName() && "Found texture variable with no name");
> +  return val.getName();
> +}
> +
> +std::string llvm::getSurfaceName(const llvm::Value &val) {
> +  assert(val.hasName() && "Found surface variable with no name");
> +  return val.getName();
> +}
> +
> +std::string llvm::getSamplerName(const llvm::Value &val) {
> +  assert(val.hasName() && "Found sampler variable with no name");
> +  return val.getName();
> +}
> +
> +bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
> +  return (llvm::findOneNVVMAnnotation(&F,
> +                       llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
> +                                      x));
> +}
> +
> +bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
> +  return (llvm::findOneNVVMAnnotation(&F,
> +                       llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
> +                                      y));
> +}
> +
> +bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
> +  return (llvm::findOneNVVMAnnotation(&F,
> +                       llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
> +                                      z));
> +}
> +
> +bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
> +  return (llvm::findOneNVVMAnnotation(&F,
> +                       llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
> +                                      x));
> +}
> +
> +bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
> +  return (llvm::findOneNVVMAnnotation(&F,
> +                       llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
> +                                      y));
> +}
> +
> +bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
> +  return (llvm::findOneNVVMAnnotation(&F,
> +                       llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
> +                                      z));
> +}
> +
> +bool llvm::getMinCTASm(const Function &F, unsigned &x) {
> +  return (llvm::findOneNVVMAnnotation(&F,
> +                    llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
> +                                      x));
> +}
> +
> +bool llvm::isKernelFunction(const Function &F) {
> +  unsigned x = 0;
> +  bool retval = llvm::findOneNVVMAnnotation(&F,
> +               llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
> +                                            x);
> +  if (!retval) {
> +    // There is no NVVM metadata; check the calling convention.
> +    return F.getCallingConv() == llvm::CallingConv::PTX_Kernel;
> +  }
> +  return (x == 1);
> +}
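
A typical client of the helpers above, for anyone wondering how they compose
(MaxX and the errs() reporting are illustrative; the sketch also assumes
llvm/Support/raw_ostream.h):

  for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
    if (llvm::isKernelFunction(*I)) {
      unsigned MaxX;
      if (llvm::getMaxNTIDx(*I, MaxX))  // launch bound from the annotations
        errs() << I->getName() << ": maxntidx = " << MaxX << "\n";
    }
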
> +
> +bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
> +  std::vector<unsigned> Vs;
> +  bool retval = llvm::findAllNVVMAnnotation(&F,
> +                           llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
> +                                            Vs);
> +  if (!retval)
> +    return false;
> +  for (unsigned i = 0, e = Vs.size(); i != e; ++i) {
> +    unsigned v = Vs[i];
> +    if ((v >> 16) == index) {
> +      align = v & 0xFFFF;
> +      return true;
> +    }
> +  }
> +  return false;
> +}
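
Might be worth a comment in the code: the "align" annotation packs the
parameter index into the high 16 bits and the alignment into the low 16, so
for example:

  unsigned Encoded = (2 << 16) | 8;  // parameter 2, 8-byte alignment: 0x20008
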
> +
> +bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
> +  if (MDNode *alignNode = I.getMetadata("callalign")) {
> +    for (unsigned i = 0, n = alignNode->getNumOperands(); i != n; ++i) {
> +      if (const ConstantInt *CI =
> +          dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
> +        unsigned v = CI->getZExtValue();
> +        if ((v >> 16) == index) {
> +          align = v & 0xFFFF;
> +          return true;
> +        }
> +        if ((v >> 16) > index) {
> +          return false;
> +        }
> +      }
> +    }
> +  }
> +  return false;
> +}
> +
> +bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
> +  if ((id == Intrinsic::nvvm_barrier0) ||
> +      (id == Intrinsic::nvvm_barrier0_popc) ||
> +      (id == Intrinsic::nvvm_barrier0_and) ||
> +      (id == Intrinsic::nvvm_barrier0_or) ||
> +      (id == Intrinsic::cuda_syncthreads))
> +    return true;
> +  return false;
> +}
> +
> +// Interface for checking all memory space transfer related intrinsics
> +bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
> +  if (id == Intrinsic::nvvm_ptr_local_to_gen ||
> +      id == Intrinsic::nvvm_ptr_shared_to_gen ||
> +      id == Intrinsic::nvvm_ptr_global_to_gen ||
> +      id == Intrinsic::nvvm_ptr_constant_to_gen ||
> +      id == Intrinsic::nvvm_ptr_gen_to_global ||
> +      id == Intrinsic::nvvm_ptr_gen_to_shared ||
> +      id == Intrinsic::nvvm_ptr_gen_to_local ||
> +      id == Intrinsic::nvvm_ptr_gen_to_constant ||
> +      id == Intrinsic::nvvm_ptr_gen_to_param) {
> +    return true;
> +  }
> +
> +  return false;
> +}
> +
> +// Consider several special intrinsics when stripping pointer casts, and
> +// provide an option to ignore GEP indices so that only the base address is
> +// found.  This can be used for simple alias disambiguation.
> +const Value *llvm::skipPointerTransfer(const Value *V,
> +                                       bool ignore_GEP_indices) {
> +  V = V->stripPointerCasts();
> +  while (true) {
> +    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
> +      if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
> +        V = IS->getArgOperand(0)->stripPointerCasts();
> +        continue;
> +      }
> +    } else if (ignore_GEP_indices)
> +      if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
> +        V = GEP->getPointerOperand()->stripPointerCasts();
> +        continue;
> +      }
> +    break;
> +  }
> +  return V;
> +}
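
Usage sketch for the variant above - chase a pointer through the address
space transfer intrinsics and casts, ignoring GEP offsets, to compare
underlying objects (A and B are illustrative):

  const Value *BaseA = llvm::skipPointerTransfer(A, /*ignore_GEP_indices=*/true);
  const Value *BaseB = llvm::skipPointerTransfer(B, /*ignore_GEP_indices=*/true);
  if (BaseA == BaseB) {
    // Same base object; only the GEP offsets can differ.
  }
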
> +
> +// Consider several special intrinsics when stripping pointer casts:
> +// - ignore GEP indices so that only the base address is found, and
> +// - track PHI nodes.
> +// This can be used for simple alias disambiguation.
> +const Value *llvm::skipPointerTransfer(const Value *V,
> +                                       std::set<const Value *> &processed) {
> +  if (processed.find(V) != processed.end())
> +    return NULL;
> +  processed.insert(V);
> +
> +  const Value *V2 = V->stripPointerCasts();
> +  if (V2 != V && processed.find(V2) != processed.end())
> +    return NULL;
> +  processed.insert(V2);
> +
> +  V = V2;
> +
> +  while (true) {
> +    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
> +      if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
> +        V = IS->getArgOperand(0)->stripPointerCasts();
> +        continue;
> +      }
> +    } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
> +      V = GEP->getPointerOperand()->stripPointerCasts();
> +      continue;
> +    } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
> +      if (V != V2 && processed.find(V) != processed.end())
> +        return NULL;
> +      processed.insert(PN);
> +      const Value *common = 0;
> +      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
> +        const Value *pv = PN->getIncomingValue(i);
> +        const Value *base = skipPointerTransfer(pv, processed);
> +        if (base) {
> +          if (common == 0)
> +            common = base;
> +          else if (common != base)
> +            return PN;
> +        }
> +      }
> +      if (common == 0)
> +        return PN;
> +      V = common;
> +    }
> +    break;
> +  }
> +  return V;
> +}
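
And for the PHI-tracking variant: as far as I can tell it returns NULL for a
value already on the visited set, and the PHI node itself when the incoming
values disagree on a base, so a caller would look something like:

  std::set<const Value *> Visited;
  if (const Value *Base = llvm::skipPointerTransfer(V, Visited))
    if (!isa<PHINode>(Base)) {
      // A single underlying object was found.
    }
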
> +
> +
> +// The following are some useful utilities for debugging
> +
> +BasicBlock *llvm::getParentBlock(Value *v) {
> +  if (BasicBlock *B = dyn_cast<BasicBlock>(v))
> +    return B;
> +
> +  if (Instruction *I = dyn_cast<Instruction>(v))
> +    return I->getParent();
> +
> +  return 0;
> +}
> +
> +Function *llvm::getParentFunction(Value *v) {
> +  if (Function *F = dyn_cast<Function>(v))
> +    return F;
> +
> +  if (Instruction *I = dyn_cast<Instruction>(v))
> +    return I->getParent()->getParent();
> +
> +  if (BasicBlock *B = dyn_cast<BasicBlock>(v))
> +    return B->getParent();
> +
> +  return 0;
> +}
> +
> +// Dump a block by name
> +void llvm::dumpBlock(Value *v, char *blockName) {
> +  Function *F = getParentFunction(v);
> +  if (F == 0)
> +    return;
> +
> +  for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
> +    BasicBlock *B = it;
> +    if (strcmp(B->getName().data(), blockName) == 0) {
> +      B->dump();
> +      return;
> +    }
> +  }
> +}
> +
> +// Find an instruction by name
> +Instruction *llvm::getInst(Value *base, char *instName) {
> +  Function *F = getParentFunction(base);
> +  if (F == 0)
> +    return 0;
> +
> +  for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
> +    Instruction *I = &*it;
> +    if (strcmp(I->getName().data(), instName) == 0) {
> +      return I;
> +    }
> +  }
> +
> +  return 0;
> +}
> +
> +// Dump an instruction by name
> +void llvm::dumpInst(Value *base, char *instName) {
> +  Instruction *I = getInst(base, instName);
> +  if (I)
> +    I->dump();
> +}
> +
> +// Dump an instruction and all dependent instructions
> +void llvm::dumpInstRec(Value *v, std::set<Instruction *> *visited) {
> +  if (Instruction *I = dyn_cast<Instruction>(v)) {
> +
> +    if (visited->find(I) != visited->end())
> +      return;
> +
> +    visited->insert(I);
> +
> +    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
> +      dumpInstRec(I->getOperand(i), visited);
> +
> +    I->dump();
> +  }
> +}
> +
> +// Dump an instruction and all dependent instructions
> +void llvm::dumpInstRec(Value *v) {
> +  std::set<Instruction *> visited;
> +
> +
> +  dumpInstRec(v, &visited);
> +}
> +
> +// Dump the parent for Instruction, block or function
> +void llvm::dumpParent(Value *v) {
> +  if (Instruction *I = dyn_cast<Instruction>(v)) {
> +    I->getParent()->dump();
> +    return;
> +  }
> +
> +  if (BasicBlock *B = dyn_cast<BasicBlock>(v)) {
> +    B->getParent()->dump();
> +    return;
> +  }
> +
> +  if (Function *F = dyn_cast<Function>(v)) {
> +    F->getParent()->dump();
> +    return;
> +  }
> +}
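
Presumably these take char* rather than StringRef so they are easy to call
from a debugger; a short sketch of the intended use (block/instruction names
illustrative):

  llvm::dumpBlock(V, const_cast<char *>("for.body"));  // print one block by name
  llvm::dumpInst(V, const_cast<char *>("tmp12"));      // print one instruction by name
  llvm::dumpInstRec(V);                                // print V and all it depends on
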
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXUtilities.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXUtilities.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXUtilities.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXUtilities.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,94 @@
> +//===-- NVPTXUtilities.h - Utilities ---------------------------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the declaration of the NVVM specific utility functions.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTXUTILITIES_H
> +#define NVPTXUTILITIES_H
> +
> +#include "llvm/Value.h"
> +#include "llvm/GlobalVariable.h"
> +#include "llvm/Function.h"
> +#include "llvm/IntrinsicInst.h"
> +#include <cstdarg>
> +#include <set>
> +#include <string>
> +#include <vector>
> +
> +namespace llvm
> +{
> +
> +#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
> +#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
> +
> +bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);
> +bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,
> +                           std::vector<unsigned> &);
> +
> +bool isTexture(const llvm::Value &);
> +bool isSurface(const llvm::Value &);
> +bool isSampler(const llvm::Value &);
> +bool isImage(const llvm::Value &);
> +bool isImageReadOnly(const llvm::Value &);
> +bool isImageWriteOnly(const llvm::Value &);
> +
> +std::string getTextureName(const llvm::Value &);
> +std::string getSurfaceName(const llvm::Value &);
> +std::string getSamplerName(const llvm::Value &);
> +
> +bool getMaxNTIDx(const llvm::Function &, unsigned &);
> +bool getMaxNTIDy(const llvm::Function &, unsigned &);
> +bool getMaxNTIDz(const llvm::Function &, unsigned &);
> +
> +bool getReqNTIDx(const llvm::Function &, unsigned &);
> +bool getReqNTIDy(const llvm::Function &, unsigned &);
> +bool getReqNTIDz(const llvm::Function &, unsigned &);
> +
> +bool getMinCTASm(const llvm::Function &, unsigned &);
> +bool isKernelFunction(const llvm::Function &);
> +
> +bool getAlign(const llvm::Function &, unsigned index, unsigned &);
> +bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
> +
> +bool isBarrierIntrinsic(llvm::Intrinsic::ID);
> +
> +/// make_vector - Helper function for building temporary vectors to pass
> +/// into CallInst constructors.  This turns a null-terminated list of
> +/// pointers (or other value types) into a real vector.
> +///
> +template<typename T>
> +inline std::vector<T> make_vector(T A, ...) {
> +  va_list Args;
> +  va_start(Args, A);
> +  std::vector<T> Result;
> +  Result.push_back(A);
> +  while (T Val = va_arg(Args, T))
> +    Result.push_back(Val);
> +  va_end(Args);
> +  return Result;
> +}
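
Usage note, since the sentinel is easy to forget: the argument list must end
with a null of the same type (Op0/Op1 illustrative):

  std::vector<Value *> Args = make_vector<Value *>(Op0, Op1, (Value *)0);
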
> +
> +bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
> +const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
> +const Value *skipPointerTransfer(const Value *V,
> +                                 std::set<const Value *> &processed);
> +BasicBlock *getParentBlock(Value *v);
> +Function *getParentFunction(Value *v);
> +void dumpBlock(Value *v, char *blockName);
> +Instruction *getInst(Value *base, char *instName);
> +void dumpInst(Value *base, char *instName);
> +void dumpInstRec(Value *v, std::set<Instruction *> *visited);
> +void dumpInstRec(Value *v);
> +void dumpParent(Value *v);
> +
> +} // end namespace llvm
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXVector.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXVector.td?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXVector.td (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXVector.td Fri May  4 15:18:50 2012
> @@ -0,0 +1,1481 @@
> +//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tablegen -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +//-----------------------------------
> +// Vector Specific
> +//-----------------------------------
> +
> +//
> +// All vector instructions derive from NVPTXVecInst
> +//
> +
> +class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
> +  NVPTXInst sInst=NOP>
> +  : NVPTXInst<outs, ins, asmstr, pattern> {
> +  NVPTXInst scalarInst=sInst;
> +}
> +
> +let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
> +// Extract v2i16
> +def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
> +  (ins V2I16Regs:$src, i8imm:$c),
> +                         "mov.u16 \t$dst, $src${c:vecelem};",
> +                         [(set Int16Regs:$dst, (vector_extract
> +                           (v2i16 V2I16Regs:$src), imm:$c))],
> +                         IMOV16rr>;
> +
> +// Extract v4i16
> +def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
> +  (ins V4I16Regs:$src, i8imm:$c),
> +                         "mov.u16 \t$dst, $src${c:vecelem};",
> +                         [(set Int16Regs:$dst, (vector_extract
> +                           (v4i16 V4I16Regs:$src), imm:$c))],
> +                         IMOV16rr>;
> +
> +// Extract v2i8
> +def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
> +  (ins V2I8Regs:$src, i8imm:$c),
> +                         "mov.u16 \t$dst, $src${c:vecelem};",
> +                         [(set Int8Regs:$dst, (vector_extract
> +                           (v2i8 V2I8Regs:$src), imm:$c))],
> +                         IMOV8rr>;
> +
> +// Extract v4i8
> +def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
> +  (ins V4I8Regs:$src, i8imm:$c),
> +                         "mov.u16 \t$dst, $src${c:vecelem};",
> +                         [(set Int8Regs:$dst, (vector_extract
> +                           (v4i8 V4I8Regs:$src), imm:$c))],
> +                         IMOV8rr>;
> +
> +// Extract v2i32
> +def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
> +  (ins V2I32Regs:$src, i8imm:$c),
> +                         "mov.u32 \t$dst, $src${c:vecelem};",
> +                         [(set Int32Regs:$dst, (vector_extract
> +                           (v2i32 V2I32Regs:$src), imm:$c))],
> +                         IMOV32rr>;
> +
> +// Extract v2f32
> +def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
> +  (ins V2F32Regs:$src, i8imm:$c),
> +                         "mov.f32 \t$dst, $src${c:vecelem};",
> +                         [(set Float32Regs:$dst, (vector_extract
> +                           (v2f32 V2F32Regs:$src), imm:$c))],
> +                         FMOV32rr>;
> +
> +// Extract v2i64
> +def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
> +  (ins V2I64Regs:$src, i8imm:$c),
> +                         "mov.u64 \t$dst, $src${c:vecelem};",
> +                         [(set Int64Regs:$dst, (vector_extract
> +                           (v2i64 V2I64Regs:$src), imm:$c))],
> +                         IMOV64rr>;
> +
> +// Extract v2f64
> +def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
> +  (ins V2F64Regs:$src, i8imm:$c),
> +                         "mov.f64 \t$dst, $src${c:vecelem};",
> +                         [(set Float64Regs:$dst, (vector_extract
> +                           (v2f64 V2F64Regs:$src), imm:$c))],
> +                         FMOV64rr>;
> +
> +// Extract v4i32
> +def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
> +  (ins V4I32Regs:$src, i8imm:$c),
> +                         "mov.u32 \t$dst, $src${c:vecelem};",
> +                         [(set Int32Regs:$dst, (vector_extract
> +                           (v4i32 V4I32Regs:$src), imm:$c))],
> +                         IMOV32rr>;
> +
> +// Extract v4f32
> +def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
> +  (ins V4F32Regs:$src, i8imm:$c),
> +                         "mov.f32 \t$dst, $src${c:vecelem};",
> +                         [(set Float32Regs:$dst, (vector_extract
> +                           (v4f32 V4F32Regs:$src), imm:$c))],
> +                         FMOV32rr>;
> +}
> +
> +let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in {
> +// Insert v2i8
> +def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst),
> +  (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c),
> +        "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
> +        "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
> +       [(set V2I8Regs:$dst,
> +         (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))],
> +                         IMOV8rr>;
> +
> +// Insert v4i8
> +def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
> +  (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c),
> +                       "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
> +                       "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
> +       [(set V4I8Regs:$dst,
> +         (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))],
> +                         IMOV8rr>;
> +
> +// Insert v2i16
> +def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
> +  (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c),
> +                       "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
> +                       "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
> +       [(set V2I16Regs:$dst,
> +         (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))],
> +                         IMOV16rr>;
> +
> +// Insert v4i16
> +def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
> +  (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c),
> +                       "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
> +                       "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
> +       [(set V4I16Regs:$dst,
> +         (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))],
> +                         IMOV16rr>;
> +
> +// Insert v2i32
> +def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
> +  (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c),
> +                       "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
> +                       "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
> +       [(set V2I32Regs:$dst,
> +         (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))],
> +                         IMOV32rr>;
> +
> +// Insert v2f32
> +def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
> +  (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c),
> +                       "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
> +                       "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
> +       [(set V2F32Regs:$dst,
> +         (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))],
> +                         FMOV32rr>;
> +
> +// Insert v2i64
> +def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
> +  (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c),
> +                       "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
> +                       "\n\tmov.u64 \t$dst${c:vecelem}, $val;",
> +       [(set V2I64Regs:$dst,
> +         (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))],
> +                         IMOV64rr>;
> +
> +// Insert v2f64
> +def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
> +  (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c),
> +                       "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
> +                       "\n\tmov.f64 \t$dst${c:vecelem}, $val;",
> +       [(set V2F64Regs:$dst,
> +         (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))],
> +                         FMOV64rr>;
> +
> +// Insert v4i32
> +def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
> +  (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c),
> +                       "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
> +                       "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
> +       [(set V4I32Regs:$dst,
> +         (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))],
> +                         IMOV32rr>;
> +
> +// Insert v4f32
> +def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
> +  (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c),
> +                       "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
> +                       "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
> +       [(set V4F32Regs:$dst,
> +         (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))],
> +                         FMOV32rr>;
> +}
> +
> +class BinOpAsmString<string c> {
> +  string s = c;
> +}
> +
> +class V4AsmStr<string opcode> : BinOpAsmString<
> +                          !strconcat(!strconcat(!strconcat(!strconcat(
> +                            !strconcat(!strconcat(!strconcat(
> +                          opcode,  " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
> +                          opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"),
> +                          opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"),
> +                          opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>;
> +
> +class V2AsmStr<string opcode> : BinOpAsmString<
> +                           !strconcat(!strconcat(!strconcat(
> +                           opcode,  " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
> +                           opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>;
> +
> +class V4MADStr<string opcode> : BinOpAsmString<
> +                          !strconcat(!strconcat(!strconcat(!strconcat(
> +                            !strconcat(!strconcat(!strconcat(
> +                          opcode,  " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
> +                          opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"),
> +                          opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"),
> +                          opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>;
> +
> +class V2MADStr<string opcode> : BinOpAsmString<
> +                           !strconcat(!strconcat(!strconcat(
> +                           opcode,  " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
> +                           opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>;
> +
> +class V4UnaryStr<string opcode> : BinOpAsmString<
> +                          !strconcat(!strconcat(!strconcat(!strconcat(
> +                            !strconcat(!strconcat(!strconcat(
> +                          opcode,  " \t${dst}_0, ${a}_0;\n\t"),
> +                          opcode), " \t${dst}_1, ${a}_1;\n\t"),
> +                          opcode), " \t${dst}_2, ${a}_2;\n\t"),
> +                          opcode), " \t${dst}_3, ${a}_3;")>;
> +
> +class V2UnaryStr<string opcode> : BinOpAsmString<
> +                           !strconcat(!strconcat(!strconcat(
> +                           opcode,  " \t${dst}_0, ${a}_0;\n\t"),
> +                           opcode), " \t${dst}_1, ${a}_1;")>;
> +
> +class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass,
> +  NVPTXInst sInst=NOP> :
> +      NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b),
> +                 asmstr.s,
> +                 [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))],
> +                 sInst>;
> +
> +class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1,
> +                 NVPTXRegClass regclass2, NVPTXInst sInst=NOP> :
> +      NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b),
> +                 asmstr.s,
> +                 [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))],
> +                 sInst>;
> +
> +class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass,
> +  NVPTXInst sInst=NOP> :
> +      NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a),
> +                 asmstr.s,
> +                 [(set regclass:$dst, (OpNode regclass:$a))], sInst>;
> +
> +multiclass IntBinVOp<string asmstr, SDNode OpNode,
> +                     NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, NVPTXInst
> +                     i16op=NOP, NVPTXInst i8op=NOP> {
> +  def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs,
> +    i64op>;
> +  def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs,
> +    i32op>;
> +  def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs,
> +    i32op>;
> +  def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs,
> +    i16op>;
> +  def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs,
> +    i16op>;
> +  def V4I8 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs,
> +    i8op>;
> +  def V2I8 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs,
> +    i8op>;
> +}
> +
> +multiclass FloatBinVOp<string asmstr, SDNode OpNode,
> +                       NVPTXInst f64=NOP, NVPTXInst f32=NOP,
> +                       NVPTXInst f32_ftz=NOP> {
> +  def V2F64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode,
> +    V2F64Regs, f64>;
> +  def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
> +    V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
> +  def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
> +    V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
> +  def V4F32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode,
> +    V4F32Regs, f32>;
> +  def V2F32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode,
> +    V2F32Regs, f32>;
> +}
> +
> +multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
> +                       NVPTXInst i64op=NOP, NVPTXInst i32op=NOP,
> +                       NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> {
> +  def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode,
> +    V2I64Regs, i64op>;
> +  def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode,
> +    V4I32Regs, i32op>;
> +  def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode,
> +    V2I32Regs, i32op>;
> +  def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
> +    V4I16Regs, i16op>;
> +  def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
> +    V2I16Regs, i16op>;
> +  def V4I8  : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
> +    V4I8Regs,   i8op>;
> +  def V2I8  : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
> +    V2I8Regs,   i8op>;
> +}
> +
> +
> +// Integer Arithmetic
> +let VecInstType=isVecOther.Value in {
> +defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
> +defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;
> +
> +def AddCCV4I32 : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs,
> +  ADDCCi32rr>;
> +def AddCCV2I32 : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs,
> +  ADDCCi32rr>;
> +def SubCCV4I32 : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs,
> +  SUBCCi32rr>;
> +def SubCCV2I32 : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs,
> +  SUBCCi32rr>;
> +def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs,
> +  ADDCCCi32rr>;
> +def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs,
> +  ADDCCCi32rr>;
> +def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs,
> +  SUBCCCi32rr>;
> +def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs,
> +  SUBCCCi32rr>;
> +
> +def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs,
> +  SHLi64rr>;
> +def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs,
> +  SHLi32rr>;
> +def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs,
> +  SHLi32rr>;
> +def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs,
> +  SHLi16rr>;
> +def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs,
> +  SHLi16rr>;
> +def ShiftLV2I8  : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs,  V2I32Regs,
> +  SHLi8rr>;
> +def ShiftLV4I8  : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs,  V4I32Regs,
> +  SHLi8rr>;
> +}
> +
> +// cvt to v*i32, helpers for shift
> +class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr,
> +  NVPTXInst sInst=NOP> :
> +      NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>;
> +
> +class VecCVTStrHelper<string op, string dest, string src> {
> +  string s=!strconcat(op, !strconcat("\t",
> +           !strconcat(dest, !strconcat(", ", !strconcat(src, ";")))));
> +}
> +
> +class Vec2CVTStr<string op> {
> +  string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
> +           !strconcat("\n\t", VecCVTStrHelper<op, "${d}_1", "${s}_1">.s));
> +}
> +
> +class Vec4CVTStr<string op> {
> +  string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
> +           !strconcat("\n\t",
> +           !strconcat(VecCVTStrHelper<op, "${d}_1", "${s}_1">.s,
> +           !strconcat("\n\t",
> +           !strconcat(VecCVTStrHelper<op, "${d}_2", "${s}_2">.s,
> +           !strconcat("\n\t", VecCVTStrHelper<op, "${d}_3", "${s}_3">.s))))));
> +}
> +
> +let VecInstType=isVecOther.Value in {
> +def CVTv2i8tov2i32 : CVTtoVeci32<V2I8Regs, V2I32Regs,
> +  Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
> +def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs,
> +  Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
> +def CVTv4i8tov4i32 : CVTtoVeci32<V4I8Regs, V4I32Regs,
> +  Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
> +def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs,
> +  Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
> +def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs,
> +  Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>;
> +}
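
For context (worth a comment in the .td, maybe): PTX shift instructions take
a 32-bit shift amount regardless of the operand width, which is presumably
why the patterns below first widen or truncate the shift operand to v*i32
via these cvt helpers.
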
> +
> +def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
> +          (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
> +def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
> +          (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
> +def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
> +          (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
> +
> +def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
> +          (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
> +def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
> +          (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
> +
> +let VecInstType=isVecOther.Value in {
> +def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs,
> +  SRAi64rr>;
> +def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs,
> +  SRAi32rr>;
> +def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs,
> +  SRAi32rr>;
> +def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs,
> +  SRAi16rr>;
> +def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs,
> +  SRAi16rr>;
> +def ShiftRAV2I8  : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs,  V2I32Regs,
> +  SRAi8rr>;
> +def ShiftRAV4I8  : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs,  V4I32Regs,
> +  SRAi8rr>;
> +
> +def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs,
> +  SRLi64rr>;
> +def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs,
> +  SRLi32rr>;
> +def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs,
> +  SRLi32rr>;
> +def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs,
> +  SRLi16rr>;
> +def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs,
> +  SRLi16rr>;
> +def ShiftRLV2I8  : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs,  V2I32Regs,
> +  SRLi8rr>;
> +def ShiftRLV4I8  : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs,  V4I32Regs,
> +  SRLi8rr>;
> +
> +defm VMult   : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr,
> +  MULTi8rr>;
> +defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr,
> +  MULTHSi16rr,
> +  MULTHSi8rr>;
> +defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr,
> +  MULTHUi16rr,
> +  MULTHUi8rr>;
> +defm VSDiv   : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr,
> +  SDIVi8rr>;
> +defm VUDiv   : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr,
> +  UDIVi8rr>;
> +defm VSRem   : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr,
> +  SREMi8rr>;
> +defm VURem   : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr,
> +  UREMi8rr>;
> +}
> +
> +def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
> +          (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
> +def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
> +          (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
> +def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
> +          (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
> +
> +def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
> +          (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
> +def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
> +          (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
> +
> +def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
> +          (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
> +def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
> +          (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
> +def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
> +          (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
> +
> +def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
> +          (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
> +def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
> +          (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
> +
> +multiclass VMAD<string asmstr, NVPTXRegClass regclassv4,
> +  NVPTXRegClass regclassv2,
> +                SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP,
> +                Predicate Pred> {
> +  def V4 : NVPTXVecInst<(outs regclassv4:$dst),
> +    (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
> +                      V4MADStr<asmstr>.s,
> +                      [(set regclassv4:$dst,
> +                        (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))],
> +                      sop>,
> +           Requires<[Pred]>;
> +  def V2 : NVPTXVecInst<(outs regclassv2:$dst),
> +    (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
> +                      V2MADStr<asmstr>.s,
> +                      [(set regclassv2:$dst,
> +                        (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))],
> +                      sop>,
> +           Requires<[Pred]>;
> +}
> +
> +multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
> +  Predicate Pred> {
> +  def V2 : NVPTXVecInst<(outs regclass:$dst),
> +    (ins regclass:$a, regclass:$b, regclass:$c),
> +                      V2MADStr<asmstr>.s,
> +                      [(set regclass:$dst, (add
> +                        (mul regclass:$a, regclass:$b), regclass:$c))], sop>,
> +           Requires<[Pred]>;
> +}
> +multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
> +  Predicate Pred> {
> +  def V2 : NVPTXVecInst<(outs regclass:$dst),
> +    (ins regclass:$a, regclass:$b, regclass:$c),
> +                      V2MADStr<asmstr>.s,
> +                      [(set regclass:$dst, (fadd
> +                        (fmul regclass:$a, regclass:$b), regclass:$c))], sop>,
> +           Requires<[Pred]>;
> +}
> +
> +let VecInstType=isVecOther.Value in {
> +defm I8MAD  : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>;
> +defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr,
> +  true>;
> +defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr,
> +  true>;
> +defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;
> +
> +defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;
> +
> +defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
> +defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
> +defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>;
> +
> +defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
> +  FMAD32_ftzrrr, doFMADF32_ftz>;
> +defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
> +  FMA32_ftzrrr, doFMAF32_ftz>;
> +defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr,
> +  doFMADF32>;
> +defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr,
> +  doFMAF32>;
> +defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
> +}
> +
> +let VecInstType=isVecOther.Value in {
> +def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs,
> +  FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
> +def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs,
> +  FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
> +def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs,
> +  FDIV32rr_prec>, Requires<[reqPTX20]>;
> +def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs,
> +  FDIV32rr_prec>, Requires<[reqPTX20]>;
> +def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs,
> +  FDIV32rr_ftz>, Requires<[doF32FTZ]>;
> +def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs,
> +  FDIV32rr_ftz>, Requires<[doF32FTZ]>;
> +def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>;
> +def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>;
> +def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>;
> +}
> +
> +def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>;
> +
> +let VecInstType=isVecOther.Value in {
> +def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs,
> +  FNEGf32_ftz>, Requires<[doF32FTZ]>;
> +def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs,
> +  FNEGf32_ftz>, Requires<[doF32FTZ]>;
> +def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>;
> +def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>;
> +def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>;
> +
> +// Logical Arithmetic
> +defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
> +defm VOr  : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>;
> +defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>;
> +
> +defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
> +}
> +
> +
> +multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
> +  def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)),
> +          (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c,  V2F32Regs:$a)>,
> +          Requires<[Pred]>;
> +
> +  def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c),
> +          (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>,
> +          Requires<[Pred]>;
> +}
> +
> +defm V2FMAF32ext_ftz  : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>;
> +defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
> +defm V2FMAF32ext  : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
> +defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;
> +
> +multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
> +  def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)),
> +          (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c,  V4F32Regs:$a)>,
> +          Requires<[Pred]>;
> +
> +  def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c),
> +          (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>,
> +          Requires<[Pred]>;
> +}
> +
> +defm V4FMAF32ext_ftz  : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>;
> +defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
> +defm V4FMAF32ext  : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
> +defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;
> +
> +multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
> +  def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)),
> +          (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>,
> +          Requires<[Pred]>;
> +
> +  def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c),
> +          (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>,
> +          Requires<[Pred]>;
> +}
> +
> +defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>;
> +
> +class VecModStr<string vecsize, string elem, string extra, string l="">
> +{
> +  string t1 = !strconcat("${c", elem);
> +  string t2 = !strconcat(t1, ":vecv");
> +  string t3 = !strconcat(t2, vecsize);
> +  string t4 = !strconcat(t3, extra);
> +  string t5 = !strconcat(t4, l);
> +  string s =  !strconcat(t5, "}");
> +}
> +class ShuffleOneLine<string vecsize, string elem, string type>
> +{
> +  string t1 = VecModStr<vecsize, elem, "comm", "1">.s;
> +  string t2 = !strconcat(t1, "mov.");
> +  string t3 = !strconcat(t2, type);
> +  string t4 = !strconcat(t3, " \t${dst}_");
> +  string t5 = !strconcat(t4, elem);
> +  string t6 = !strconcat(t5, ", $src1");
> +  string t7 = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s);
> +  string t8 = !strconcat(t7, ";\n\t");
> +  string t9 = !strconcat(t8, VecModStr<vecsize, elem, "comm", "2">.s);
> +  string t10 = !strconcat(t9, "mov.");
> +  string t11 = !strconcat(t10, type);
> +  string t12 = !strconcat(t11, " \t${dst}_");
> +  string t13 = !strconcat(t12, elem);
> +  string t14 = !strconcat(t13, ", $src2");
> +  string t15 = !strconcat(t14, VecModStr<vecsize, elem, "pos">.s);
> +  string s =   !strconcat(t15, ";");
> +}
> +class ShuffleAsmStr2<string type>
> +{
> +  string t1 = ShuffleOneLine<"2", "0", type>.s;
> +  string t2 = !strconcat(t1, "\n\t");
> +  string s  = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s);
> +}
> +class ShuffleAsmStr4<string type>
> +{
> +  string t1 = ShuffleOneLine<"4", "0", type>.s;
> +  string t2 = !strconcat(t1, "\n\t");
> +  string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s);
> +  string t4 = !strconcat(t3, "\n\t");
> +  string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s);
> +  string t6 = !strconcat(t5, "\n\t");
> +  string s  = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
> +}
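
If I follow the VecModStr plumbing correctly, each ShuffleOneLine emits two
guarded movs per destination element - one reading $src1 and one reading
$src2 - with the ${cN:vecv*comm*} modifiers commenting out whichever mov the
mask element does not select, and ${cN:vecv*pos} picking the source
sub-register. A worked example near VecModStr would help future readers.
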
> +
> +let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in {
> +def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst),
> +                       (ins  V4F32Regs:$src1, V4F32Regs:$src2,
> +                             i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
> +                 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
> +                                 ShuffleAsmStr4<"f32">.s),
> +                       [], FMOV32rr>;
> +
> +def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst),
> +                       (ins  V4I32Regs:$src1, V4I32Regs:$src2,
> +                             i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
> +                 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
> +                                 ShuffleAsmStr4<"u32">.s),
> +                       [], IMOV32rr>;
> +
> +def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst),
> +                       (ins  V4I16Regs:$src1, V4I16Regs:$src2,
> +                             i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
> +                 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
> +                                 ShuffleAsmStr4<"u16">.s),
> +                       [], IMOV16rr>;
> +
> +def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst),
> +                       (ins  V4I8Regs:$src1, V4I8Regs:$src2,
> +                             i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
> +                 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
> +                                 ShuffleAsmStr4<"u16">.s),
> +                       [], IMOV8rr>;
> +
> +def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst),
> +                       (ins  V2F32Regs:$src1, V2F32Regs:$src2,
> +                             i8imm:$c0, i8imm:$c1),
> +                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
> +                                 ShuffleAsmStr2<"f32">.s),
> +                       [], FMOV32rr>;
> +
> +def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst),
> +                       (ins  V2I32Regs:$src1, V2I32Regs:$src2,
> +                             i8imm:$c0, i8imm:$c1),
> +                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
> +                                 ShuffleAsmStr2<"u32">.s),
> +                       [], IMOV32rr>;
> +
> +def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst),
> +                       (ins  V2I8Regs:$src1, V2I8Regs:$src2,
> +                             i8imm:$c0, i8imm:$c1),
> +                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
> +                                 ShuffleAsmStr2<"u16">.s),
> +                       [], IMOV8rr>;
> +
> +def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst),
> +                       (ins  V2I16Regs:$src1, V2I16Regs:$src2,
> +                             i8imm:$c0, i8imm:$c1),
> +                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
> +                                 ShuffleAsmStr2<"u16">.s),
> +                       [], IMOV16rr>;
> +
> +def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst),
> +                       (ins  V2F64Regs:$src1, V2F64Regs:$src2,
> +                             i8imm:$c0, i8imm:$c1),
> +                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
> +                                 ShuffleAsmStr2<"f64">.s),
> +                       [], FMOV64rr>;
> +
> +def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst),
> +                       (ins  V2I64Regs:$src1, V2I64Regs:$src2,
> +                             i8imm:$c0, i8imm:$c1),
> +                       !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
> +                                 ShuffleAsmStr2<"u64">.s),
> +                       [], IMOV64rr>;
> +}
> +
> +def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
> +  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
> +  return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32);
> +}]>;
> +def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
> +  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
> +  return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32);
> +}]>;
> +def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
> +  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
> +  return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32);
> +}]>;
> +def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
> +  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
> +  return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32);
> +}]>;
> +
> +// The spurious call is here to silence a compiler warning about N being
> +// unused.
> +def vec_shuf : PatFrag<(ops node:$lhs, node:$rhs),
> +                       (vector_shuffle node:$lhs, node:$rhs),
> +                       [{ N->getGluedNode(); return true; }]>;
> +
> +def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)),
> +          (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2,
> +                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
> +
> +def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)),
> +          (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2,
> +                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
> +                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
> +
> +def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)),
> +          (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2,
> +                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
> +
> +def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)),
> +          (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2,
> +                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
> +
> +def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)),
> +          (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2,
> +                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
> +                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
> +
> +def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)),
> +          (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2,
> +                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
> +
> +def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)),
> +          (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2,
> +                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
> +                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
> +
> +def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)),
> +          (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2,
> +                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
> +
> +def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)),
> +          (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2,
> +                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
> +                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
> +
> +def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)),
> +          (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2,
> +                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
> +
> +class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
> +  NVPTXInst si>
> +                   : NVPTXVecInst<(outs vclass:$dst),
> +                   (ins  sclass:$a1, sclass:$a2),
> +                   !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"),
> +                   [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))],
> +                   si>;
> +class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
> +  NVPTXInst si>
> +                   : NVPTXVecInst<(outs vclass:$dst),
> +                   (ins  sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
> +               !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"),
> +                   [(set vclass:$dst,
> +                     (build_vector sclass:$a1, sclass:$a2,
> +                       sclass:$a3, sclass:$a4))], si>;
> +
> +let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in {
> +def Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs,
> +  FMOV32rr>;
> +def Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs,
> +  FMOV64rr>;
> +
> +def Build_Vector2_i32 : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs,
> +  IMOV32rr>;
> +def Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs,
> +  IMOV64rr>;
> +def Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs,
> +  IMOV16rr>;
> +def Build_Vector2_i8  : Build_Vector2<"mov.v2.u16",  V2I8Regs,  Int8Regs,
> +  IMOV8rr>;
> +
> +def Build_Vector4_f32 : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs,
> +  FMOV32rr>;
> +
> +def Build_Vector4_i32 : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs,
> +  IMOV32rr>;
> +def Build_Vector4_i16 : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs,
> +  IMOV16rr>;
> +def Build_Vector4_i8  : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs,
> +  IMOV8rr>;
> +}
> +
> +class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
> +                 : NVPTXVecInst<(outs vclass:$dst), (ins vclass:$src),
> +                   !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
> +                   [], sop>;
> +
> +let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
> +  VecInstType=isVecOther.Value in {
> +def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
> +def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;
> +
> +def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
> +def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;
> +
> +def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
> +def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;
> +
> +def V4i8Mov : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
> +def V2i8Mov : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;
> +
> +def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
> +def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
> +}
> +
> +// extract subvector patterns
> +def extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR",
> +                        SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;
> +
> +def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
> +                 (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0),
> +                                    (V4f32Extract V4F32Regs:$src, 1))>;
> +def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
> +                 (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2),
> +                                    (V4f32Extract V4F32Regs:$src, 3))>;
> +def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
> +                 (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
> +                                    (V4i32Extract V4I32Regs:$src, 1))>;
> +def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
> +                 (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
> +                                    (V4i32Extract V4I32Regs:$src, 3))>;
> +def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
> +                 (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
> +                                    (V4i16Extract V4I16Regs:$src, 1))>;
> +def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
> +                 (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
> +                                    (V4i16Extract V4I16Regs:$src, 3))>;
> +def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
> +                 (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
> +                                    (V4i8Extract V4I8Regs:$src, 1))>;
> +def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
> +                 (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
> +                                    (V4i8Extract V4I8Regs:$src, 3))>;
> +
> +// Select instructions
> +class Select_OneLine<string type, string pos> {
> +  string t1 = !strconcat("selp.", type);
> +  string t2 = !strconcat(t1, " \t${dst}_");
> +  string t3 = !strconcat(t2, pos);
> +  string t4 = !strconcat(t3, ", ${src1}_");
> +  string t5 = !strconcat(t4, pos);
> +  string t6 = !strconcat(t5, ", ${src2}_");
> +  string t7 = !strconcat(t6, pos);
> +  string s  = !strconcat(t7, ", $p;");
> +}
> +
> +class Select_Str2<string type> {
> +  string t1 = Select_OneLine<type, "0">.s;
> +  string t2 = !strconcat(t1, "\n\t");
> +  string s  = !strconcat(t2, Select_OneLine<type, "1">.s);
> +}
> +
> +class Select_Str4<string type> {
> +  string t1 = Select_OneLine<type, "0">.s;
> +  string t2 = !strconcat(t1, "\n\t");
> +  string t3 = !strconcat(t2, Select_OneLine<type, "1">.s);
> +  string t4 = !strconcat(t3, "\n\t");
> +  string t5 = !strconcat(t4, Select_OneLine<type, "2">.s);
> +  string t6 = !strconcat(t5, "\n\t");
> +  string s  = !strconcat(t6, Select_OneLine<type, "3">.s);
> +
> +}
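
(For anyone tracing the string machinery: if I've followed the
!strconcats correctly, Select_Str2<"b32">.s expands to

  "selp.b32 \t${dst}_0, ${src1}_0, ${src2}_0, $p;\n\t"
  "selp.b32 \t${dst}_1, ${src1}_1, ${src2}_1, $p;"

i.e. one selp per element, operating on the _0/_1 scalar halves of each
vector register, with Select_Str4 doing the same for four elements.)
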
> +
> +class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
> +      : NVPTXVecInst<(outs vclass:$dst),
> +                     (ins  vclass:$src1, vclass:$src2, Int1Regs:$p),
> +                     asmstr,
> +                     [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1,
> +                       vclass:$src2))],
> +                     sop>;
> +
> +let VecInstType=isVecOther.Value in {
> +def V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
> +def V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
> +def V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
> +def V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
> +def V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
> +def V4I8_Select  : Vec_Select<V4I8Regs,  Select_Str4<"b16">.s, SELECTi8rr>;
> +def V2I8_Select  : Vec_Select<V2I8Regs,  Select_Str2<"b16">.s, SELECTi8rr>;
> +
> +def V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
> +def V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
> +def V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
> +}
> +
> +// Comparison instructions
> +
> +// setcc convenience fragments.
> +def vsetoeq : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETOEQ)>;
> +def vsetogt : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETOGT)>;
> +def vsetoge : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETOGE)>;
> +def vsetolt : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETOLT)>;
> +def vsetole : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETOLE)>;
> +def vsetone : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETONE)>;
> +def vseto   : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETO)>;
> +def vsetuo  : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETUO)>;
> +def vsetueq : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETUEQ)>;
> +def vsetugt : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETUGT)>;
> +def vsetuge : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETUGE)>;
> +def vsetult : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETULT)>;
> +def vsetule : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETULE)>;
> +def vsetune : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETUNE)>;
> +def vseteq  : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETEQ)>;
> +def vsetgt  : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETGT)>;
> +def vsetge  : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETGE)>;
> +def vsetlt  : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETLT)>;
> +def vsetle  : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETLE)>;
> +def vsetne  : PatFrag<(ops node:$lhs, node:$rhs),
> +                      (setcc node:$lhs, node:$rhs, SETNE)>;
> +
> +class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
> +  NVPTXInst sop>
> +    : NVPTXVecInst<(outs outrclass:$dst),
> +                   (ins  inrclass:$a, inrclass:$b),
> +                   "Unsupported",
> +                   [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
> +                   sop>;
> +
> +multiclass Vec_Compare_All<PatFrag op,
> +                           NVPTXInst inst8,
> +                           NVPTXInst inst16,
> +                           NVPTXInst inst32,
> +                           NVPTXInst inst64>
> +{
> +  def  V2I8 : Vec_Compare<op, V2I8Regs,  V2I8Regs,  inst8>;
> +  def  V4I8 : Vec_Compare<op, V4I8Regs,  V4I8Regs,  inst8>;
> +  def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
> +  def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
> +  def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
> +  def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
> +  def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
> +}
> +
> +let VecInstType=isVecOther.Value in {
> +  defm VecSGT : Vec_Compare_All<vsetgt,  ISetSGTi8rr_toi8, ISetSGTi16rr_toi16,
> +    ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
> +  defm VecUGT : Vec_Compare_All<vsetugt, ISetUGTi8rr_toi8, ISetUGTi16rr_toi16,
> +    ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
> +  defm VecSLT : Vec_Compare_All<vsetlt,  ISetSLTi8rr_toi8, ISetSLTi16rr_toi16,
> +    ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
> +  defm VecULT : Vec_Compare_All<vsetult, ISetULTi8rr_toi8, ISetULTi16rr_toi16,
> +    ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
> +  defm VecSGE : Vec_Compare_All<vsetge,  ISetSGEi8rr_toi8, ISetSGEi16rr_toi16,
> +    ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
> +  defm VecUGE : Vec_Compare_All<vsetuge, ISetUGEi8rr_toi8, ISetUGEi16rr_toi16,
> +    ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
> +  defm VecSLE : Vec_Compare_All<vsetle,  ISetSLEi8rr_toi8, ISetSLEi16rr_toi16,
> +    ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
> +  defm VecULE : Vec_Compare_All<vsetule, ISetULEi8rr_toi8, ISetULEi16rr_toi16,
> +    ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
> +  defm VecSEQ : Vec_Compare_All<vseteq,  ISetSEQi8rr_toi8, ISetSEQi16rr_toi16,
> +    ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
> +  defm VecUEQ : Vec_Compare_All<vsetueq, ISetUEQi8rr_toi8, ISetUEQi16rr_toi16,
> +    ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
> +  defm VecSNE : Vec_Compare_All<vsetne,  ISetSNEi8rr_toi8, ISetSNEi16rr_toi16,
> +    ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
> +  defm VecUNE : Vec_Compare_All<vsetune, ISetUNEi8rr_toi8, ISetUNEi16rr_toi16,
> +    ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
> +}
> +
> +multiclass FVec_Compare_All<PatFrag op,
> +                            NVPTXInst instf32,
> +                            NVPTXInst instf64>
> +{
> +  def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
> +  def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
> +  def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
> +}
> +
> +let VecInstType=isVecOther.Value in {
> +  defm FVecGT :  FVec_Compare_All<vsetogt, FSetGTf32rr_toi32,
> +    FSetGTf64rr_toi64>;
> +  defm FVecLT :  FVec_Compare_All<vsetolt, FSetLTf32rr_toi32,
> +    FSetLTf64rr_toi64>;
> +  defm FVecGE :  FVec_Compare_All<vsetoge, FSetGEf32rr_toi32,
> +    FSetGEf64rr_toi64>;
> +  defm FVecLE :  FVec_Compare_All<vsetole, FSetLEf32rr_toi32,
> +    FSetLEf64rr_toi64>;
> +  defm FVecEQ :  FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32,
> +    FSetEQf64rr_toi64>;
> +  defm FVecNE :  FVec_Compare_All<vsetone, FSetNEf32rr_toi32,
> +    FSetNEf64rr_toi64>;
> +
> +  defm FVecUGT :  FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32,
> +    FSetUGTf64rr_toi64>;
> +  defm FVecULT :  FVec_Compare_All<vsetult, FSetULTf32rr_toi32,
> +    FSetULTf64rr_toi64>;
> +  defm FVecUGE :  FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32,
> +    FSetUGEf64rr_toi64>;
> +  defm FVecULE :  FVec_Compare_All<vsetule, FSetULEf32rr_toi32,
> +    FSetULEf64rr_toi64>;
> +  defm FVecUEQ :  FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32,
> +    FSetUEQf64rr_toi64>;
> +  defm FVecUNE :  FVec_Compare_All<vsetune, FSetUNEf32rr_toi32,
> +    FSetUNEf64rr_toi64>;
> +
> +  defm FVecNUM :  FVec_Compare_All<vseto,  FSetNUMf32rr_toi32,
> +    FSetNUMf64rr_toi64>;
> +  defm FVecNAN :  FVec_Compare_All<vsetuo, FSetNANf32rr_toi32,
> +    FSetNANf64rr_toi64>;
> +}
> +
> +class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
> +                (ins i32imm:$a, i32imm:$b),
> +                !strconcat(!strconcat("ld.param", opstr),
> +                  "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"), []>;
> +
> +class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs regclass:$d1, regclass:$d2),
> +                (ins i32imm:$a, i32imm:$b),
> +                !strconcat(!strconcat("ld.param", opstr),
> +                  "\t{{$d1, $d2}}, [retval0+$b];"), []>;
> +
> +
> +class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs),
> +                (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
> +                  i32imm:$a, i32imm:$b),
> +                !strconcat(!strconcat("st.param", opstr),
> +                  "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"), []>;
> +
> +class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs),
> +                (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
> +                !strconcat(!strconcat("st.param", opstr),
> +                  "\t[param$a+$b], {{$s1, $s2}};"), []>;
> +
> +class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs),
> +                (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
> +                  i32imm:$a),
> +                !strconcat(!strconcat("st.param", opstr),
> +                  "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"), []>;
> +
> +class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr> :
> +      NVPTXInst<(outs),
> +                (ins regclass:$s1, regclass:$s2, i32imm:$a),
> +                !strconcat(!strconcat("st.param", opstr),
> +                  "\t[func_retval+$a], {{$s1, $s2}};"), []>;
> +
> +def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
> +def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
> +def LoadParamScalar4I8  : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;
> +
> +def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
> +def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
> +def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
> +def LoadParamScalar2I8  : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;
> +
> +def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
> +def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
> +def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
> +
> +def StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
> +def StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
> +def StoreParamScalar4I8  : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;
> +
> +def StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
> +def StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
> +def StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
> +def StoreParamScalar2I8  : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;
> +
> +def StoreParamScalar4F32 : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
> +def StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
> +def StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;
> +
> +def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
> +def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
> +def StoreRetvalScalar4I8  : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;
> +
> +def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
> +def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
> +def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
> +def StoreRetvalScalar2I8  : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;
> +
> +def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
> +def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
> +def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;
> +
> +class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>:
> +      NVPTXVecInst<(outs regclass:$dst), (ins i32imm:$a, i32imm:$b),
> +                "loadparam : $dst <- [$a, $b]",
> +                [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
> +                sop>;
> +
> +class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
> +      : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
> +                "storeparam : [$a, $b] <- $val",
> +                [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)], sop>;
> +
> +class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
> +  NVPTXInst sop=NOP>
> +      : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a),
> +                "storeretval : retval[$a] <- $val",
> +                [(StoreRetval (i32 imm:$a), regclass:$val)], sop>;
> +
> +let VecInstType=isVecLD.Value in {
> +def LoadParamV4I32  : LoadParamVecInst<V4I32Regs, ".v4.b32",
> +  LoadParamScalar4I32>;
> +def LoadParamV4I16  : LoadParamVecInst<V4I16Regs, ".v4.b16",
> +  LoadParamScalar4I16>;
> +def LoadParamV4I8   : LoadParamVecInst<V4I8Regs, ".v4.b8",
> +  LoadParamScalar4I8>;
> +
> +def LoadParamV2I64  : LoadParamVecInst<V2I64Regs, ".v2.b64",
> +  LoadParamScalar2I64>;
> +def LoadParamV2I32  : LoadParamVecInst<V2I32Regs, ".v2.b32",
> +  LoadParamScalar2I32>;
> +def LoadParamV2I16  : LoadParamVecInst<V2I16Regs, ".v2.b16",
> +  LoadParamScalar2I16>;
> +def LoadParamV2I8   : LoadParamVecInst<V2I8Regs, ".v2.b8",
> +  LoadParamScalar2I8>;
> +
> +def LoadParamV4F32  : LoadParamVecInst<V4F32Regs, ".v4.f32",
> +  LoadParamScalar4F32>;
> +def LoadParamV2F32  : LoadParamVecInst<V2F32Regs, ".v2.f32",
> +  LoadParamScalar2F32>;
> +def LoadParamV2F64  : LoadParamVecInst<V2F64Regs, ".v2.f64",
> +  LoadParamScalar2F64>;
> +}
> +
> +let VecInstType=isVecST.Value in {
> +def StoreParamV4I32  : StoreParamVecInst<V4I32Regs, ".v4.b32",
> +  StoreParamScalar4I32>;
> +def StoreParamV4I16  : StoreParamVecInst<V4I16Regs, ".v4.b16",
> +  StoreParamScalar4I16>;
> +def StoreParamV4I8   : StoreParamVecInst<V4I8Regs, ".v4.b8",
> +  StoreParamScalar4I8>;
> +
> +def StoreParamV2I64  : StoreParamVecInst<V2I64Regs, ".v2.b64",
> +  StoreParamScalar2I64>;
> +def StoreParamV2I32  : StoreParamVecInst<V2I32Regs, ".v2.b32",
> +  StoreParamScalar2I32>;
> +def StoreParamV2I16  : StoreParamVecInst<V2I16Regs, ".v2.b16",
> +  StoreParamScalar2I16>;
> +def StoreParamV2I8   : StoreParamVecInst<V2I8Regs, ".v2.b8",
> +  StoreParamScalar2I8>;
> +
> +def StoreParamV4F32  : StoreParamVecInst<V4F32Regs, ".v4.f32",
> +  StoreParamScalar4F32>;
> +def StoreParamV2F32  : StoreParamVecInst<V2F32Regs, ".v2.f32",
> +  StoreParamScalar2F32>;
> +def StoreParamV2F64  : StoreParamVecInst<V2F64Regs, ".v2.f64",
> +  StoreParamScalar2F64>;
> +
> +def StoreRetvalV4I32  : StoreRetvalVecInst<V4I32Regs, ".v4.b32",
> +  StoreRetvalScalar4I32>;
> +def StoreRetvalV4I16  : StoreRetvalVecInst<V4I16Regs, ".v4.b16",
> +  StoreRetvalScalar4I16>;
> +def StoreRetvalV4I8   : StoreRetvalVecInst<V4I8Regs,  ".v4.b8",
> +  StoreRetvalScalar4I8>;
> +
> +def StoreRetvalV2I64  : StoreRetvalVecInst<V2I64Regs, ".v2.b64",
> +  StoreRetvalScalar2I64>;
> +def StoreRetvalV2I32  : StoreRetvalVecInst<V2I32Regs, ".v2.b32",
> +  StoreRetvalScalar2I32>;
> +def StoreRetvalV2I16  : StoreRetvalVecInst<V2I16Regs, ".v2.b16",
> +  StoreRetvalScalar2I16>;
> +def StoreRetvalV2I8   : StoreRetvalVecInst<V2I8Regs,  ".v2.b8",
> +  StoreRetvalScalar2I8>;
> +
> +def StoreRetvalV4F32  : StoreRetvalVecInst<V4F32Regs, ".v4.f32",
> +  StoreRetvalScalar4F32>;
> +def StoreRetvalV2F32  : StoreRetvalVecInst<V2F32Regs, ".v2.f32",
> +  StoreRetvalScalar2F32>;
> +def StoreRetvalV2F64  : StoreRetvalVecInst<V2F64Regs, ".v2.f64",
> +  StoreRetvalScalar2F64>;
> +
> +}
> +
> +
> +// Int vector to int scalar bit convert
> +// v4i8 -> i32
> +def : Pat<(i32 (bitconvert V4I8Regs:$s)),
> +          (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
> +                     (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3))>;
> +// v4i16 -> i64
> +def : Pat<(i64 (bitconvert V4I16Regs:$s)),
> +          (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
> +                      (V4i16Extract V4I16Regs:$s,1),
> +                      (V4i16Extract V4I16Regs:$s,2),
> +                      (V4i16Extract V4I16Regs:$s,3))>;
> +// v2i8 -> i16
> +def : Pat<(i16 (bitconvert V2I8Regs:$s)),
> +          (V2I8toI16 (V2i8Extract V2I8Regs:$s,0), (V2i8Extract V2I8Regs:$s,1))>;
> +// v2i16 -> i32
> +def : Pat<(i32 (bitconvert V2I16Regs:$s)),
> +          (V2I16toI32 (V2i16Extract V2I16Regs:$s,0),
> +            (V2i16Extract V2I16Regs:$s,1))>;
> +// v2i32 -> i64
> +def : Pat<(i64 (bitconvert V2I32Regs:$s)),
> +          (V2I32toI64 (V2i32Extract V2I32Regs:$s,0),
> +            (V2i32Extract V2I32Regs:$s,1))>;
> +
> +// Int scalar to int vector bit convert
> +let VecInstType=isVecDest.Value in {
> +// i32 -> v4i8
> +def VecI32toV4I8 : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
> +                                "Error!",
> +                                [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
> +                                I32toV4I8>;
> +// i64 -> v4i16
> +def VecI64toV4I16 : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
> +                                 "Error!",
> +                                [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
> +                                 I64toV4I16>;
> +// i16 -> v2i8
> +def VecI16toV2I8 : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
> +                                "Error!",
> +                               [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
> +                                I16toV2I8>;
> +// i32 -> v2i16
> +def VecI32toV2I16 : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s),
> +                                 "Error!",
> +                                [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
> +                                 I32toV2I16>;
> +// i64 -> v2i32
> +def VecI64toV2I32 : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
> +                                  "Error!",
> +                                [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
> +                                  I64toV2I32>;
> +}
> +
> +// Int vector to int vector bit convert
> +// v4i8 -> v2i16
> +def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
> +          (VecI32toV2I16
> +          (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
> +                    (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
> +// v4i16 -> v2i32
> +def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
> +          (VecI64toV2I32
> +       (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
> +                (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
> +// v2i16 -> v4i8
> +def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
> +          (VecI32toV4I8
> +    (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
> +// v2i32 -> v4i16
> +def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
> +          (VecI64toV4I16
> +    (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
> +// v2i64 -> v4i32
> +def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
> +          (Build_Vector4_i32
> +            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
> +            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
> +            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
> +            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;
> +// v4i32 -> v2i64
> +def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
> +          (Build_Vector2_i64
> +      (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), (V4i32Extract V4I32Regs:$s,1)),
> +    (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), (V4i32Extract V4I32Regs:$s,3)))>;
> +
> +// Fp scalar to fp vector convert
> +// f64 -> v2f32
> +let VecInstType=isVecDest.Value in {
> +def VecF64toV2F32 : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s),
> +                                  "Error!",
> +                              [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
> +                                  F64toV2F32>;
> +}
> +
> +// Fp vector to fp scalar convert
> +// v2f32 -> f64
> +def : Pat<(f64 (bitconvert V2F32Regs:$s)),
> +     (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1))>;
> +
> +// Fp scalar to int vector convert
> +// f32 -> v4i8
> +def : Pat<(v4i8 (bitconvert Float32Regs:$s)),
> +          (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>;
> +// f32 -> v2i16
> +def : Pat<(v2i16 (bitconvert Float32Regs:$s)),
> +          (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>;
> +// f64 -> v4i16
> +def : Pat<(v4i16 (bitconvert Float64Regs:$s)),
> +          (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>;
> +// f64 -> v2i32
> +def : Pat<(v2i32 (bitconvert Float64Regs:$s)),
> +          (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>;
> +
> +// Int vector to fp scalar convert
> +// v4i8 -> f32
> +def : Pat<(f32 (bitconvert V4I8Regs:$s)),
> +          (BITCONVERT_32_I2F
> +          (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
> +                    (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
> +// v4i16 -> f64
> +def : Pat<(f64 (bitconvert V4I16Regs:$s)),
> +          (BITCONVERT_64_I2F
> +       (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
> +                (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
> +// v2i16 -> f32
> +def : Pat<(f32 (bitconvert V2I16Regs:$s)),
> +          (BITCONVERT_32_I2F
> +    (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
> +// v2i32 -> f64
> +def : Pat<(f64 (bitconvert V2I32Regs:$s)),
> +          (BITCONVERT_64_I2F
> +    (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
> +
> +// Int scalar to fp vector convert
> +// i64 -> v2f32
> +def : Pat<(v2f32 (bitconvert Int64Regs:$s)),
> +          (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>;
> +
> +// Fp vector to int scalar convert
> +// v2f32 -> i64
> +def : Pat<(i64 (bitconvert V2F32Regs:$s)),
> +          (BITCONVERT_64_F2I
> +    (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1)))>;
> +
> +// Int vector to fp vector convert
> +// v2i64 -> v4f32
> +def : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
> +          (Build_Vector4_f32
> +            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
> +              (V2i64Extract V2I64Regs:$s, 0)), 0)),
> +            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
> +              (V2i64Extract V2I64Regs:$s, 0)), 1)),
> +            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
> +              (V2i64Extract V2I64Regs:$s, 1)), 0)),
> +            (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
> +              (V2i64Extract V2I64Regs:$s, 1)), 1)))>;
> +// v2i64 -> v2f64
> +def : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
> +    (Build_Vector2_f64
> +            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)),
> +            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>;
> +// v2i32 -> v2f32
> +def : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
> +    (Build_Vector2_f32
> +            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)),
> +            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>;
> +// v4i32 -> v2f64
> +def : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
> +          (Build_Vector2_f64
> +           (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,0),
> +             (V4i32Extract V4I32Regs:$s,1))),
> +           (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,2),
> +             (V4i32Extract V4I32Regs:$s,3))))>;
> +// v4i32 -> v4f32
> +def : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
> +    (Build_Vector4_f32
> +            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,0)),
> +            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)),
> +            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)),
> +            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>;
> +// v4i16 -> v2f32
> +def : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
> +          (VecF64toV2F32 (BITCONVERT_64_I2F
> +            (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
> +                        (V4i16Extract V4I16Regs:$s,1),
> +                        (V4i16Extract V4I16Regs:$s,2),
> +                        (V4i16Extract V4I16Regs:$s,3))))>;
> +
> +// Fp vector to int vector convert
> +// v2i64 <- v4f32
> +def : Pat<(v2i64 (bitconvert V4F32Regs:$s)),
> +          (Build_Vector2_i64
> +           (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,0),
> +             (V4f32Extract V4F32Regs:$s,1))),
> +           (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,2),
> +             (V4f32Extract V4F32Regs:$s,3))))>;
> +// v2i64 <- v2f64
> +def : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
> +    (Build_Vector2_i64
> +            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)),
> +            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>;
> +// v2i32 <- v2f32
> +def : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
> +    (Build_Vector2_i32
> +            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)),
> +            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>;
> +// v4i32 <- v2f64
> +def : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
> +          (Build_Vector4_i32
> +            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
> +              (V2f64Extract V2F64Regs:$s, 0)), 0)),
> +            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
> +              (V2f64Extract V2F64Regs:$s, 0)), 1)),
> +            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
> +              (V2f64Extract V2F64Regs:$s, 1)), 0)),
> +            (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
> +              (V2f64Extract V2F64Regs:$s, 1)), 1)))>;
> +// v4i32 <- v4f32
> +def : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
> +          (Build_Vector4_i32
> +            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)),
> +            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)),
> +            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,2)),
> +            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>;
> +// v4i16 <- v2f32
> +def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
> +          (VecI64toV4I16 (BITCONVERT_64_F2I
> +          (V2F32toF64 (V2f32Extract V2F32Regs:$s,0),
> +            (V2f32Extract V2F32Regs:$s,1))))>;
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXutil.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXutil.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXutil.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXutil.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,91 @@
> +//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the functions that can be used in CodeGen.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTXutil.h"
> +#include "NVPTX.h"
> +
> +using namespace llvm;
> +
> +namespace llvm {
> +
> +bool isParamLoad(const MachineInstr *MI)
> +{
> +  if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
> +      (MI->getOpcode() != NVPTX::LD_i64_avar))
> +    return false;
> +  if (!MI->getOperand(2).isImm())
> +    return false;
> +  if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
> +    return false;
> +  return true;
> +}
> +
> +#define DATA_MASK     0x7f
> +#define DIGIT_WIDTH   7
> +#define MORE_BYTES    0x80
> +
> +static int encode_leb128(uint64_t val, int *nbytes,
> +                         char *space, int splen)
> +{
> +  char *a;
> +  char *end = space + splen;
> +
> +  a = space;
> +  do {
> +    unsigned char uc;
> +
> +    if (a >= end)
> +      return 1;
> +    uc = val & DATA_MASK;
> +    val >>= DIGIT_WIDTH;
> +    if (val != 0)
> +      uc |= MORE_BYTES;
> +    *a = uc;
> +    a++;
> +  } while (val);
> +  *nbytes = a - space;
> +  return 0;
> +}
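
As a sanity check (purely illustrative, not suggesting it go in the
patch): feeding this the classic ULEB128 test value does what I'd
expect, if I'm reading the loop right:

  char buf[16];
  int nbytes;
  int failed = encode_leb128(624485 /*0x98765*/, &nbytes, buf, sizeof(buf));
  // failed == 0, nbytes == 3, buf[0..2] == {0xE5, 0x8E, 0x26},
  // which is the standard unsigned-LEB128 encoding of 624485.

so the core encoder looks right.
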
> +
> +#undef DATA_MASK
> +#undef DIGIT_WIDTH
> +#undef MORE_BYTES
> +
> +uint64_t encode_leb128(const char *str)
> +{
> +  union { uint64_t x; char a[8]; } temp64;
> +
> +  temp64.x = 0;
> +
> +  assert(strlen(str) <= 8 &&
> +         "Cannot leb128-encode register names longer than 8 bytes");
> +
> +  for (unsigned i=0,e=strlen(str); i!=e; ++i)
> +    temp64.a[i] = str[e-1-i];
> +
> +  char encoded[16];
> +  int nbytes;
> +
> +  int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
> +  (void)retval; // Only used by the assert; silences -Wunused in NDEBUG builds.
> +
> +  assert(retval == 0 &&
> +         "Encoding to leb128 failed");
> +
> +  assert(nbytes <= 8 &&
> +         "Cannot support register names with leb128 encoding > 8 bytes");
> +
> +  temp64.x = 0;
> +  for (int i=0; i<nbytes; ++i)
> +    temp64.a[i] = encoded[i];
> +
> +  return temp64.x;
> +}
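
One portability note on this function: reversing the string through the
union makes temp64.x (and therefore the returned encoding) depend on
host byte order. If the little-endian behaviour is the intended one, a
shift-based packing would pin it down on any host, something like
(rough sketch):

  uint64_t x = 0;
  for (unsigned i = 0, e = strlen(str); i != e; ++i)
    x = (x << 8) | (unsigned char)str[i];

which reproduces what the union does on a little-endian machine.
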
> +
> +} // end namespace llvm
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXutil.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXutil.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXutil.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXutil.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,25 @@
> +//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains the functions that can be used in CodeGen.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_TARGET_NVPTX_UTIL_H
> +#define LLVM_TARGET_NVPTX_UTIL_H
> +
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/CodeGen/MachineInstr.h"
> +
> +namespace llvm {
> +bool isParamLoad(const MachineInstr *);
> +uint64_t encode_leb128(const char *str);
> +}
> +
> +#endif
>
> Added: llvm/trunk/lib/Target/NVPTX/TargetInfo/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/TargetInfo/CMakeLists.txt?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/TargetInfo/CMakeLists.txt (added)
> +++ llvm/trunk/lib/Target/NVPTX/TargetInfo/CMakeLists.txt Fri May  4 15:18:50 2012
> @@ -0,0 +1,7 @@
> +#include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
> +
> +add_llvm_library(LLVMNVPTXInfo
> +  NVPTXTargetInfo.cpp
> +  )
> +
> +add_dependencies(LLVMNVPTXInfo NVPTXCommonTableGen)
>
> Added: llvm/trunk/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt (added)
> +++ llvm/trunk/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt Fri May  4 15:18:50 2012
> @@ -0,0 +1,23 @@
> +;===- ./lib/Target/NVPTX/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
> +;
> +;                     The LLVM Compiler Infrastructure
> +;
> +; This file is distributed under the University of Illinois Open Source
> +; License. See LICENSE.TXT for details.
> +;
> +;===------------------------------------------------------------------------===;
> +;
> +; This is an LLVMBuild description file for the components in this subdirectory.
> +;
> +; For more information on the LLVMBuild system, please see:
> +;
> +;   http://llvm.org/docs/LLVMBuild.html
> +;
> +;===------------------------------------------------------------------------===;
> +
> +[component_0]
> +type = Library
> +name = NVPTXInfo
> +parent = NVPTX
> +required_libraries = MC Support Target
> +add_to_library_groups = NVPTX
>
> Added: llvm/trunk/lib/Target/NVPTX/TargetInfo/Makefile
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/TargetInfo/Makefile?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/TargetInfo/Makefile (added)
> +++ llvm/trunk/lib/Target/NVPTX/TargetInfo/Makefile Fri May  4 15:18:50 2012
> @@ -0,0 +1,15 @@
> +##===- lib/Target/NVPTX/TargetInfo/Makefile ----------------*- Makefile -*-===##
> +#
> +#                     The LLVM Compiler Infrastructure
> +#
> +# This file is distributed under the University of Illinois Open Source
> +# License. See LICENSE.TXT for details.
> +#
> +##===----------------------------------------------------------------------===##
> +LEVEL = ../../../..
> +LIBRARYNAME = LLVMNVPTXInfo
> +
> +# Hack: we need to include 'main' target directory to grab private headers
> +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
> +
> +include $(LEVEL)/Makefile.common
>
> Added: llvm/trunk/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,23 @@
> +//===-- NVPTXTargetInfo.cpp - NVPTX Target Implementation -----------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTX.h"
> +#include "llvm/Module.h"
> +#include "llvm/Support/TargetRegistry.h"
> +using namespace llvm;
> +
> +Target llvm::TheNVPTXTarget32;
> +Target llvm::TheNVPTXTarget64;
> +
> +extern "C" void LLVMInitializeNVPTXTargetInfo() {
> +  RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
> +    "NVIDIA PTX 32-bit");
> +  RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
> +    "NVIDIA PTX 64-bit");
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/VectorElementize.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/VectorElementize.cpp?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/VectorElementize.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/VectorElementize.cpp Fri May  4 15:18:50 2012
> @@ -0,0 +1,1250 @@
> +//===-- VectorElementize.cpp - Convert vector ops to scalar ops ----------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This pass converts operations on vector types to operations on their
> +// element types.
> +//
> +// For generic binary and unary vector instructions, the conversion is simple.
> +// Suppose we have
> +//        av = bv Vop cv
> +// where av, bv, and cv are vector virtual registers, and Vop is a vector op.
> +// This gets converted to the following:
> +//       a1 = b1 Sop c1
> +//       a2 = b2 Sop c2
> +//
> +// VectorToScalarMap maintains the vector vreg to scalar vreg mapping.
> +// For the above example, the map will look as follows:
> +// av => [a1, a2]
> +// bv => [b1, b2]
> +//
> +// In addition, initVectorInfo creates the following opcode->opcode map.
> +// Vop => Sop
> +// OtherVop => OtherSop
> +// ...
> +//
> +// For vector specific instructions like vecbuild, vecshuffle etc, the
> +// conversion is different. Look at comments near the functions with
> +// prefix createVec<...>.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/CodeGen/Passes.h"
> +#include "llvm/Constant.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/Function.h"
> +#include "llvm/Pass.h"
> +#include "llvm/Type.h"
> +#include "llvm/Support/CommandLine.h"
> +#include "llvm/CodeGen/MachineFunctionPass.h"
> +#include "llvm/CodeGen/MachineModuleInfo.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/Support/CFG.h"
> +#include "llvm/Support/Compiler.h"
> +#include "llvm/Target/TargetInstrInfo.h"
> +#include "llvm/ADT/DepthFirstIterator.h"
> +#include "llvm/ADT/SmallPtrSet.h"
> +#include "NVPTX.h"
> +#include "NVPTXTargetMachine.h"
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass {
> +  virtual bool runOnMachineFunction(MachineFunction &F);
> +
> +  NVPTXTargetMachine &TM;
> +  MachineRegisterInfo *MRI;
> +  const NVPTXRegisterInfo *RegInfo;
> +  const NVPTXInstrInfo *InstrInfo;
> +
> +  llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *>
> +  RegClassMap;
> +  llvm::DenseMap<unsigned, bool> SimpleMoveMap;
> +
> +  llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap;
> +
> +  bool isVectorInstr(MachineInstr *);
> +
> +  SmallVector<unsigned, 4> getScalarRegisters(unsigned);
> +  unsigned getScalarVersion(unsigned);
> +  unsigned getScalarVersion(MachineInstr *);
> +
> +  bool isVectorRegister(unsigned);
> +  const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC);
> +  unsigned numCopiesNeeded(MachineInstr *);
> +
> +  void createLoadCopy(MachineFunction&, MachineInstr *,
> +                      std::vector<MachineInstr *>&);
> +  void createStoreCopy(MachineFunction&, MachineInstr *,
> +                       std::vector<MachineInstr *>&);
> +
> +  void createVecDest(MachineFunction&, MachineInstr *,
> +                     std::vector<MachineInstr *>&);
> +
> +  void createCopies(MachineFunction&, MachineInstr *,
> +                    std::vector<MachineInstr *>&);
> +
> +  unsigned copyProp(MachineFunction&);
> +  unsigned removeDeadMoves(MachineFunction&);
> +
> +  void elementize(MachineFunction&);
> +
> +  bool isSimpleMove(MachineInstr *);
> +
> +  void createVecShuffle(MachineFunction& F, MachineInstr *Instr,
> +                        std::vector<MachineInstr *>& copies);
> +
> +  void createVecExtract(MachineFunction& F, MachineInstr *Instr,
> +                        std::vector<MachineInstr *>& copies);
> +
> +  void createVecInsert(MachineFunction& F, MachineInstr *Instr,
> +                       std::vector<MachineInstr *>& copies);
> +
> +  void createVecBuild(MachineFunction& F, MachineInstr *Instr,
> +                      std::vector<MachineInstr *>& copies);
> +
> +public:
> +
> +  static char ID; // Pass identification, replacement for typeid
> +  VectorElementize(NVPTXTargetMachine &tm)
> +  : MachineFunctionPass(ID), TM(tm) {}
> +
> +  virtual const char *getPassName() const {
> +    return "Convert LLVM vector types to their element types";
> +  }
> +};
> +
> +char VectorElementize::ID = 1;
> +}
> +
> +static cl::opt<bool>
> +RemoveRedundantMoves("nvptx-remove-redundant-moves",
> +       cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"),
> +                     cl::init(true));
> +
> +#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \
> +    >> NVPTX::VecInstTypeShift)
> +#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP)
> +#define ISVECLOAD(x)    (VECINST(x) == NVPTX::VecLoad)
> +#define ISVECSTORE(x)   (VECINST(x) == NVPTX::VecStore)
> +#define ISVECBUILD(x)   (VECINST(x) == NVPTX::VecBuild)
> +#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle)
> +#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract)
> +#define ISVECINSERT(x)  (VECINST(x) == NVPTX::VecInsert)
> +#define ISVECDEST(x)     (VECINST(x) == NVPTX::VecDest)
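
Minor style thought: these might be nicer as static functions than
macros; they would type-check their argument and be visible in a
debugger, e.g. (sketch):

  static unsigned vecInstType(const MachineInstr *MI) {
    return (MI->getDesc().TSFlags & NVPTX::VecInstTypeMask)
           >> NVPTX::VecInstTypeShift;
  }
  static bool isVecLoad(const MachineInstr *MI) {
    return vecInstType(MI) == NVPTX::VecLoad;
  }

and similarly for the other predicates.
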
> +
> +bool VectorElementize::isSimpleMove(MachineInstr *mi) {
> +  if (mi->isCopy())
> +    return true;
> +  unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask)
> +        >> NVPTX::SimpleMoveShift;
> +  return (TSFlags == 1);
> +}
> +
> +bool VectorElementize::isVectorInstr(MachineInstr *mi) {
> +  if ((mi->getOpcode() == NVPTX::PHI) ||
> +      (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) {
> +    MachineOperand dest = mi->getOperand(0);
> +    return isVectorRegister(dest.getReg());
> +  }
> +  return ISVECINST(mi);
> +}
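
Also, "MachineOperand dest = mi->getOperand(0);" copies the operand,
and the same pattern recurs in several functions below. getOperand
returns a reference, so binding one avoids the copy:

  const MachineOperand &dest = mi->getOperand(0);
  return isVectorRegister(dest.getReg());

Cheap either way, but the reference reads as intended.
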
> +
> +unsigned VectorElementize::getScalarVersion(MachineInstr *mi) {
> +  return getScalarVersion(mi->getOpcode());
> +}
> +
> +///=============================================================================
> +///Instr is assumed to be a vector instruction. For most vector instructions,
> +///the size of the destination vector register gives the number of scalar copies
> +///needed. For VecStore, the size of getOperand(0) gives the number of scalar copies
> +///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the
> +///number of scalar copies needed.
> +///=============================================================================
> +unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) {

I think this function could be simplified a little (and, as a
coincidence, it would then conform to the stylistic guideline at
http://llvm.org/docs/CodingStandards.html#hl_else_after_return and
not fire GCC's maybe-uninitialized warning) with something like this:

unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) {
  int def = -1;
  for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
    MachineOperand oper = Instr->getOperand(i);

    if (!oper.isReg()) continue;
    if (!oper.isDef()) continue;
    assert(def == -1 && "Only 0 or 1 defs supported");
    def = i;
  }

  if (def != -1) {
    unsigned regnum = Instr->getOperand(def).getReg();
    if (ISVECEXTRACT(Instr))
      regnum = Instr->getOperand(1).getReg();
    return getNVPTXVectorSize(MRI->getRegClass(regnum));
  }
  assert(ISVECSTORE(Instr)
         && "Only 0 def instruction supported is vector store");

  unsigned regnum = Instr->getOperand(0).getReg();
  return getNVPTXVectorSize(MRI->getRegClass(regnum));
}

> +  unsigned numDefs=0;
> +  unsigned def;
> +  for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
> +    MachineOperand oper = Instr->getOperand(i);
> +
> +    if (!oper.isReg()) continue;
> +    if (!oper.isDef()) continue;
> +    def = i;
> +    numDefs++;
> +  }
> +  assert((numDefs <= 1) && "Only 0 or 1 defs supported");
> +
> +  if (numDefs == 1) {
> +    unsigned regnum = Instr->getOperand(def).getReg();
> +    if (ISVECEXTRACT(Instr))
> +      regnum = Instr->getOperand(1).getReg();
> +    return getNVPTXVectorSize(MRI->getRegClass(regnum));
> +  }
> +  else if (numDefs == 0) {
> +    assert(ISVECSTORE(Instr)
> +           && "Only 0 def instruction supported is vector store");
> +
> +    unsigned regnum = Instr->getOperand(0).getReg();
> +    return getNVPTXVectorSize(MRI->getRegClass(regnum));
> +  }
> +  return 1;
> +}
> +
> +const TargetRegisterClass *VectorElementize::
> +getScalarRegClass(const TargetRegisterClass *RC) {
> +  assert(isNVPTXVectorRegClass(RC) &&
> +         "Not a vector register class");
> +  return getNVPTXElemClass(RC);
> +}
> +
> +bool VectorElementize::isVectorRegister(unsigned reg) {
> +  const TargetRegisterClass *RC=MRI->getRegClass(reg);
> +  return isNVPTXVectorRegClass(RC);
> +}
> +
> +///=============================================================================
> +///For every vector register 'v' that is not already in the VectorToScalarMap,
> +///create n scalar registers of the corresponding element type, where n
> +///is 2 or 4 (getNVPTXVectorSize), and add them to the VectorToScalarMap.
> +///=============================================================================
> +SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) {
> +  assert(isVectorRegister(regnum) && "Expecting a vector register here");
> +  // Create the scalar registers and put them in the map, if not already there.
> +  if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) {
> +    const TargetRegisterClass *vecClass = MRI->getRegClass(regnum);
> +    const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass);
> +
> +    SmallVector<unsigned, 4> temp;
> +
> +    for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i)
> +      temp.push_back(MRI->createVirtualRegister(scalarClass));
> +
> +    VectorToScalarMap[regnum] = temp;
> +  }
> +  return VectorToScalarMap[regnum];
> +}
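
This hits the DenseMap up to three times (the find plus two
operator[]s), and then copies the SmallVector out. If the function
returned a reference, a single lookup would do, roughly (untested):

  SmallVector<unsigned, 4> &scalars = VectorToScalarMap[regnum];
  if (scalars.empty()) {
    const TargetRegisterClass *vecClass = MRI->getRegClass(regnum);
    const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass);
    for (unsigned i = 0, e = getNVPTXVectorSize(vecClass); i != e; ++i)
      scalars.push_back(MRI->createVirtualRegister(scalarClass));
  }
  return scalars;

with the signature changed to return SmallVector<unsigned, 4> &.
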
> +
> +///=============================================================================
> +///For a vector load of the form
> +///va <= ldv2 [addr]
> +///the following multi output instruction is created :
> +///[v1, v2] <= LD [addr]
> +///Look at NVPTXVector.td for the definitions of multi output loads.
> +///=============================================================================
> +void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
> +                                      std::vector<MachineInstr *>& copies) {
> +  copies.push_back(F.CloneMachineInstr(Instr));
> +
> +  MachineInstr *copy=copies[0];
> +  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
> +
> +  // Remove the dest, which should be a vector operand.
> +  MachineOperand dest = copy->getOperand(0);
> +  unsigned regnum = dest.getReg();
> +
> +  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
> +  copy->RemoveOperand(0);
> +
> +  std::vector<MachineOperand> otherOperands;
> +  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
> +    otherOperands.push_back(copy->getOperand(i));
> +
> +  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
> +    copy->RemoveOperand(0);
> +
> +  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) {
> +    copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
> +  }
> +
> +  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
> +    copy->addOperand(otherOperands[i]);
> +
> +}
> +
> +///=============================================================================
> +///For a vector store of the form
> +///stv2 va, [addr]
> +///the following multi input instruction is created :
> +///ST v1, v2, [addr]
> +///Look at NVPTXVector.td for the definitions of multi input stores.
> +///=============================================================================
> +void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
> +                                       std::vector<MachineInstr *>& copies) {
> +  copies.push_back(F.CloneMachineInstr(Instr));
> +
> +  MachineInstr *copy=copies[0];
> +  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
> +
> +  MachineOperand src = copy->getOperand(0);
> +  unsigned regnum = src.getReg();
> +
> +  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
> +  copy->RemoveOperand(0);
> +
> +  std::vector<MachineOperand> otherOperands;
> +  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
> +    otherOperands.push_back(copy->getOperand(i));
> +
> +  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
> +    copy->RemoveOperand(0);
> +
> +  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
> +    copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false));
> +
> +  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
> +    copy->addOperand(otherOperands[i]);
> +}
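
createLoadCopy and createStoreCopy (and createVecDest further down) are
nearly identical apart from whether the vector operand is a def; the
stash/strip/re-add operand dance could probably live in one helper,
very roughly:

  static void expandVectorOperand(MachineInstr *copy,
                                  const SmallVectorImpl<unsigned> &scalarRegs,
                                  bool isDef) {
    // The caller has already removed operand 0 (the vector register).
    // Stash and strip the remaining operands, prepend the scalar
    // registers, then re-append the rest.
    std::vector<MachineOperand> rest;
    for (unsigned i = 0, e = copy->getNumOperands(); i != e; ++i)
      rest.push_back(copy->getOperand(i));
    for (unsigned i = 0, e = rest.size(); i != e; ++i)
      copy->RemoveOperand(0);
    for (unsigned i = 0, e = scalarRegs.size(); i != e; ++i)
      copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef));
    for (unsigned i = 0, e = rest.size(); i != e; ++i)
      copy->addOperand(rest[i]);
  }

Just a sketch, but it would cut the triplication.
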
> +
> +///=============================================================================
> +///va <= shufflev2 vb, vc, <i1>, <i2>
> +///gets converted to 2 moves into a1 and a2. The source of the moves depend on
> +///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For
> +///shufflev4 the set is {0,..7}. For example, if i1=3, i2=0, the move
> +///instructions will be
> +///a1 <= c2
> +///a2 <= b1
> +///=============================================================================
> +void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
> +                                        std::vector<MachineInstr *>& copies) {
> +  unsigned numcopies=numCopiesNeeded(Instr);
> +
> +  unsigned destregnum = Instr->getOperand(0).getReg();
> +  unsigned src1regnum = Instr->getOperand(1).getReg();
> +  unsigned src2regnum = Instr->getOperand(2).getReg();
> +
> +  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
> +  SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum);
> +  SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum);
> +
> +  DebugLoc DL = Instr->getDebugLoc();
> +
> +  for (unsigned i=0; i<numcopies; i++) {
> +    MachineInstr *copy = BuildMI(F, DL,
> +                              InstrInfo->get(getScalarVersion(Instr)), dest[i]);
> +    MachineOperand which=Instr->getOperand(3+i);
> +    assert(which.isImm() && "Shuffle operand not a constant");
> +
> +    int src=which.getImm();
> +    int elem=src%numcopies;
> +
> +    if (which.getImm() < numcopies)
> +      copy->addOperand(MachineOperand::CreateReg(src1[elem], false));
> +    else
> +      copy->addOperand(MachineOperand::CreateReg(src2[elem], false));
> +    copies.push_back(copy);
> +  }
> +}
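
One question on the shuffle handling: can undef mask elements reach
this point? ShuffleVectorSDNode::getMaskElt returns -1 for undef, and
the ShuffleMaskN SDNodeXForms earlier in the patch would propagate that
into this immediate, at which point src % numcopies is negative and
src1[elem]/src2[elem] index out of bounds. If undefs are guaranteed to
be gone by now, an assert would document that:

  int src = which.getImm();
  assert(src >= 0 && "undef shuffle mask elements not expected here");

otherwise they probably need handling (any source element would do).
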
> +
> +///=============================================================================
> +///a <= extractv2 va, <i1>
> +///gets turned into a simple move to the scalar register a. The source depends
> +///on i1.
> +///=============================================================================
> +void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr,
> +                                        std::vector<MachineInstr *>& copies) {
> +  unsigned srcregnum = Instr->getOperand(1).getReg();
> +
> +  SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
> +
> +  MachineOperand which = Instr->getOperand(2);
> +  assert(which.isImm() && "Extract operand not a constant");
> +
> +  DebugLoc DL = Instr->getDebugLoc();
> +
> +  MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
> +                               Instr->getOperand(0).getReg());
> +  copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false));
> +
> +  copies.push_back(copy);
> +}
> +
> +///=============================================================================
> +///va <= vecinsertv2 vb, c, <i1>
> +///This instruction copies all elements of vb to va, except the 'i1'th element.
> +///The scalar value c becomes the 'i1'th element of va.
> +///This gets translated to 2 (4 for vecinsertv4) moves.
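> +///For example, va <= vecinsertv2 vb, c, <1> generates a1 <= b1 and a2 <= c.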
> +///=============================================================================
> +void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr,
> +                                       std::vector<MachineInstr *>& copies) {
> +  unsigned numcopies=numCopiesNeeded(Instr);
> +
> +  unsigned destregnum = Instr->getOperand(0).getReg();
> +  unsigned srcregnum = Instr->getOperand(1).getReg();
> +
> +  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
> +  SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
> +
> +  MachineOperand which=Instr->getOperand(3);
> +  assert(which.isImm() && "Insert operand not a constant");
> +  unsigned int elem=which.getImm();
> +
> +  DebugLoc DL = Instr->getDebugLoc();
> +
> +  for (unsigned i=0; i<numcopies; i++) {
> +    MachineInstr *copy = BuildMI(F, DL,
> +                              InstrInfo->get(getScalarVersion(Instr)), dest[i]);
> +
> +    if (i != elem)
> +      copy->addOperand(MachineOperand::CreateReg(src[i], false));
> +    else
> +      copy->addOperand(Instr->getOperand(2));
> +
> +    copies.push_back(copy);
> +  }
> +
> +}
> +
> +///=============================================================================
> +///va <= buildv2 b1, b2
> +///gets translated to
> +///a1 <= b1
> +///a2 <= b2
> +///=============================================================================
> +void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr,
> +                                      std::vector<MachineInstr *>& copies) {
> +  unsigned numcopies=numCopiesNeeded(Instr);
> +
> +  unsigned destregnum = Instr->getOperand(0).getReg();
> +
> +  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
> +
> +  DebugLoc DL = Instr->getDebugLoc();
> +
> +  for (unsigned i=0; i<numcopies; i++) {
> +    MachineInstr *copy = BuildMI(F, DL,
> +                              InstrInfo->get(getScalarVersion(Instr)), dest[i]);
> +
> +    copy->addOperand(Instr->getOperand(1+i));
> +
> +    copies.push_back(copy);
> +  }
> +
> +}
> +
> +///=============================================================================
> +///For a tex inst of the form
> +///va <= op [scalar operands]
> +///the following multi-output instruction is created:
> +///[v1, v2] <= op' [scalar operands]
> +///=============================================================================
> +void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
> +                                     std::vector<MachineInstr *>& copies) {
> +  copies.push_back(F.CloneMachineInstr(Instr));
> +
> +  MachineInstr *copy=copies[0];
> +  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
> +
> +  // Remove the dest, which should be a vector operand.
> +  MachineOperand dest = copy->getOperand(0);
> +  unsigned regnum = dest.getReg();
> +
> +  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
> +  copy->RemoveOperand(0);
> +
> +  std::vector<MachineOperand> otherOperands;
> +  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
> +    otherOperands.push_back(copy->getOperand(i));
> +
> +  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
> +    copy->RemoveOperand(0);
> +
> +  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
> +    copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
> +
> +  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
> +    copy->addOperand(otherOperands[i]);
> +}
> +
> +///=============================================================================
> +///Look at the vector instruction type and dispatch to the createVec<...>
> +///function that creates the scalar copies.
> +///=============================================================================
> +void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
> +                                    std::vector<MachineInstr *>& copies) {
> +  if (ISVECLOAD(Instr)) {
> +    createLoadCopy(F, Instr, copies);
> +    return;
> +  }
> +  if (ISVECSTORE(Instr)) {
> +    createStoreCopy(F, Instr, copies);
> +    return;
> +  }
> +  if (ISVECSHUFFLE(Instr)) {
> +    createVecShuffle(F, Instr, copies);
> +    return;
> +  }
> +  if (ISVECEXTRACT(Instr)) {
> +    createVecExtract(F, Instr, copies);
> +    return;
> +  }
> +  if (ISVECINSERT(Instr)) {
> +    createVecInsert(F, Instr, copies);
> +    return;
> +  }
> +  if (ISVECDEST(Instr)) {
> +    createVecDest(F, Instr, copies);
> +    return;
> +  }
> +  if (ISVECBUILD(Instr)) {
> +    createVecBuild(F, Instr, copies);
> +    return;
> +  }
> +
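> +  // Generic element-wise case: emit one scalar clone per vector element,
> +  // rewriting every vector register operand to its i-th scalar sub-register.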
> +  unsigned numcopies=numCopiesNeeded(Instr);
> +
> +  for (unsigned i=0; i<numcopies; ++i)
> +    copies.push_back(F.CloneMachineInstr(Instr));
> +
> +  for (unsigned i=0; i<numcopies; ++i) {
> +    MachineInstr *copy = copies[i];
> +
> +    std::vector<MachineOperand> allOperands;
> +    std::vector<bool> isDef;
> +
> +    for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) {
> +      MachineOperand oper = copy->getOperand(j);
> +      allOperands.push_back(oper);
> +      if (oper.isReg())
> +        isDef.push_back(oper.isDef());
> +      else
> +        isDef.push_back(false);
> +    }
> +
> +    for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j)
> +      copy->RemoveOperand(0);
> +
> +    copy->setDesc(InstrInfo->get(getScalarVersion(Instr)));
> +
> +    for (unsigned j=0, e=allOperands.size(); j!=e; ++j) {
> +      MachineOperand oper=allOperands[j];
> +      if (oper.isReg()) {
> +        unsigned regnum = oper.getReg();
> +        if (isVectorRegister(regnum)) {
> +
> +          SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
> +          copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j]));
> +        }
> +        else
> +          copy->addOperand(oper);
> +      }
> +      else
> +        copy->addOperand(oper);
> +    }
> +  }
> +}
> +
> +///=============================================================================
> +///Scan through all basic blocks, looking for vector instructions.
> +///For each vector instruction I, insert the scalar copies before I and
> +///add I to the toRemove vector. Finally, remove all instructions in toRemove.
> +///=============================================================================
> +void VectorElementize::elementize(MachineFunction &F) {
> +  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend();
> +      BI!=BE; ++BI) {
> +    MachineBasicBlock *BB = &*BI;
> +
> +    std::vector<MachineInstr *> copies;
> +    std::vector<MachineInstr *> toRemove;
> +
> +    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end();
> +        II!=IE; ++II) {
> +      MachineInstr *Instr = &*II;
> +
> +      if (!isVectorInstr(Instr))
> +        continue;
> +
> +      copies.clear();
> +      createCopies(F, Instr, copies);
> +      for (unsigned i=0, e=copies.size(); i!=e; ++i)
> +        BB->insert(II, copies[i]);
> +
> +      assert((copies.size() > 0) && "Problem in createCopies");
> +      toRemove.push_back(Instr);
> +    }
> +    for (unsigned i=0, e=toRemove.size(); i!=e; ++i)
> +      F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i]));
> +  }
> +}
> +
> +///=============================================================================
> +///a <= b
> +///...
> +///...
> +///x <= op(a, ...)
> +///gets converted to
> +///
> +///x <= op(b, ...)
> +///The original move is still present. This works on SSA-form machine code.
> +///Note that a <= b must be a simple vreg-to-vreg move instruction.
> +///TBD: no replaceOperand-style helper was found, so all operands are removed
> +///and re-added, substituting the propagated source along the way.
> +///=============================================================================
> +unsigned VectorElementize::copyProp(MachineFunction &F) {
> +  unsigned numReplacements = 0;
> +
> +  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
> +      ++BI) {
> +    MachineBasicBlock *BB = &*BI;
> +
> +    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
> +        ++II) {
> +      MachineInstr *Instr = &*II;
> +
> +      // Don't do copy propagation on PHI as it will cause unnecessary
> +      // live range overlap.
> +      if ((Instr->getOpcode() == TargetOpcode::PHI) ||
> +          (Instr->getOpcode() == TargetOpcode::DBG_VALUE))
> +        continue;
> +
> +      bool needsReplacement = false;
> +
> +      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
> +        MachineOperand oper = Instr->getOperand(i);
> +        if (!oper.isReg()) continue;
> +        if (oper.isDef()) continue;
> +        if (!RegInfo->isVirtualRegister(oper.getReg())) continue;
> +
> +        MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
> +
> +        if (!defInstr) continue;
> +
> +        if (!isSimpleMove(defInstr)) continue;
> +
> +        MachineOperand defSrc = defInstr->getOperand(1);
> +        if (!defSrc.isReg()) continue;
> +        if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue;
> +
> +        needsReplacement = true;
> +
> +      }
> +      if (!needsReplacement) continue;
> +
> +      numReplacements++;
> +
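> +      // Rebuild the operand list, substituting the move source for every
> +      // use that is defined by a simple vreg-to-vreg move.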
> +      std::vector<MachineOperand> operands;
> +
> +      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
> +        MachineOperand oper = Instr->getOperand(i);
> +        bool flag = false;
> +        do {
> +          if (!(oper.isReg()))
> +            break;
> +          if (oper.isDef())
> +            break;
> +          if (!(RegInfo->isVirtualRegister(oper.getReg())))
> +            break;
> +          MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
> +          if (!(isSimpleMove(defInstr)))
> +            break;
> +          MachineOperand defSrc = defInstr->getOperand(1);
> +          if (!(defSrc.isReg()))
> +            break;
> +          if (!(RegInfo->isVirtualRegister(defSrc.getReg())))
> +            break;
> +          operands.push_back(defSrc);
> +          flag = true;
> +        } while (0);
> +        if (!flag)
> +          operands.push_back(oper);
> +      }
> +
> +      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i)
> +        Instr->RemoveOperand(0);
> +      for (unsigned i=0, e=operands.size(); i!=e; ++i)
> +        Instr->addOperand(operands[i]);
> +
> +    }
> +  }
> +  return numReplacements;
> +}
> +
> +///=============================================================================
> +///Look for simple vreg-to-vreg moves whose destination register has no uses
> +///(use_empty()), add them to the deadMoves vector, then remove all
> +///instructions in deadMoves.
> +///=============================================================================
> +unsigned VectorElementize::removeDeadMoves(MachineFunction &F) {
> +  std::vector<MachineInstr *> deadMoves;
> +  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
> +      ++BI) {
> +    MachineBasicBlock *BB = &*BI;
> +
> +    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
> +        ++II) {
> +      MachineInstr *Instr = &*II;
> +
> +      if (!isSimpleMove(Instr)) continue;
> +
> +      MachineOperand dest = Instr->getOperand(0);
> +      assert(dest.isReg() && "dest of move not a register");
> +      assert(RegInfo->isVirtualRegister(dest.getReg()) &&
> +             "dest of move not a virtual register");
> +
> +      if (MRI->use_empty(dest.getReg())) {
> +        deadMoves.push_back(Instr);
> +      }
> +    }
> +  }
> +
> +  for (unsigned i=0, e=deadMoves.size(); i!=e; ++i)
> +    F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i]));
> +
> +  return deadMoves.size();
> +}
> +
> +///=============================================================================
> +///Main function for this pass.
> +///=============================================================================
> +bool VectorElementize::runOnMachineFunction(MachineFunction &F) {
> +  MRI = &F.getRegInfo();
> +
> +  RegInfo = TM.getRegisterInfo();
> +  InstrInfo = TM.getInstrInfo();
> +
> +  VectorToScalarMap.clear();
> +
> +  elementize(F);
> +
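> +  // Run copy propagation and dead-move removal to a fixed point; each
> +  // round of copyProp can expose new dead moves.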
> +  if (RemoveRedundantMoves)
> +    while (1) {
> +      if (copyProp(F) == 0) break;
> +      removeDeadMoves(F);
> +    }
> +
> +  return true;
> +}
> +
> +FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) {
> +  return new VectorElementize(tm);
> +}
> +
> +unsigned VectorElementize::getScalarVersion(unsigned opcode) {
> +  if (opcode == NVPTX::PHI)
> +    return opcode;
> +  if (opcode == NVPTX::IMPLICIT_DEF)
> +    return opcode;
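> +  // PHI and IMPLICIT_DEF are already scalar; every other vector opcode must
> +  // have an entry in the table below (see the assert referencing
> +  // NVPTXVector.td).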
> +  switch(opcode) {
> +  default:
> +    assert(0 && "Scalar version not set, fix NVPTXVector.td");
> +    return 0;
> +  case TargetOpcode::COPY: return TargetOpcode::COPY;
> +  case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr;
> +  case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr;
> +  case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr;
> +  case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr;
> +  case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr;
> +  case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr;
> +  case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr;
> +  case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr;
> +  case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr;
> +  case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr;
> +  case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr;
> +  case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr;
> +  case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr;
> +  case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr;
> +  case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32;
> +  case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32;
> +  case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32;
> +  case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32;
> +  case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32;
> +  case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr;
> +  case NVPTX::F32MADV2: return NVPTX::FMAD32rrr;
> +  case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr;
> +  case NVPTX::F32MADV4: return NVPTX::FMAD32rrr;
> +  case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr;
> +  case NVPTX::F32FMAV2: return NVPTX::FMA32rrr;
> +  case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr;
> +  case NVPTX::F32FMAV4: return NVPTX::FMA32rrr;
> +  case NVPTX::F64FMAV2: return NVPTX::FMA64rrr;
> +  case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32;
> +  case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64;
> +  case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32;
> +  case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32;
> +  case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64;
> +  case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32;
> +  case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32;
> +  case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64;
> +  case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32;
> +  case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32;
> +  case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64;
> +  case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32;
> +  case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32;
> +  case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64;
> +  case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32;
> +  case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32;
> +  case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64;
> +  case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32;
> +  case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32;
> +  case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64;
> +  case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32;
> +  case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32;
> +  case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64;
> +  case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32;
> +  case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32;
> +  case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64;
> +  case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32;
> +  case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32;
> +  case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64;
> +  case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32;
> +  case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32;
> +  case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64;
> +  case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32;
> +  case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32;
> +  case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64;
> +  case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32;
> +  case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32;
> +  case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64;
> +  case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32;
> +  case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32;
> +  case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64;
> +  case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32;
> +  case NVPTX::I16MADV2: return NVPTX::MAD16rrr;
> +  case NVPTX::I16MADV4: return NVPTX::MAD16rrr;
> +  case NVPTX::I32MADV2: return NVPTX::MAD32rrr;
> +  case NVPTX::I32MADV4: return NVPTX::MAD32rrr;
> +  case NVPTX::I64MADV2: return NVPTX::MAD64rrr;
> +  case NVPTX::I8MADV2: return NVPTX::MAD8rrr;
> +  case NVPTX::I8MADV4: return NVPTX::MAD8rrr;
> +  case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr;
> +  case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr;
> +  case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr;
> +  case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr;
> +  case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr;
> +  case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr;
> +  case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr;
> +  case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr;
> +  case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr;
> +  case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr;
> +  case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr;
> +  case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr;
> +  case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr;
> +  case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr;
> +  case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr;
> +  case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr;
> +  case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr;
> +  case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr;
> +  case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr;
> +  case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr;
> +  case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr;
> +  case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr;
> +  case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr;
> +  case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr;
> +  case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr;
> +  case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
> +  case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec;
> +  case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz;
> +  case NVPTX::V2F32Div: return NVPTX::FDIV32rr;
> +  case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr;
> +  case NVPTX::V2F64Div: return NVPTX::FDIV64rr;
> +  case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr;
> +  case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr;
> +  case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr;
> +  case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr;
> +  case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr;
> +  case NVPTX::V2f32Extract: return NVPTX::FMOV32rr;
> +  case NVPTX::V2f32Insert: return NVPTX::FMOV32rr;
> +  case NVPTX::V2f32Mov: return NVPTX::FMOV32rr;
> +  case NVPTX::V2f64Extract: return NVPTX::FMOV64rr;
> +  case NVPTX::V2f64Insert: return NVPTX::FMOV64rr;
> +  case NVPTX::V2f64Mov: return NVPTX::FMOV64rr;
> +  case NVPTX::V2i16Extract: return NVPTX::IMOV16rr;
> +  case NVPTX::V2i16Insert: return NVPTX::IMOV16rr;
> +  case NVPTX::V2i16Mov: return NVPTX::IMOV16rr;
> +  case NVPTX::V2i32Extract: return NVPTX::IMOV32rr;
> +  case NVPTX::V2i32Insert: return NVPTX::IMOV32rr;
> +  case NVPTX::V2i32Mov: return NVPTX::IMOV32rr;
> +  case NVPTX::V2i64Extract: return NVPTX::IMOV64rr;
> +  case NVPTX::V2i64Insert: return NVPTX::IMOV64rr;
> +  case NVPTX::V2i64Mov: return NVPTX::IMOV64rr;
> +  case NVPTX::V2i8Extract: return NVPTX::IMOV8rr;
> +  case NVPTX::V2i8Insert: return NVPTX::IMOV8rr;
> +  case NVPTX::V2i8Mov: return NVPTX::IMOV8rr;
> +  case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
> +  case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec;
> +  case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz;
> +  case NVPTX::V4F32Div: return NVPTX::FDIV32rr;
> +  case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr;
> +  case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr;
> +  case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr;
> +  case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr;
> +  case NVPTX::V4f32Extract: return NVPTX::FMOV32rr;
> +  case NVPTX::V4f32Insert: return NVPTX::FMOV32rr;
> +  case NVPTX::V4f32Mov: return NVPTX::FMOV32rr;
> +  case NVPTX::V4i16Extract: return NVPTX::IMOV16rr;
> +  case NVPTX::V4i16Insert: return NVPTX::IMOV16rr;
> +  case NVPTX::V4i16Mov: return NVPTX::IMOV16rr;
> +  case NVPTX::V4i32Extract: return NVPTX::IMOV32rr;
> +  case NVPTX::V4i32Insert: return NVPTX::IMOV32rr;
> +  case NVPTX::V4i32Mov: return NVPTX::IMOV32rr;
> +  case NVPTX::V4i8Extract: return NVPTX::IMOV8rr;
> +  case NVPTX::V4i8Insert: return NVPTX::IMOV8rr;
> +  case NVPTX::V4i8Mov: return NVPTX::IMOV8rr;
> +  case NVPTX::VAddV2I16: return NVPTX::ADDi16rr;
> +  case NVPTX::VAddV2I32: return NVPTX::ADDi32rr;
> +  case NVPTX::VAddV2I64: return NVPTX::ADDi64rr;
> +  case NVPTX::VAddV2I8: return NVPTX::ADDi8rr;
> +  case NVPTX::VAddV4I16: return NVPTX::ADDi16rr;
> +  case NVPTX::VAddV4I32: return NVPTX::ADDi32rr;
> +  case NVPTX::VAddV4I8: return NVPTX::ADDi8rr;
> +  case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr;
> +  case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz;
> +  case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr;
> +  case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr;
> +  case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz;
> +  case NVPTX::VAndV2I16: return NVPTX::ANDb16rr;
> +  case NVPTX::VAndV2I32: return NVPTX::ANDb32rr;
> +  case NVPTX::VAndV2I64: return NVPTX::ANDb64rr;
> +  case NVPTX::VAndV2I8: return NVPTX::ANDb8rr;
> +  case NVPTX::VAndV4I16: return NVPTX::ANDb16rr;
> +  case NVPTX::VAndV4I32: return NVPTX::ANDb32rr;
> +  case NVPTX::VAndV4I8: return NVPTX::ANDb8rr;
> +  case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz;
> +  case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr;
> +  case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr;
> +  case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz;
> +  case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr;
> +  case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr;
> +  case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr;
> +  case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr;
> +  case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr;
> +  case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr;
> +  case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr;
> +  case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr;
> +  case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr;
> +  case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr;
> +  case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr;
> +  case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr;
> +  case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr;
> +  case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr;
> +  case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr;
> +  case NVPTX::VMultV2I16: return NVPTX::MULTi16rr;
> +  case NVPTX::VMultV2I32: return NVPTX::MULTi32rr;
> +  case NVPTX::VMultV2I64: return NVPTX::MULTi64rr;
> +  case NVPTX::VMultV2I8: return NVPTX::MULTi8rr;
> +  case NVPTX::VMultV4I16: return NVPTX::MULTi16rr;
> +  case NVPTX::VMultV4I32: return NVPTX::MULTi32rr;
> +  case NVPTX::VMultV4I8: return NVPTX::MULTi8rr;
> +  case NVPTX::VNegV2I16: return NVPTX::INEG16;
> +  case NVPTX::VNegV2I32: return NVPTX::INEG32;
> +  case NVPTX::VNegV2I64: return NVPTX::INEG64;
> +  case NVPTX::VNegV2I8: return NVPTX::INEG8;
> +  case NVPTX::VNegV4I16: return NVPTX::INEG16;
> +  case NVPTX::VNegV4I32: return NVPTX::INEG32;
> +  case NVPTX::VNegV4I8: return NVPTX::INEG8;
> +  case NVPTX::VNegv2f32: return NVPTX::FNEGf32;
> +  case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz;
> +  case NVPTX::VNegv2f64: return NVPTX::FNEGf64;
> +  case NVPTX::VNegv4f32: return NVPTX::FNEGf32;
> +  case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz;
> +  case NVPTX::VNotV2I16: return NVPTX::NOT16;
> +  case NVPTX::VNotV2I32: return NVPTX::NOT32;
> +  case NVPTX::VNotV2I64: return NVPTX::NOT64;
> +  case NVPTX::VNotV2I8: return NVPTX::NOT8;
> +  case NVPTX::VNotV4I16: return NVPTX::NOT16;
> +  case NVPTX::VNotV4I32: return NVPTX::NOT32;
> +  case NVPTX::VNotV4I8: return NVPTX::NOT8;
> +  case NVPTX::VOrV2I16: return NVPTX::ORb16rr;
> +  case NVPTX::VOrV2I32: return NVPTX::ORb32rr;
> +  case NVPTX::VOrV2I64: return NVPTX::ORb64rr;
> +  case NVPTX::VOrV2I8: return NVPTX::ORb8rr;
> +  case NVPTX::VOrV4I16: return NVPTX::ORb16rr;
> +  case NVPTX::VOrV4I32: return NVPTX::ORb32rr;
> +  case NVPTX::VOrV4I8: return NVPTX::ORb8rr;
> +  case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr;
> +  case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr;
> +  case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr;
> +  case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr;
> +  case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr;
> +  case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr;
> +  case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr;
> +  case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr;
> +  case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr;
> +  case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr;
> +  case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr;
> +  case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr;
> +  case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr;
> +  case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr;
> +  case NVPTX::VSubV2I16: return NVPTX::SUBi16rr;
> +  case NVPTX::VSubV2I32: return NVPTX::SUBi32rr;
> +  case NVPTX::VSubV2I64: return NVPTX::SUBi64rr;
> +  case NVPTX::VSubV2I8: return NVPTX::SUBi8rr;
> +  case NVPTX::VSubV4I16: return NVPTX::SUBi16rr;
> +  case NVPTX::VSubV4I32: return NVPTX::SUBi32rr;
> +  case NVPTX::VSubV4I8: return NVPTX::SUBi8rr;
> +  case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz;
> +  case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr;
> +  case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr;
> +  case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz;
> +  case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr;
> +  case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr;
> +  case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr;
> +  case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr;
> +  case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr;
> +  case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr;
> +  case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr;
> +  case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr;
> +  case NVPTX::VURemV2I16: return NVPTX::UREMi16rr;
> +  case NVPTX::VURemV2I32: return NVPTX::UREMi32rr;
> +  case NVPTX::VURemV2I64: return NVPTX::UREMi64rr;
> +  case NVPTX::VURemV2I8: return NVPTX::UREMi8rr;
> +  case NVPTX::VURemV4I16: return NVPTX::UREMi16rr;
> +  case NVPTX::VURemV4I32: return NVPTX::UREMi32rr;
> +  case NVPTX::VURemV4I8: return NVPTX::UREMi8rr;
> +  case NVPTX::VXorV2I16: return NVPTX::XORb16rr;
> +  case NVPTX::VXorV2I32: return NVPTX::XORb32rr;
> +  case NVPTX::VXorV2I64: return NVPTX::XORb64rr;
> +  case NVPTX::VXorV2I8: return NVPTX::XORb8rr;
> +  case NVPTX::VXorV4I16: return NVPTX::XORb16rr;
> +  case NVPTX::VXorV4I32: return NVPTX::XORb32rr;
> +  case NVPTX::VXorV4I8: return NVPTX::XORb8rr;
> +  case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16;
> +  case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32;
> +  case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64;
> +  case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8;
> +  case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16;
> +  case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32;
> +  case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8;
> +  case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16;
> +  case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32;
> +  case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64;
> +  case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8;
> +  case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16;
> +  case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32;
> +  case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8;
> +  case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16;
> +  case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32;
> +  case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64;
> +  case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8;
> +  case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16;
> +  case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32;
> +  case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8;
> +  case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16;
> +  case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32;
> +  case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64;
> +  case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8;
> +  case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16;
> +  case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32;
> +  case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8;
> +  case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16;
> +  case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32;
> +  case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64;
> +  case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8;
> +  case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16;
> +  case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32;
> +  case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8;
> +  case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16;
> +  case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32;
> +  case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64;
> +  case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8;
> +  case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16;
> +  case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32;
> +  case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8;
> +  case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr;
> +  case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr;
> +  case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr;
> +  case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr;
> +  case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr;
> +  case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr;
> +  case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr;
> +  case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr;
> +  case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr;
> +  case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr;
> +  case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16;
> +  case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32;
> +  case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64;
> +  case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8;
> +  case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16;
> +  case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32;
> +  case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8;
> +  case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16;
> +  case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32;
> +  case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64;
> +  case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8;
> +  case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16;
> +  case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32;
> +  case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8;
> +  case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16;
> +  case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32;
> +  case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64;
> +  case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8;
> +  case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16;
> +  case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32;
> +  case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8;
> +  case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16;
> +  case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32;
> +  case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64;
> +  case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8;
> +  case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16;
> +  case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32;
> +  case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8;
> +  case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16;
> +  case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32;
> +  case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64;
> +  case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8;
> +  case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16;
> +  case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32;
> +  case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8;
> +  case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16;
> +  case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32;
> +  case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64;
> +  case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8;
> +  case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16;
> +  case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32;
> +  case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8;
> +  case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
> +  case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
> +  case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
> +  case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
> +  case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
> +  case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
> +  case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
> +  case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
> +  case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
> +  case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
> +  case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
> +  case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
> +  case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
> +  case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
> +  case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
> +  case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
> +  case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
> +  case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
> +  case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
> +  case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
> +
> +  case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32;
> +  case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16;
> +  case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8;
> +  case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64;
> +  case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32;
> +  case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16;
> +  case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8;
> +  case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32;
> +  case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32;
> +  case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64;
> +  case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32;
> +  case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16;
> +  case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8;
> +  case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64;
> +  case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32;
> +  case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16;
> +  case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8;
> +  case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32;
> +  case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32;
> +  case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64;
> +  case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32;
> +  case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16;
> +  case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8;
> +  case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64;
> +  case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32;
> +  case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16;
> +  case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8;
> +  case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32;
> +  case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32;
> +  case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64;
> +  case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8;
> +  case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16;
> +  case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8;
> +  case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16;
> +  case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32;
> +  case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32;
> +
> +  case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar;
> +  case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg;
> +  case NVPTX::LD_v2i8_ari:  return NVPTX::LDV_i8_v2_ari;
> +  case NVPTX::LD_v2i8_asi:  return NVPTX::LDV_i8_v2_asi;
> +  case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar;
> +  case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg;
> +  case NVPTX::LD_v4i8_ari:  return NVPTX::LDV_i8_v4_ari;
> +  case NVPTX::LD_v4i8_asi:  return NVPTX::LDV_i8_v4_asi;
> +
> +  case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar;
> +  case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg;
> +  case NVPTX::LD_v2i16_ari:  return NVPTX::LDV_i16_v2_ari;
> +  case NVPTX::LD_v2i16_asi:  return NVPTX::LDV_i16_v2_asi;
> +  case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar;
> +  case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg;
> +  case NVPTX::LD_v4i16_ari:  return NVPTX::LDV_i16_v4_ari;
> +  case NVPTX::LD_v4i16_asi:  return NVPTX::LDV_i16_v4_asi;
> +
> +  case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar;
> +  case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg;
> +  case NVPTX::LD_v2i32_ari:  return NVPTX::LDV_i32_v2_ari;
> +  case NVPTX::LD_v2i32_asi:  return NVPTX::LDV_i32_v2_asi;
> +  case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar;
> +  case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg;
> +  case NVPTX::LD_v4i32_ari:  return NVPTX::LDV_i32_v4_ari;
> +  case NVPTX::LD_v4i32_asi:  return NVPTX::LDV_i32_v4_asi;
> +
> +  case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar;
> +  case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg;
> +  case NVPTX::LD_v2f32_ari:  return NVPTX::LDV_f32_v2_ari;
> +  case NVPTX::LD_v2f32_asi:  return NVPTX::LDV_f32_v2_asi;
> +  case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar;
> +  case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg;
> +  case NVPTX::LD_v4f32_ari:  return NVPTX::LDV_f32_v4_ari;
> +  case NVPTX::LD_v4f32_asi:  return NVPTX::LDV_f32_v4_asi;
> +
> +  case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar;
> +  case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg;
> +  case NVPTX::LD_v2i64_ari:  return NVPTX::LDV_i64_v2_ari;
> +  case NVPTX::LD_v2i64_asi:  return NVPTX::LDV_i64_v2_asi;
> +  case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar;
> +  case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg;
> +  case NVPTX::LD_v2f64_ari:  return NVPTX::LDV_f64_v2_ari;
> +  case NVPTX::LD_v2f64_asi:  return NVPTX::LDV_f64_v2_asi;
> +
> +  case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar;
> +  case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg;
> +  case NVPTX::ST_v2i8_ari:  return NVPTX::STV_i8_v2_ari;
> +  case NVPTX::ST_v2i8_asi:  return NVPTX::STV_i8_v2_asi;
> +  case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar;
> +  case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg;
> +  case NVPTX::ST_v4i8_ari:  return NVPTX::STV_i8_v4_ari;
> +  case NVPTX::ST_v4i8_asi:  return NVPTX::STV_i8_v4_asi;
> +
> +  case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar;
> +  case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg;
> +  case NVPTX::ST_v2i16_ari:  return NVPTX::STV_i16_v2_ari;
> +  case NVPTX::ST_v2i16_asi:  return NVPTX::STV_i16_v2_asi;
> +  case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar;
> +  case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg;
> +  case NVPTX::ST_v4i16_ari:  return NVPTX::STV_i16_v4_ari;
> +  case NVPTX::ST_v4i16_asi:  return NVPTX::STV_i16_v4_asi;
> +
> +  case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar;
> +  case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg;
> +  case NVPTX::ST_v2i32_ari:  return NVPTX::STV_i32_v2_ari;
> +  case NVPTX::ST_v2i32_asi:  return NVPTX::STV_i32_v2_asi;
> +  case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar;
> +  case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg;
> +  case NVPTX::ST_v4i32_ari:  return NVPTX::STV_i32_v4_ari;
> +  case NVPTX::ST_v4i32_asi:  return NVPTX::STV_i32_v4_asi;
> +
> +  case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
> +  case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
> +  case NVPTX::ST_v2f32_ari:  return NVPTX::STV_f32_v2_ari;
> +  case NVPTX::ST_v2f32_asi:  return NVPTX::STV_f32_v2_asi;
> +  case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
> +  case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
> +  case NVPTX::ST_v4f32_ari:  return NVPTX::STV_f32_v4_ari;
> +  case NVPTX::ST_v4f32_asi:  return NVPTX::STV_f32_v4_asi;
> +
> +  case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
> +  case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
> +  case NVPTX::ST_v2i64_ari:  return NVPTX::STV_i64_v2_ari;
> +  case NVPTX::ST_v2i64_asi:  return NVPTX::STV_i64_v2_asi;
> +  case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
> +  case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
> +  case NVPTX::ST_v2f64_ari:  return NVPTX::STV_f64_v2_ari;
> +  case NVPTX::ST_v2f64_asi:  return NVPTX::STV_f64_v2_asi;
> +  }
> +  return 0;
> +}
>
> Added: llvm/trunk/lib/Target/NVPTX/cl_common_defines.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/cl_common_defines.h?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/cl_common_defines.h (added)
> +++ llvm/trunk/lib/Target/NVPTX/cl_common_defines.h Fri May  4 15:18:50 2012
> @@ -0,0 +1,125 @@
> +#ifndef __CL_COMMON_DEFINES_H__
> +#define __CL_COMMON_DEFINES_H__
> +// This file includes defines that are common to both kernel code and
> +// the NVPTX back-end.
> +
> +//
> +// Common defines for Image intrinsics
> +// Channel order
> +enum {
> +  CLK_R = 0x10B0,
> +  CLK_A = 0x10B1,
> +  CLK_RG = 0x10B2,
> +  CLK_RA = 0x10B3,
> +  CLK_RGB = 0x10B4,
> +  CLK_RGBA = 0x10B5,
> +  CLK_BGRA = 0x10B6,
> +  CLK_ARGB = 0x10B7,
> +
> +#if (__NV_CL_C_VERSION == __NV_CL_C_VERSION_1_0)
> +  CLK_xRGB = 0x10B7,
> +#endif
> +
> +  CLK_INTENSITY = 0x10B8,
> +  CLK_LUMINANCE = 0x10B9
> +
> +#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
> +  ,
> +  CLK_Rx = 0x10BA,
> +  CLK_RGx = 0x10BB,
> +  CLK_RGBx = 0x10BC
> +#endif
> +};
> +
> +
> +typedef enum clk_channel_type {
> +  // valid formats for float return types
> +  CLK_SNORM_INT8 = 0x10D0,            // four channel RGBA snorm8
> +  CLK_SNORM_INT16 = 0x10D1,           // four channel RGBA snorm16
> +  CLK_UNORM_INT8 = 0x10D2,            // four channel RGBA unorm8
> +  CLK_UNORM_INT16 = 0x10D3,           // four channel RGBA unorm16
> +  CLK_HALF_FLOAT = 0x10DD,            // four channel RGBA half
> +  CLK_FLOAT = 0x10DE,                 // four channel RGBA float
> +
> +#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
> +  CLK_UNORM_SHORT_565 = 0x10D4,
> +  CLK_UNORM_SHORT_555 = 0x10D5,
> +  CLK_UNORM_INT_101010 = 0x10D6,
> +#endif
> +
> +  // valid only for integer return types
> +  CLK_SIGNED_INT8 =  0x10D7,
> +  CLK_SIGNED_INT16 = 0x10D8,
> +  CLK_SIGNED_INT32 = 0x10D9,
> +  CLK_UNSIGNED_INT8 = 0x10DA,
> +  CLK_UNSIGNED_INT16 = 0x10DB,
> +  CLK_UNSIGNED_INT32 = 0x10DC,
> +
> +  // CI SPI for CPU
> +  __CLK_UNORM_INT8888,          // four channel ARGB unorm8
> +  __CLK_UNORM_INT8888R,         // four channel BGRA unorm8
> +
> +  __CLK_VALID_IMAGE_TYPE_COUNT,
> +  __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
> +  __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4,         // number of bits required to
> +                                                // represent any image type
> +  __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
> +} clk_channel_type;
> +
> +typedef enum clk_sampler_type {
> +    __CLK_ADDRESS_BASE             = 0,
> +    CLK_ADDRESS_NONE               = 0 << __CLK_ADDRESS_BASE,
> +    CLK_ADDRESS_CLAMP              = 1 << __CLK_ADDRESS_BASE,
> +    CLK_ADDRESS_CLAMP_TO_EDGE      = 2 << __CLK_ADDRESS_BASE,
> +    CLK_ADDRESS_REPEAT             = 3 << __CLK_ADDRESS_BASE,
> +    CLK_ADDRESS_MIRROR             = 4 << __CLK_ADDRESS_BASE,
> +
> +#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
> +    CLK_ADDRESS_MIRRORED_REPEAT    = CLK_ADDRESS_MIRROR,
> +#endif
> +    __CLK_ADDRESS_MASK             = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
> +                                     CLK_ADDRESS_CLAMP_TO_EDGE |
> +                                     CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
> +    __CLK_ADDRESS_BITS             = 3,        // number of bits required to
> +                                               // represent address info
> +
> +    __CLK_NORMALIZED_BASE          = __CLK_ADDRESS_BITS,
> +    CLK_NORMALIZED_COORDS_FALSE    = 0,
> +    CLK_NORMALIZED_COORDS_TRUE     = 1 << __CLK_NORMALIZED_BASE,
> +    __CLK_NORMALIZED_MASK          = CLK_NORMALIZED_COORDS_FALSE |
> +                                     CLK_NORMALIZED_COORDS_TRUE,
> +    __CLK_NORMALIZED_BITS          = 1,        // number of bits required to
> +                                               // represent normalization
> +
> +    __CLK_FILTER_BASE              = __CLK_NORMALIZED_BASE +
> +                                     __CLK_NORMALIZED_BITS,
> +    CLK_FILTER_NEAREST             = 0 << __CLK_FILTER_BASE,
> +    CLK_FILTER_LINEAR              = 1 << __CLK_FILTER_BASE,
> +    CLK_FILTER_ANISOTROPIC         = 2 << __CLK_FILTER_BASE,
> +    __CLK_FILTER_MASK              = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
> +                                     CLK_FILTER_ANISOTROPIC,
> +    __CLK_FILTER_BITS              = 2,        // number of bits required to
> +                                               // represent filter info
> +
> +    __CLK_MIP_BASE                 = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
> +    CLK_MIP_NEAREST                = 0 << __CLK_MIP_BASE,
> +    CLK_MIP_LINEAR                 = 1 << __CLK_MIP_BASE,
> +    CLK_MIP_ANISOTROPIC            = 2 << __CLK_MIP_BASE,
> +    __CLK_MIP_MASK                 = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
> +                                     CLK_MIP_ANISOTROPIC,
> +    __CLK_MIP_BITS                 = 2,
> +
> +    __CLK_SAMPLER_BITS             = __CLK_MIP_BASE + __CLK_MIP_BITS,
> +    __CLK_SAMPLER_MASK             = __CLK_MIP_MASK | __CLK_FILTER_MASK |
> +                                     __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
> +
> +    __CLK_ANISOTROPIC_RATIO_BITS   = 5,
> +    __CLK_ANISOTROPIC_RATIO_MASK   = (int) 0x80000000 >>
> +                                      (__CLK_ANISOTROPIC_RATIO_BITS-1)
> +} clk_sampler_type;
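> +// With the constants above, a sampler value packs as: address mode in
> +// bits [2:0], normalized-coords flag in bit 3, filter mode in bits [5:4],
> +// and mip filter in bits [7:6].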
> +
> +// Memory synchronization
> +#define CLK_LOCAL_MEM_FENCE     (1 << 0)
> +#define CLK_GLOBAL_MEM_FENCE    (1 << 1)
> +
> +#endif // __CL_COMMON_DEFINES_H__
>
> Added: llvm/trunk/lib/Target/NVPTX/gen-register-defs.py
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/gen-register-defs.py?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/gen-register-defs.py (added)
> +++ llvm/trunk/lib/Target/NVPTX/gen-register-defs.py Fri May  4 15:18:50 2012
> @@ -0,0 +1,202 @@
> +#!/usr/bin/env python
> +
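> +# Emits NVPTXRegisterInfo.td and NVPTXNumRegisters.h for a fixed number of
> +# PTX registers per class (scalar, vector, and argument registers).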
> +num_regs = 396
> +
> +outFile = open('NVPTXRegisterInfo.td', 'w')
> +
> +outFile.write('''
> +//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +//===----------------------------------------------------------------------===//
> +//  Declarations that describe the PTX register file
> +//===----------------------------------------------------------------------===//
> +
> +class NVPTXReg<string n> : Register<n> {
> +  let Namespace = "NVPTX";
> +}
> +
> +class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
> +     : RegisterClass <"NVPTX", regTypes, alignment, regList>;
> +
> +//===----------------------------------------------------------------------===//
> +//  Registers
> +//===----------------------------------------------------------------------===//
> +
> +// Special Registers used as stack pointer
> +def VRFrame         : NVPTXReg<"%SP">;
> +def VRFrameLocal    : NVPTXReg<"%SPL">;
> +
> +// Special Registers used as the stack
> +def VRDepot  : NVPTXReg<"%Depot">;
> +''')
> +
> +# Predicates
> +outFile.write('''
> +//===--- Predicate --------------------------------------------------------===//
> +''')
> +for i in range(0, num_regs):
> +  outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))
> +
> +# Int8
> +outFile.write('''
> +//===--- 8-bit ------------------------------------------------------------===//
> +''')
> +for i in range(0, num_regs):
> +  outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))
> +
> +# Int16
> +outFile.write('''
> +//===--- 16-bit -----------------------------------------------------------===//
> +''')
> +for i in range(0, num_regs):
> +  outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))
> +
> +# Int32
> +outFile.write('''
> +//===--- 32-bit -----------------------------------------------------------===//
> +''')
> +for i in range(0, num_regs):
> +  outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))
> +
> +# Int64
> +outFile.write('''
> +//===--- 64-bit -----------------------------------------------------------===//
> +''')
> +for i in range(0, num_regs):
> +  outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))
> +
> +# F32
> +outFile.write('''
> +//===--- 32-bit float -----------------------------------------------------===//
> +''')
> +for i in range(0, num_regs):
> +  outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))
> +
> +# F64
> +outFile.write('''
> +//===--- 64-bit float -----------------------------------------------------===//
> +''')
> +for i in range(0, num_regs):
> +  outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))
> +
> +# Vector registers
> +outFile.write('''
> +//===--- Vector -----------------------------------------------------------===//
> +''')
> +for i in range(0, num_regs):
> +  outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
> +for i in range(0, num_regs):
> +  outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
> +for i in range(0, num_regs):
> +  outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
> +for i in range(0, num_regs):
> +  outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))
> +
> +for i in range(0, num_regs):
> +  outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
> +for i in range(0, num_regs):
> +  outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
> +for i in range(0, num_regs):
> +  outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))
> +
> +# Argument registers
> +outFile.write('''
> +//===--- Arguments --------------------------------------------------------===//
> +''')
> +for i in range(0, num_regs):
> +  outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
> +for i in range(0, num_regs):
> +  outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
> +for i in range(0, num_regs):
> +  outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
> +for i in range(0, num_regs):
> +  outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))
> +
> +outFile.write('''
> +//===----------------------------------------------------------------------===//
> +//  Register classes
> +//===----------------------------------------------------------------------===//
> +''')
> +
> +outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
> +outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
> +outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
> +outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
> +outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))
> +
> +outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
> +outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))
> +
> +outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
> +outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
> +outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
> +outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))
> +
> +outFile.write('''
> +// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
> +def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
> +''')
> +
> +outFile.write('''
> +class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
> +                       NVPTXRegClass sClass,
> +                       int e,
> +                       string n>
> +  : NVPTXRegClass<regTypes, alignment, regList>
> +{
> +  NVPTXRegClass scalarClass=sClass;
> +  int elems=e;
> +  string name=n;
> +}
> +''')
> +
> +
> +outFile.write('def V2F32Regs\n  : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n    Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
> +outFile.write('def V4F32Regs\n  : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n    Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
> +
> +outFile.write('def V2I32Regs\n  : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n    Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
> +outFile.write('def V4I32Regs\n  : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n    Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
> +
> +outFile.write('def V2F64Regs\n  : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n    Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
> +outFile.write('def V2I64Regs\n  : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n    Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
> +
> +outFile.write('def V2I16Regs\n  : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n    Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
> +outFile.write('def V4I16Regs\n  : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n    Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
> +
> +outFile.write('def V2I8Regs\n  : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n    Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
> +outFile.write('def V4I8Regs\n  : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n    Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
> +
> +outFile.close()
> +
> +
> +outFile = open('NVPTXNumRegisters.h', 'w')
> +outFile.write('''
> +//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef NVPTX_NUM_REGISTERS_H
> +#define NVPTX_NUM_REGISTERS_H
> +
> +namespace llvm {
> +
> +const unsigned NVPTXNumRegisters = %d;
> +
> +}
> +
> +#endif
> +''' % num_regs)
> +
> +outFile.close()
>
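A note for readers skimming the generator rather than running it: with num_regs set to, say, 8, the write() calls above expand to TableGen along these lines (an illustrative sketch obtained by substituting into the format strings; the in-tree run uses a larger register count):

    def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 7))>;
    def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 7))>;
    def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 7))>;
    def V2F32Regs
      : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 7)),
        Float32Regs, 2, ".v2.f32">;

and NVPTXNumRegisters.h would then contain just "const unsigned NVPTXNumRegisters = 8;".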
> Modified: llvm/trunk/projects/sample/autoconf/configure.ac
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/projects/sample/autoconf/configure.ac?rev=156196&r1=156195&r2=156196&view=diff
> ==============================================================================
> --- llvm/trunk/projects/sample/autoconf/configure.ac (original)
> +++ llvm/trunk/projects/sample/autoconf/configure.ac Fri May  4 15:18:50 2012
> @@ -310,6 +310,7 @@
>   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;
>   mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
>   ptx-*)                  llvm_cv_target_arch="PTX" ;;
> +  nvptx-*)                llvm_cv_target_arch="NVPTX" ;;
>   *)                      llvm_cv_target_arch="Unknown" ;;
>  esac])
>
> @@ -457,6 +458,7 @@
>     Hexagon)     AC_SUBST(TARGET_HAS_JIT,0) ;;
>     MBlaze)      AC_SUBST(TARGET_HAS_JIT,0) ;;
>     PTX)         AC_SUBST(TARGET_HAS_JIT,0) ;;
> +    NVPTX)       AC_SUBST(TARGET_HAS_JIT,0) ;;
>     *)           AC_SUBST(TARGET_HAS_JIT,0) ;;
>   esac
>  fi
> @@ -567,13 +569,13 @@
>  AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
>     [Build specific host targets: all or target1,target2,... Valid targets are:
>      host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
> -     xcore, msp430, ptx, cbe, and cpp (default=all)]),,
> +     xcore, msp430, ptx, nvptx, cbe, and cpp (default=all)]),,
>     enableval=all)
>  if test "$enableval" = host-only ; then
>   enableval=host
>  fi
>  case "$enableval" in
> -  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX" ;;
> +  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;;
>   *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
>       case "$a_target" in
>         x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> @@ -589,6 +591,7 @@
>         cpp)      TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
>         mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
>         ptx)      TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
> +        nvptx)    TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
>         host) case "$llvm_cv_target_arch" in
>             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
>             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> @@ -602,6 +605,7 @@
>             MSP430)      TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
>             Hexagon)     TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
>             PTX)         TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
> +            NVPTX)       TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
>             *)       AC_MSG_ERROR([Can not set target to build]) ;;
>           esac ;;
>         *) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
>
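Handy for trying this out: with the case arm added above, the new back-end should be buildable in isolation via something like

    ./configure --enable-targets=nvptx

(untested here; both the nvptx and nvptx64 llc targets appear to come from the single NVPTX build target, since only "nvptx" is added to the --enable-targets list).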
> Modified: llvm/trunk/projects/sample/configure
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/projects/sample/configure?rev=156196&r1=156195&r2=156196&view=diff
> ==============================================================================
> --- llvm/trunk/projects/sample/configure (original)
> +++ llvm/trunk/projects/sample/configure Fri May  4 15:18:50 2012
> @@ -1402,7 +1402,8 @@
>   --enable-targets        Build specific host targets: all or
>                           target1,target2,... Valid targets are: host, x86,
>                           x86_64, sparc, powerpc, arm, mips, spu, hexagon,
> -                          xcore, msp430, ptx, cbe, and cpp (default=all)
> +                          xcore, msp430, ptx, nvptx, cbe, and cpp
> +                          (default=all)
>   --enable-bindings       Build specific language bindings:
>                           all,auto,none,{binding-name} (default=auto)
>   --enable-libffi         Check for the presence of libffi (default is NO)
> @@ -3846,6 +3847,7 @@
>   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;
>   mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
>   ptx-*)                  llvm_cv_target_arch="PTX" ;;
> +  nvptx-*)                llvm_cv_target_arch="NVPTX" ;;
>   *)                      llvm_cv_target_arch="Unknown" ;;
>  esac
>  fi
> @@ -5070,6 +5072,8 @@
>  ;;
>     PTX)         TARGET_HAS_JIT=0
>  ;;
> +    NVPTX)       TARGET_HAS_JIT=0
> + ;;
>     *)           TARGET_HAS_JIT=0
>  ;;
>   esac
> @@ -5254,7 +5258,7 @@
>   enableval=host
>  fi
>  case "$enableval" in
> -  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX" ;;
> +  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;;
>   *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
>       case "$a_target" in
>         x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> @@ -5270,6 +5274,7 @@
>         cpp)      TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
>         mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
>         ptx)      TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
> +        nvptx)    TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
>         host) case "$llvm_cv_target_arch" in
>             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
>             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
> @@ -5283,6 +5288,7 @@
>             MSP430)      TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
>             Hexagon)     TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
>             PTX)         TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
> +            NVPTX)       TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
>             *)       { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
>  echo "$as_me: error: Can not set target to build" >&2;}
>    { (exit 1); exit 1; }; } ;;
> @@ -10307,7 +10313,7 @@
>   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>   lt_status=$lt_dlunknown
>   cat > conftest.$ac_ext <<EOF
> -#line 10303 "configure"
> +#line 10316 "configure"
>  #include "confdefs.h"
>
>  #if HAVE_DLFCN_H
>
> Added: llvm/trunk/test/CodeGen/NVPTX/annotations.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/annotations.ll?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/NVPTX/annotations.ll (added)
> +++ llvm/trunk/test/CodeGen/NVPTX/annotations.ll Fri May  4 15:18:50 2012
> @@ -0,0 +1,55 @@
> +; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
> +; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
> +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
> +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
> +
> +
> +@texture = internal addrspace(1) global i64 0, align 8
> +; CHECK: .global .texref texture
> +@surface = internal addrspace(1) global i64 0, align 8
> +; CHECK: .global .surfref surface
> +
> +
> +; CHECK: .entry kernel_func_maxntid
> +define void @kernel_func_maxntid(float* %a) {
> +; CHECK: .maxntid 10, 20, 30
> +; CHECK: ret
> +  ret void
> +}
> +
> +; CHECK: .entry kernel_func_reqntid
> +define void @kernel_func_reqntid(float* %a) {
> +; CHECK: .reqntid 11, 22, 33
> +; CHECK: ret
> +  ret void
> +}
> +
> +; CHECK: .entry kernel_func_minctasm
> +define void @kernel_func_minctasm(float* %a) {
> +; CHECK: .minnctapersm 42
> +; CHECK: ret
> +  ret void
> +}
> +
> +
> +
> +!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8}
> +
> +!1 = metadata !{void (float*)* @kernel_func_maxntid, metadata !"kernel", i32 1}
> +!2 = metadata !{void (float*)* @kernel_func_maxntid,
> +                metadata !"maxntidx", i32 10,
> +                metadata !"maxntidy", i32 20,
> +                metadata !"maxntidz", i32 30}
> +
> +!3 = metadata !{void (float*)* @kernel_func_reqntid, metadata !"kernel", i32 1}
> +!4 = metadata !{void (float*)* @kernel_func_reqntid,
> +                metadata !"reqntidx", i32 11,
> +                metadata !"reqntidy", i32 22,
> +                metadata !"reqntidz", i32 33}
> +
> +!5 = metadata !{void (float*)* @kernel_func_minctasm, metadata !"kernel", i32 1}
> +!6 = metadata !{void (float*)* @kernel_func_minctasm,
> +                metadata !"minctasm", i32 42}
> +
> +!7 = metadata !{i64 addrspace(1)* @texture, metadata !"texture", i32 1}
> +!8 = metadata !{i64 addrspace(1)* @surface, metadata !"surface", i32 1}
>
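For reference, each !nvvm.annotations operand above is the annotated global followed by one or more string-key / i32-value pairs, and a single node may carry several pairs at once, as the maxntid and reqntid nodes do. A single-pair variant would look like this (hypothetical node number and value, same shape as !2):

    !9 = metadata !{void (float*)* @kernel_func_maxntid, metadata !"maxntidx", i32 128}

The keys map directly onto the PTX directives checked above: maxntid* to .maxntid, reqntid* to .reqntid, and minctasm to .minnctapersm.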
> Added: llvm/trunk/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll (added)
> +++ llvm/trunk/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll Fri May  4 15:18:50 2012
> @@ -0,0 +1,72 @@
> +; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
> +; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
> +
> +;; These tests should run for all targets
> +
> +;;===-- Basic instruction selection tests ---------------------------------===;;
> +
> +
> +;;; f64
> +
> +define double @fadd_f64(double %a, double %b) {
> +; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fadd double %a, %b
> +  ret double %ret
> +}
> +
> +define double @fsub_f64(double %a, double %b) {
> +; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fsub double %a, %b
> +  ret double %ret
> +}
> +
> +define double @fmul_f64(double %a, double %b) {
> +; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fmul double %a, %b
> +  ret double %ret
> +}
> +
> +define double @fdiv_f64(double %a, double %b) {
> +; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fdiv double %a, %b
> +  ret double %ret
> +}
> +
> +;; PTX does not have a floating-point rem instruction
> +
> +
> +;;; f32
> +
> +define float @fadd_f32(float %a, float %b) {
> +; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fadd float %a, %b
> +  ret float %ret
> +}
> +
> +define float @fsub_f32(float %a, float %b) {
> +; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fsub float %a, %b
> +  ret float %ret
> +}
> +
> +define float @fmul_f32(float %a, float %b) {
> +; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fmul float %a, %b
> +  ret float %ret
> +}
> +
> +define float @fdiv_f32(float %a, float %b) {
> +; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fdiv float %a, %b
> +  ret float %ret
> +}
> +
> +;; PTX does not have a floating-point rem instruction
>
> Added: llvm/trunk/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll (added)
> +++ llvm/trunk/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll Fri May  4 15:18:50 2012
> @@ -0,0 +1,72 @@
> +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
> +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
> +
> +;; These tests should run for all targets
> +
> +;;===-- Basic instruction selection tests ---------------------------------===;;
> +
> +
> +;;; f64
> +
> +define double @fadd_f64(double %a, double %b) {
> +; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fadd double %a, %b
> +  ret double %ret
> +}
> +
> +define double @fsub_f64(double %a, double %b) {
> +; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fsub double %a, %b
> +  ret double %ret
> +}
> +
> +define double @fmul_f64(double %a, double %b) {
> +; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fmul double %a, %b
> +  ret double %ret
> +}
> +
> +define double @fdiv_f64(double %a, double %b) {
> +; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fdiv double %a, %b
> +  ret double %ret
> +}
> +
> +;; PTX does not have a floating-point rem instruction
> +
> +
> +;;; f32
> +
> +define float @fadd_f32(float %a, float %b) {
> +; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fadd float %a, %b
> +  ret float %ret
> +}
> +
> +define float @fsub_f32(float %a, float %b) {
> +; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fsub float %a, %b
> +  ret float %ret
> +}
> +
> +define float @fmul_f32(float %a, float %b) {
> +; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fmul float %a, %b
> +  ret float %ret
> +}
> +
> +define float @fdiv_f32(float %a, float %b) {
> +; CHECK: div.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
> +; CHECK: ret
> +  %ret = fdiv float %a, %b
> +  ret float %ret
> +}
> +
> +;; PTX does not have a floating-point rem instruction
>
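Side note on the two FP test files: they are identical except for the f32 divide. For the same IR,

    %ret = fdiv float %a, %b

the sm_10 file checks for div.full.f32 while the sm_20 file checks for the IEEE-rounded div.rn.f32; f64 division uses div.rn.f64 on both.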
> Added: llvm/trunk/test/CodeGen/NVPTX/arithmetic-int.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/arithmetic-int.ll?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/NVPTX/arithmetic-int.ll (added)
> +++ llvm/trunk/test/CodeGen/NVPTX/arithmetic-int.ll Fri May  4 15:18:50 2012
> @@ -0,0 +1,295 @@
> +; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
> +; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
> +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
> +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
> +
> +;; These tests should run for all targets
> +
> +;;===-- Basic instruction selection tests ---------------------------------===;;
> +
> +
> +;;; i64
> +
> +define i64 @add_i64(i64 %a, i64 %b) {
> +; CHECK: add.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = add i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @sub_i64(i64 %a, i64 %b) {
> +; CHECK: sub.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = sub i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @mul_i64(i64 %a, i64 %b) {
> +; CHECK: mul.lo.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = mul i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @sdiv_i64(i64 %a, i64 %b) {
> +; CHECK: div.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = sdiv i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @udiv_i64(i64 %a, i64 %b) {
> +; CHECK: div.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = udiv i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @srem_i64(i64 %a, i64 %b) {
> +; CHECK: rem.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = srem i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @urem_i64(i64 %a, i64 %b) {
> +; CHECK: rem.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = urem i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @and_i64(i64 %a, i64 %b) {
> +; CHECK: and.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = and i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @or_i64(i64 %a, i64 %b) {
> +; CHECK: or.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = or i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @xor_i64(i64 %a, i64 %b) {
> +; CHECK: xor.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
> +; CHECK: ret
> +  %ret = xor i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @shl_i64(i64 %a, i64 %b) {
> +; PTX requires 32-bit shift amount
> +; CHECK: shl.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = shl i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @ashr_i64(i64 %a, i64 %b) {
> +; PTX requires 32-bit shift amount
> +; CHECK: shr.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = ashr i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +define i64 @lshr_i64(i64 %a, i64 %b) {
> +; PTX requires 32-bit shift amount
> +; CHECK: shr.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = lshr i64 %a, %b
> +  ret i64 %ret
> +}
> +
> +
> +;;; i32
> +
> +define i32 @add_i32(i32 %a, i32 %b) {
> +; CHECK: add.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = add i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @sub_i32(i32 %a, i32 %b) {
> +; CHECK: sub.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = sub i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @mul_i32(i32 %a, i32 %b) {
> +; CHECK: mul.lo.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = mul i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @sdiv_i32(i32 %a, i32 %b) {
> +; CHECK: div.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = sdiv i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @udiv_i32(i32 %a, i32 %b) {
> +; CHECK: div.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = udiv i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @srem_i32(i32 %a, i32 %b) {
> +; CHECK: rem.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = srem i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @urem_i32(i32 %a, i32 %b) {
> +; CHECK: rem.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = urem i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @and_i32(i32 %a, i32 %b) {
> +; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = and i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @or_i32(i32 %a, i32 %b) {
> +; CHECK: or.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = or i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @xor_i32(i32 %a, i32 %b) {
> +; CHECK: xor.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = xor i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @shl_i32(i32 %a, i32 %b) {
> +; CHECK: shl.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = shl i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @ashr_i32(i32 %a, i32 %b) {
> +; CHECK: shr.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = ashr i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +define i32 @lshr_i32(i32 %a, i32 %b) {
> +; CHECK: shr.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = lshr i32 %a, %b
> +  ret i32 %ret
> +}
> +
> +;;; i16
> +
> +define i16 @add_i16(i16 %a, i16 %b) {
> +; CHECK: add.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
> +; CHECK: ret
> +  %ret = add i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @sub_i16(i16 %a, i16 %b) {
> +; CHECK: sub.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
> +; CHECK: ret
> +  %ret = sub i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @mul_i16(i16 %a, i16 %b) {
> +; CHECK: mul.lo.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
> +; CHECK: ret
> +  %ret = mul i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @sdiv_i16(i16 %a, i16 %b) {
> +; CHECK: div.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
> +; CHECK: ret
> +  %ret = sdiv i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @udiv_i16(i16 %a, i16 %b) {
> +; CHECK: div.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
> +; CHECK: ret
> +  %ret = udiv i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @srem_i16(i16 %a, i16 %b) {
> +; CHECK: rem.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
> +; CHECK: ret
> +  %ret = srem i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @urem_i16(i16 %a, i16 %b) {
> +; CHECK: rem.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
> +; CHECK: ret
> +  %ret = urem i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @and_i16(i16 %a, i16 %b) {
> +; CHECK: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
> +; CHECK: ret
> +  %ret = and i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @or_i16(i16 %a, i16 %b) {
> +; CHECK: or.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
> +; CHECK: ret
> +  %ret = or i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @xor_i16(i16 %a, i16 %b) {
> +; CHECK: xor.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
> +; CHECK: ret
> +  %ret = xor i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @shl_i16(i16 %a, i16 %b) {
> +; PTX requires 32-bit shift amount
> +; CHECK: shl.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = shl i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @ashr_i16(i16 %a, i16 %b) {
> +; PTX requires 32-bit shift amount
> +; CHECK: shr.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = ashr i16 %a, %b
> +  ret i16 %ret
> +}
> +
> +define i16 @lshr_i16(i16 %a, i16 %b) {
> +; PTX requires 32-bit shift amount
> +; CHECK: shr.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
> +; CHECK: ret
> +  %ret = lshr i16 %a, %b
> +  ret i16 %ret
> +}
>
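The recurring "PTX requires 32-bit shift amount" comments explain why the i64 and i16 shift CHECK lines expect a 32-bit %r register as the final operand: the back-end presumably legalizes the IR shift amount to i32 (truncating or extending as needed), so plain IR such as

    %ret = shl i64 %a, %b

needs no special handling from the front end.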
> Added: llvm/trunk/test/CodeGen/NVPTX/calling-conv.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/calling-conv.ll?rev=156196&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/NVPTX/calling-conv.ll (added)
> +++ llvm/trunk/test/CodeGen/NVPTX/calling-conv.ll Fri May  4 15:18:50 2012
> @@ -0,0 +1,32 @@
> +; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
> +; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
> +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
> +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
> +
> +
> +;; Kernel function using ptx_kernel calling conv
> +
> +; CHECK: .entry kernel_func
> +define ptx_kernel void @kernel_func(float* %a) {
> +; CHECK: ret
> +  ret void
> +}
> +
> +;; Device function
> +; CHECK: .func device_func
> +define void @device_func(float* %a) {
> +; CHECK: ret
> +  ret void
> +}
> +
> +;; Kernel function using NVVM metadata
> +; CHECK: .entry metadata_kernel
> +define void @metadata_kernel(float* %a) {
> +; CHECK: ret
> +  ret void
> +}
> +
> +
> +!nvvm.annotations = !{!1}
> +
> +!1 = metadata !{void (float*)* @metadata_kernel, metadata !"kernel", i32 1}
>
>
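This last test neatly documents the two ways the back-end recognizes kernels: the ptx_kernel calling convention on the define, or a !nvvm.annotations "kernel" entry; unannotated functions default to device functions and get .func rather than .entry. A front end that cannot set calling conventions can therefore fall back on metadata, e.g. (hypothetical function, mirroring @metadata_kernel above):

    define void @my_kernel(float* %out) {
      ret void
    }

    !nvvm.annotations = !{!0}
    !0 = metadata !{void (float*)* @my_kernel, metadata !"kernel", i32 1}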