[llvm-branch-commits] [llvm-branch] r161895 [2/5] - in /llvm/branches/AMDILBackend/lib/Target: ./ AMDIL/ AMDIL/TargetInfo/

Victor Oliveira Victor.Oliveira at amd.com
Tue Aug 14 14:38:59 PDT 2012


Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1181 @@
+//===-- AMDILIntrinsics.td ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the amdil-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+//------------- Synchronization Functions - OpenCL 6.11.9 --------------------//
+  def int_AMDIL_fence   : GCCBuiltin<"__amdil_mem_fence">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_fence_global   : GCCBuiltin<"__amdil_mem_fence_global">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_fence_local   : GCCBuiltin<"__amdil_mem_fence_local">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_fence_region   : GCCBuiltin<"__amdil_mem_fence_region">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_fence_global_local   : GCCBuiltin<"__amdil_mem_fence_global_local">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_fence_region_global   : GCCBuiltin<"__amdil_mem_fence_region_global">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_fence_region_local   : GCCBuiltin<"__amdil_mem_fence_region_local">,
+        UnaryIntNoRetInt;
+
+  def int_AMDIL_read_fence   : GCCBuiltin<"__amdil_read_mem_fence">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_read_fence_global   : GCCBuiltin<"__amdil_read_mem_fence_global">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_read_fence_local   : GCCBuiltin<"__amdil_read_mem_fence_local">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_read_fence_region   : GCCBuiltin<"__amdil_read_mem_fence_region">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_read_fence_global_local   : GCCBuiltin<"__amdil_read_mem_fence_global_local">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_read_fence_region_global   : GCCBuiltin<"__amdil_read_mem_fence_region_global">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_read_fence_region_local   : GCCBuiltin<"__amdil_read_mem_fence_region_local">,
+        UnaryIntNoRetInt;
+
+  def int_AMDIL_write_fence   : GCCBuiltin<"__amdil_write_mem_fence">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_write_fence_global   : GCCBuiltin<"__amdil_write_mem_fence_global">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_write_fence_local   : GCCBuiltin<"__amdil_write_mem_fence_local">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_write_fence_region   : GCCBuiltin<"__amdil_write_mem_fence_region">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_write_fence_global_local   : GCCBuiltin<"__amdil_write_mem_fence_global_local">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_write_fence_region_global   : GCCBuiltin<"__amdil_write_mem_fence_region_global">,
+        UnaryIntNoRetInt;
+  def int_AMDIL_write_fence_region_local   : GCCBuiltin<"__amdil_write_mem_fence_region_local">,
+        UnaryIntNoRetInt;
+
+  def int_AMDIL_early_exit : GCCBuiltin<"__amdil_early_exit">,
+        UnaryIntNoRetInt;
+
+  def int_AMDIL_cmov_logical  : GCCBuiltin<"__amdil_cmov_logical">,
+          TernaryIntInt;
+  def int_AMDIL_fabs : GCCBuiltin<"__amdil_fabs">, UnaryIntFloat;
+  def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
+
+  def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
+          TernaryIntInt;
+  def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
+          TernaryIntInt;
+  def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
+          UnaryIntInt;
+  def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
+          UnaryIntInt;
+  def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
+          UnaryIntInt;
+  def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
+          UnaryIntInt;
+  def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
+          UnaryIntInt;
+  def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
+                    TernaryIntInt;
+  def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
+                    TernaryIntInt;
+  def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
+                    QuaternaryIntInt;
+  def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
+      TernaryIntInt;
+  def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
+      BinaryIntInt;
+  def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
+          TernaryIntInt;
+  def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
+          TernaryIntInt;
+  def int_AMDIL_mad     : GCCBuiltin<"__amdil_mad">,
+          TernaryIntFloat;
+  def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
+          BinaryIntInt;
+  def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
+          BinaryIntInt;
+  def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
+          BinaryIntInt;
+  def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
+          BinaryIntInt;
+  def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
+          BinaryIntInt;
+  def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
+          BinaryIntInt;
+  def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
+          TernaryIntInt;
+  def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
+          TernaryIntInt;
+  def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
+          BinaryIntInt;
+  def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
+          BinaryIntInt;
+  def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
+          BinaryIntInt;
+  def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
+          BinaryIntInt;
+  def int_AMDIL_min     : GCCBuiltin<"__amdil_min">,
+          BinaryIntFloat;
+  def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
+          BinaryIntInt;
+  def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
+          BinaryIntInt;
+  def int_AMDIL_max     : GCCBuiltin<"__amdil_max">,
+          BinaryIntFloat;
+  def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
+          TernaryIntInt;
+  def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
+          TernaryIntInt;
+  def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
+          TernaryIntInt;
+  def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
+          UnaryIntFloat;
+  def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
+          TernaryIntFloat;
+  def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
+          UnaryIntFloat;
+  def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
+          UnaryIntFloat;
+  def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
+          UnaryIntFloat;
+  def int_AMDIL_round_posinf : GCCBuiltin<"__amdil_round_posinf">,
+          UnaryIntFloat;
+  def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
+          UnaryIntFloat;
+  def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
+          UnaryIntFloat;
+  def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
+          UnaryIntFloat;
+  def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
+          UnaryIntFloat;
+  def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
+          UnaryIntFloat;
+  def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
+          UnaryIntFloat;
+  def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
+          UnaryIntFloat;
+  def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
+          UnaryIntFloat;
+  def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
+          UnaryIntFloat;
+  def int_AMDIL_ftz : GCCBuiltin<"__amdil_ftz">,
+      UnaryIntFloat;
+  def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
+  def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
+  def int_AMDIL_div_precise : GCCBuiltin<"__amdil_div_precise">, BinaryIntFloat;
+  def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
+  def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
+          UnaryIntFloat;
+  def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
+          UnaryIntFloat;
+  def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
+          UnaryIntFloat;
+  def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
+          UnaryIntFloat;
+  def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
+          UnaryIntFloat;
+  def int_AMDIL_log : GCCBuiltin<"__amdil_log">,
+          UnaryIntFloat;
+  def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
+          UnaryIntFloat;
+  def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
+          UnaryIntFloat;
+  def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
+          UnaryIntFloat;
+  def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
+          TernaryIntFloat;
+  def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
+          UnaryIntFloat;
+  def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
+          UnaryIntFloat;
+  def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
+          UnaryIntFloat;
+  def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
+          TernaryIntFloat;
+  def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
+      Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
+           llvm_v4i32_ty, llvm_i32_ty], []>;
+
+  def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
+        Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
+ def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
+    Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+  def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
+      Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+  def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
+      ConvertIntITOF;
+  def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
+      ConvertIntFTOI;
+  def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
+      ConvertIntFTOI;
+  def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
+      ConvertIntFTOI;
+  def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
+      ConvertIntFTOI;
+  def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
+      ConvertIntFTOI;
+  def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
+      ConvertIntFTOI;
+ def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
+      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
+  def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
+      ConvertIntITOF;
+  def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
+      ConvertIntITOF;
+  def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
+      ConvertIntITOF;
+  def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
+      ConvertIntITOF;
+  def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
+        Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+          llvm_v2f32_ty, llvm_float_ty], []>;
+  def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
+        Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+          llvm_v2f32_ty], []>;
+  def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
+        Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+          llvm_v4f32_ty], []>;
+  def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
+        Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+          llvm_v4f32_ty], []>;
+//===---------------------- Image functions begin ------------------------===//
+  def int_AMDIL_image1d_write : GCCBuiltin<"__amdil_image1d_write">,
+      Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image1d_read_norm  : GCCBuiltin<"__amdil_image1d_read_norm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image1d_read_unnorm  : GCCBuiltin<"__amdil_image1d_read_unnorm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image1d_info0 : GCCBuiltin<"__amdil_image1d_info0">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+  def int_AMDIL_image1d_info1 : GCCBuiltin<"__amdil_image1d_info1">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image1d_array_write : GCCBuiltin<"__amdil_image1d_array_write">,
+      Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image1d_array_read_norm  : GCCBuiltin<"__amdil_image1d_array_read_norm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image1d_array_read_unnorm  : GCCBuiltin<"__amdil_image1d_array_read_unnorm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image1d_array_info0 : GCCBuiltin<"__amdil_image1d_array_info0">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+  def int_AMDIL_image1d_array_info1 : GCCBuiltin<"__amdil_image1d_array_info1">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image1d_buffer_write : GCCBuiltin<"__amdil_image1d_buffer_write">,
+      Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image1d_buffer_load : GCCBuiltin<"__amdil_image1d_buffer_load">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image1d_buffer_read_norm  : GCCBuiltin<"__amdil_image1d_buffer_read_norm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image1d_buffer_read_unnorm  : GCCBuiltin<"__amdil_image1d_buffer_read_unnorm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image1d_buffer_info0 : GCCBuiltin<"__amdil_image1d_buffer_info0">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+  def int_AMDIL_image1d_buffer_info1 : GCCBuiltin<"__amdil_image1d_buffer_info1">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image2d_write : GCCBuiltin<"__amdil_image2d_write">,
+      Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image2d_read_norm  : GCCBuiltin<"__amdil_image2d_read_norm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image2d_read_unnorm  : GCCBuiltin<"__amdil_image2d_read_unnorm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image2d_info0 : GCCBuiltin<"__amdil_image2d_info0">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+  def int_AMDIL_image2d_info1 : GCCBuiltin<"__amdil_image2d_info1">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+ def int_AMDIL_image2d_array_write : GCCBuiltin<"__amdil_image2d_array_write">,
+      Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image2d_array_read_norm  : GCCBuiltin<"__amdil_image2d_array_read_norm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image2d_array_read_unnorm  : GCCBuiltin<"__amdil_image2d_array_read_unnorm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image2d_array_info0 : GCCBuiltin<"__amdil_image2d_array_info0">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+  def int_AMDIL_image2d_array_info1 : GCCBuiltin<"__amdil_image2d_array_info1">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+  def int_AMDIL_image3d_write : GCCBuiltin<"__amdil_image3d_write">,
+         Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image3d_read_norm  : GCCBuiltin<"__amdil_image3d_read_norm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image3d_read_unnorm  : GCCBuiltin<"__amdil_image3d_read_unnorm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_image3d_info0 : GCCBuiltin<"__amdil_image3d_info0">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+  def int_AMDIL_image3d_info1 : GCCBuiltin<"__amdil_image3d_info1">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
+
+//===---------------------- Image functions end --------------------------===//
+
+  def int_AMDIL_append_alloc_i32 : GCCBuiltin<"__amdil_append_alloc">,
+      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+  def int_AMDIL_append_consume_i32 : GCCBuiltin<"__amdil_append_consume">,
+      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
+
+  def int_AMDIL_get_global_id : GCCBuiltin<"__amdil_get_global_id_int">,
+      Intrinsic<[llvm_v4i32_ty], [], []>;
+  def int_AMDIL_get_local_id : GCCBuiltin<"__amdil_get_local_id_int">,
+      Intrinsic<[llvm_v4i32_ty], [], []>;
+  def int_AMDIL_get_group_id : GCCBuiltin<"__amdil_get_group_id_int">,
+      Intrinsic<[llvm_v4i32_ty], [], []>;
+  def int_AMDIL_get_num_groups : GCCBuiltin<"__amdil_get_num_groups_int">,
+      Intrinsic<[llvm_v4i32_ty], [], []>;
+  def int_AMDIL_get_local_size : GCCBuiltin<"__amdil_get_local_size_int">,
+      Intrinsic<[llvm_v4i32_ty], [], []>;
+  def int_AMDIL_get_global_size : GCCBuiltin<"__amdil_get_global_size_int">,
+      Intrinsic<[llvm_v4i32_ty], [], []>;
+  def int_AMDIL_get_global_offset : GCCBuiltin<"__amdil_get_global_offset_int">,
+      Intrinsic<[llvm_v4i32_ty], [], []>;
+  def int_AMDIL_get_work_dim : GCCBuiltin<"get_work_dim">,
+      Intrinsic<[llvm_i32_ty], [], []>;
+  def int_AMDIL_get_printf_offset : GCCBuiltin<"__amdil_get_printf_offset">,
+      Intrinsic<[llvm_anyint_ty], []>;
+  def int_AMDIL_get_printf_size : GCCBuiltin<"__amdil_get_printf_size">,
+      Intrinsic<[llvm_anyint_ty], []>;
+
+/// Intrinsics for atomic instructions with no return value
+/// Signed 32 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gi32_noret : GCCBuiltin<"__atomic_add_gi32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_gi32_noret : GCCBuiltin<"__atomic_sub_gi32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_gi32_noret : GCCBuiltin<"__atomic_rsub_gi32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_gi32_noret : GCCBuiltin<"__atomic_xchg_gi32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_gi32_noret : GCCBuiltin<"__atomic_inc_gi32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_gi32_noret : GCCBuiltin<"__atomic_dec_gi32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_gi32_noret : GCCBuiltin<"__atomic_cmpxchg_gi32_noret">,
+    TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_gi32_noret : GCCBuiltin<"__atomic_min_gi32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_gi32_noret : GCCBuiltin<"__atomic_max_gi32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_gi32_noret : GCCBuiltin<"__atomic_and_gi32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_gi32_noret : GCCBuiltin<"__atomic_or_gi32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_gi32_noret : GCCBuiltin<"__atomic_xor_gi32_noret">,
+    BinaryAtomicIntNoRet;
+
+
+
+/// Unsigned 32 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gu32_noret : GCCBuiltin<"__atomic_add_gu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_gu32_noret : GCCBuiltin<"__atomic_sub_gu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_gu32_noret : GCCBuiltin<"__atomic_rsub_gu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_gu32_noret : GCCBuiltin<"__atomic_xchg_gu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_gu32_noret : GCCBuiltin<"__atomic_inc_gu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_gu32_noret : GCCBuiltin<"__atomic_dec_gu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_gu32_noret : GCCBuiltin<"__atomic_cmpxchg_gu32_noret">,
+    TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_gu32_noret : GCCBuiltin<"__atomic_min_gu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_gu32_noret : GCCBuiltin<"__atomic_max_gu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_gu32_noret : GCCBuiltin<"__atomic_and_gu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_gu32_noret : GCCBuiltin<"__atomic_or_gu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_gu32_noret : GCCBuiltin<"__atomic_xor_gu32_noret">,
+    BinaryAtomicIntNoRet;
+
+
+/// Intrinsics for atomic instructions with a return value
+/// Signed 32 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gi32 : GCCBuiltin<"__atomic_add_gi32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_sub_gi32 : GCCBuiltin<"__atomic_sub_gi32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_gi32 : GCCBuiltin<"__atomic_rsub_gi32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_gi32 : GCCBuiltin<"__atomic_xchg_gi32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_inc_gi32 : GCCBuiltin<"__atomic_inc_gi32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_dec_gi32 : GCCBuiltin<"__atomic_dec_gi32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_gi32 : GCCBuiltin<"__atomic_cmpxchg_gi32">,
+    TernaryAtomicInt;
+def int_AMDIL_atomic_min_gi32 : GCCBuiltin<"__atomic_min_gi32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_max_gi32 : GCCBuiltin<"__atomic_max_gi32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_and_gi32 : GCCBuiltin<"__atomic_and_gi32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_or_gi32 : GCCBuiltin<"__atomic_or_gi32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xor_gi32 : GCCBuiltin<"__atomic_xor_gi32">,
+    BinaryAtomicInt;
+
+/// 32 bit float atomics required by OpenCL
+def int_AMDIL_atomic_xchg_gf32 : GCCBuiltin<"__atomic_xchg_gf32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_gf32_noret : GCCBuiltin<"__atomic_xchg_gf32_noret">,
+    BinaryAtomicIntNoRet;
+
+/// Unsigned 32 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gu32 : GCCBuiltin<"__atomic_add_gu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_sub_gu32 : GCCBuiltin<"__atomic_sub_gu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_gu32 : GCCBuiltin<"__atomic_rsub_gu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_gu32 : GCCBuiltin<"__atomic_xchg_gu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_inc_gu32 : GCCBuiltin<"__atomic_inc_gu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_dec_gu32 : GCCBuiltin<"__atomic_dec_gu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_gu32 : GCCBuiltin<"__atomic_cmpxchg_gu32">,
+    TernaryAtomicInt;
+def int_AMDIL_atomic_min_gu32 : GCCBuiltin<"__atomic_min_gu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_max_gu32 : GCCBuiltin<"__atomic_max_gu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_and_gu32 : GCCBuiltin<"__atomic_and_gu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_or_gu32 : GCCBuiltin<"__atomic_or_gu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xor_gu32 : GCCBuiltin<"__atomic_xor_gu32">,
+    BinaryAtomicInt;
+
+
+/// Intrinsics for atomic instructions with no return value
+/// Signed 32 bit integer atomics for local address space
+def int_AMDIL_atomic_add_li32_noret : GCCBuiltin<"__atomic_add_li32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_li32_noret : GCCBuiltin<"__atomic_sub_li32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_li32_noret : GCCBuiltin<"__atomic_rsub_li32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_li32_noret : GCCBuiltin<"__atomic_xchg_li32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_li32_noret : GCCBuiltin<"__atomic_inc_li32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_li32_noret : GCCBuiltin<"__atomic_dec_li32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_li32_noret : GCCBuiltin<"__atomic_cmpxchg_li32_noret">,
+    TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_li32_noret : GCCBuiltin<"__atomic_min_li32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_li32_noret : GCCBuiltin<"__atomic_max_li32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_li32_noret : GCCBuiltin<"__atomic_and_li32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_li32_noret : GCCBuiltin<"__atomic_or_li32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_mskor_li32_noret : GCCBuiltin<"__atomic_mskor_li32_noret">,
+    TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_li32_noret : GCCBuiltin<"__atomic_xor_li32_noret">,
+    BinaryAtomicIntNoRet;
+
+/// Signed 32 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ri32_noret : GCCBuiltin<"__atomic_add_ri32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_ri32_noret : GCCBuiltin<"__atomic_sub_ri32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_ri32_noret : GCCBuiltin<"__atomic_rsub_ri32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_ri32_noret : GCCBuiltin<"__atomic_xchg_ri32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_ri32_noret : GCCBuiltin<"__atomic_inc_ri32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_ri32_noret : GCCBuiltin<"__atomic_dec_ri32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_ri32_noret : GCCBuiltin<"__atomic_cmpxchg_ri32_noret">,
+    TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_ri32_noret : GCCBuiltin<"__atomic_min_ri32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_ri32_noret : GCCBuiltin<"__atomic_max_ri32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_ri32_noret : GCCBuiltin<"__atomic_and_ri32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_ri32_noret : GCCBuiltin<"__atomic_or_ri32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_mskor_ri32_noret : GCCBuiltin<"__atomic_mskor_ri32_noret">,
+    TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_ri32_noret : GCCBuiltin<"__atomic_xor_ri32_noret">,
+    BinaryAtomicIntNoRet;
+
+
+
+/// Unsigned 32 bit integer atomics for local address space
+def int_AMDIL_atomic_add_lu32_noret : GCCBuiltin<"__atomic_add_lu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_lu32_noret : GCCBuiltin<"__atomic_sub_lu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_lu32_noret : GCCBuiltin<"__atomic_rsub_lu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_lu32_noret : GCCBuiltin<"__atomic_xchg_lu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_lu32_noret : GCCBuiltin<"__atomic_inc_lu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_lu32_noret : GCCBuiltin<"__atomic_dec_lu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_lu32_noret : GCCBuiltin<"__atomic_cmpxchg_lu32_noret">,
+    TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_lu32_noret : GCCBuiltin<"__atomic_min_lu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_lu32_noret : GCCBuiltin<"__atomic_max_lu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_lu32_noret : GCCBuiltin<"__atomic_and_lu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_lu32_noret : GCCBuiltin<"__atomic_or_lu32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_mskor_lu32_noret : GCCBuiltin<"__atomic_mskor_lu32_noret">,
+    TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_lu32_noret : GCCBuiltin<"__atomic_xor_lu32_noret">,
+    BinaryAtomicIntNoRet;
+
+/// Unsigned 32 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ru32_noret : GCCBuiltin<"__atomic_add_ru32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_sub_ru32_noret : GCCBuiltin<"__atomic_sub_ru32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_rsub_ru32_noret : GCCBuiltin<"__atomic_rsub_ru32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_ru32_noret : GCCBuiltin<"__atomic_xchg_ru32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_inc_ru32_noret : GCCBuiltin<"__atomic_inc_ru32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_dec_ru32_noret : GCCBuiltin<"__atomic_dec_ru32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_cmpxchg_ru32_noret : GCCBuiltin<"__atomic_cmpxchg_ru32_noret">,
+    TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_min_ru32_noret : GCCBuiltin<"__atomic_min_ru32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_max_ru32_noret : GCCBuiltin<"__atomic_max_ru32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_and_ru32_noret : GCCBuiltin<"__atomic_and_ru32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_or_ru32_noret : GCCBuiltin<"__atomic_or_ru32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_mskor_ru32_noret : GCCBuiltin<"__atomic_mskor_ru32_noret">,
+    TernaryAtomicIntNoRet;
+def int_AMDIL_atomic_xor_ru32_noret : GCCBuiltin<"__atomic_xor_ru32_noret">,
+    BinaryAtomicIntNoRet;
+
+def int_AMDIL_get_cycle_count : GCCBuiltin<"__amdil_get_cycle_count">,
+    VoidIntLong;
+
+def int_AMDIL_compute_unit_id : GCCBuiltin<"__amdil_compute_unit_id">,
+    VoidIntInt;
+
+def int_AMDIL_wavefront_id : GCCBuiltin<"__amdil_wavefront_id">,
+    VoidIntInt;
+
+
+/// Intrinsics for atomic instructions with a return value
+/// Signed 32 bit integer atomics for local address space
+def int_AMDIL_atomic_add_li32 : GCCBuiltin<"__atomic_add_li32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_sub_li32 : GCCBuiltin<"__atomic_sub_li32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_li32 : GCCBuiltin<"__atomic_rsub_li32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_li32 : GCCBuiltin<"__atomic_xchg_li32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_inc_li32 : GCCBuiltin<"__atomic_inc_li32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_dec_li32 : GCCBuiltin<"__atomic_dec_li32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_li32 : GCCBuiltin<"__atomic_cmpxchg_li32">,
+    TernaryAtomicInt;
+def int_AMDIL_atomic_min_li32 : GCCBuiltin<"__atomic_min_li32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_max_li32 : GCCBuiltin<"__atomic_max_li32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_and_li32 : GCCBuiltin<"__atomic_and_li32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_or_li32 : GCCBuiltin<"__atomic_or_li32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_mskor_li32 : GCCBuiltin<"__atomic_mskor_li32">,
+    TernaryAtomicInt;
+def int_AMDIL_atomic_xor_li32 : GCCBuiltin<"__atomic_xor_li32">,
+    BinaryAtomicInt;
+
+/// Signed 32 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ri32 : GCCBuiltin<"__atomic_add_ri32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_sub_ri32 : GCCBuiltin<"__atomic_sub_ri32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_ri32 : GCCBuiltin<"__atomic_rsub_ri32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_ri32 : GCCBuiltin<"__atomic_xchg_ri32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_inc_ri32 : GCCBuiltin<"__atomic_inc_ri32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_dec_ri32 : GCCBuiltin<"__atomic_dec_ri32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_ri32 : GCCBuiltin<"__atomic_cmpxchg_ri32">,
+    TernaryAtomicInt;
+def int_AMDIL_atomic_min_ri32 : GCCBuiltin<"__atomic_min_ri32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_max_ri32 : GCCBuiltin<"__atomic_max_ri32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_and_ri32 : GCCBuiltin<"__atomic_and_ri32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_or_ri32 : GCCBuiltin<"__atomic_or_ri32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_mskor_ri32 : GCCBuiltin<"__atomic_mskor_ri32">,
+    TernaryAtomicInt;
+def int_AMDIL_atomic_xor_ri32 : GCCBuiltin<"__atomic_xor_ri32">,
+    BinaryAtomicInt;
+
+/// 32 bit float atomics required by OpenCL
+def int_AMDIL_atomic_xchg_lf32 : GCCBuiltin<"__atomic_xchg_lf32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_lf32_noret : GCCBuiltin<"__atomic_xchg_lf32_noret">,
+    BinaryAtomicIntNoRet;
+def int_AMDIL_atomic_xchg_rf32 : GCCBuiltin<"__atomic_xchg_rf32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_rf32_noret : GCCBuiltin<"__atomic_xchg_rf32_noret">,
+    BinaryAtomicIntNoRet;
+
+/// Unsigned 32 bit integer atomics for local address space
+def int_AMDIL_atomic_add_lu32 : GCCBuiltin<"__atomic_add_lu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_sub_lu32 : GCCBuiltin<"__atomic_sub_lu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_lu32 : GCCBuiltin<"__atomic_rsub_lu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_lu32 : GCCBuiltin<"__atomic_xchg_lu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_inc_lu32 : GCCBuiltin<"__atomic_inc_lu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_dec_lu32 : GCCBuiltin<"__atomic_dec_lu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_lu32 : GCCBuiltin<"__atomic_cmpxchg_lu32">,
+    TernaryAtomicInt;
+def int_AMDIL_atomic_min_lu32 : GCCBuiltin<"__atomic_min_lu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_max_lu32 : GCCBuiltin<"__atomic_max_lu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_and_lu32 : GCCBuiltin<"__atomic_and_lu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_or_lu32 : GCCBuiltin<"__atomic_or_lu32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_mskor_lu32 : GCCBuiltin<"__atomic_mskor_lu32">,
+    TernaryAtomicInt;
+def int_AMDIL_atomic_xor_lu32 : GCCBuiltin<"__atomic_xor_lu32">,
+    BinaryAtomicInt;
+
+/// Unsigned 32 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ru32 : GCCBuiltin<"__atomic_add_ru32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_sub_ru32 : GCCBuiltin<"__atomic_sub_ru32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_rsub_ru32 : GCCBuiltin<"__atomic_rsub_ru32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_xchg_ru32 : GCCBuiltin<"__atomic_xchg_ru32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_inc_ru32 : GCCBuiltin<"__atomic_inc_ru32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_dec_ru32 : GCCBuiltin<"__atomic_dec_ru32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_cmpxchg_ru32 : GCCBuiltin<"__atomic_cmpxchg_ru32">,
+    TernaryAtomicInt;
+def int_AMDIL_atomic_min_ru32 : GCCBuiltin<"__atomic_min_ru32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_max_ru32 : GCCBuiltin<"__atomic_max_ru32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_and_ru32 : GCCBuiltin<"__atomic_and_ru32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_or_ru32 : GCCBuiltin<"__atomic_or_ru32">,
+    BinaryAtomicInt;
+def int_AMDIL_atomic_mskor_ru32 : GCCBuiltin<"__atomic_mskor_ru32">,
+    TernaryAtomicInt;
+def int_AMDIL_atomic_xor_ru32 : GCCBuiltin<"__atomic_xor_ru32">,
+    BinaryAtomicInt;
+
+/// Semaphore signal/wait/init
+def int_AMDIL_semaphore_init : GCCBuiltin<"__amdil_semaphore_init">,
+    UnaryAtomicIntNoRet;
+def int_AMDIL_semaphore_wait : GCCBuiltin<"__amdil_semaphore_wait">,
+    VoidAtomicIntNoRet;
+def int_AMDIL_semaphore_signal : GCCBuiltin<"__amdil_semaphore_signal">,
+    VoidAtomicIntNoRet;
+def int_AMDIL_semaphore_size   : GCCBuiltin<"__amdil_max_semaphore_size">,
+    VoidIntInt;
+
+/// Intrinsics for atomic instructions with no return value
+/// Signed 64 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gi64_noret : GCCBuiltin<"__atomic_add_gi64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_sub_gi64_noret : GCCBuiltin<"__atomic_sub_gi64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_rsub_gi64_noret : GCCBuiltin<"__atomic_rsub_gi64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_xchg_gi64_noret : GCCBuiltin<"__atomic_xchg_gi64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_inc_gi64_noret : GCCBuiltin<"__atomic_inc_gi64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_dec_gi64_noret : GCCBuiltin<"__atomic_dec_gi64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_cmpxchg_gi64_noret : GCCBuiltin<"__atomic_cmpxchg_gi64_noret">,
+    TernaryAtomicLongNoRet;
+def int_AMDIL_atomic_min_gi64_noret : GCCBuiltin<"__atomic_min_gi64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_max_gi64_noret : GCCBuiltin<"__atomic_max_gi64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_and_gi64_noret : GCCBuiltin<"__atomic_and_gi64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_or_gi64_noret : GCCBuiltin<"__atomic_or_gi64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_xor_gi64_noret : GCCBuiltin<"__atomic_xor_gi64_noret">,
+    BinaryAtomicLongNoRet;
+
+
+
+/// Unsigned 64 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gu64_noret : GCCBuiltin<"__atomic_add_gu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_sub_gu64_noret : GCCBuiltin<"__atomic_sub_gu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_rsub_gu64_noret : GCCBuiltin<"__atomic_rsub_gu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_xchg_gu64_noret : GCCBuiltin<"__atomic_xchg_gu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_inc_gu64_noret : GCCBuiltin<"__atomic_inc_gu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_dec_gu64_noret : GCCBuiltin<"__atomic_dec_gu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_cmpxchg_gu64_noret : GCCBuiltin<"__atomic_cmpxchg_gu64_noret">,
+    TernaryAtomicLongNoRet;
+def int_AMDIL_atomic_min_gu64_noret : GCCBuiltin<"__atomic_min_gu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_max_gu64_noret : GCCBuiltin<"__atomic_max_gu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_and_gu64_noret : GCCBuiltin<"__atomic_and_gu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_or_gu64_noret : GCCBuiltin<"__atomic_or_gu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_xor_gu64_noret : GCCBuiltin<"__atomic_xor_gu64_noret">,
+    BinaryAtomicLongNoRet;
+
+
+/// Intrinsics for atomic instructions with a return value
+/// Signed 64 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gi64 : GCCBuiltin<"__atomic_add_gi64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_sub_gi64 : GCCBuiltin<"__atomic_sub_gi64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_rsub_gi64 : GCCBuiltin<"__atomic_rsub_gi64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_xchg_gi64 : GCCBuiltin<"__atomic_xchg_gi64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_inc_gi64 : GCCBuiltin<"__atomic_inc_gi64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_dec_gi64 : GCCBuiltin<"__atomic_dec_gi64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_cmpxchg_gi64 : GCCBuiltin<"__atomic_cmpxchg_gi64">,
+    TernaryAtomicLong;
+def int_AMDIL_atomic_min_gi64 : GCCBuiltin<"__atomic_min_gi64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_max_gi64 : GCCBuiltin<"__atomic_max_gi64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_and_gi64 : GCCBuiltin<"__atomic_and_gi64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_or_gi64 : GCCBuiltin<"__atomic_or_gi64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_xor_gi64 : GCCBuiltin<"__atomic_xor_gi64">,
+    BinaryAtomicLong;
+
+/// Unsigned 64 bit integer atomics for global address space
+def int_AMDIL_atomic_add_gu64 : GCCBuiltin<"__atomic_add_gu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_sub_gu64 : GCCBuiltin<"__atomic_sub_gu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_rsub_gu64 : GCCBuiltin<"__atomic_rsub_gu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_xchg_gu64 : GCCBuiltin<"__atomic_xchg_gu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_inc_gu64 : GCCBuiltin<"__atomic_inc_gu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_dec_gu64 : GCCBuiltin<"__atomic_dec_gu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_cmpxchg_gu64 : GCCBuiltin<"__atomic_cmpxchg_gu64">,
+    TernaryAtomicLong;
+def int_AMDIL_atomic_min_gu64 : GCCBuiltin<"__atomic_min_gu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_max_gu64 : GCCBuiltin<"__atomic_max_gu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_and_gu64 : GCCBuiltin<"__atomic_and_gu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_or_gu64 : GCCBuiltin<"__atomic_or_gu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_xor_gu64 : GCCBuiltin<"__atomic_xor_gu64">,
+    BinaryAtomicLong;
+
+/// Intrinsics for atomic instructions with no return value
+/// Signed 64 bit integer atomics for local address space
+def int_AMDIL_atomic_add_li64_noret : GCCBuiltin<"__atomic_add_li64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_sub_li64_noret : GCCBuiltin<"__atomic_sub_li64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_rsub_li64_noret : GCCBuiltin<"__atomic_rsub_li64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_xchg_li64_noret : GCCBuiltin<"__atomic_xchg_li64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_inc_li64_noret : GCCBuiltin<"__atomic_inc_li64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_dec_li64_noret : GCCBuiltin<"__atomic_dec_li64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_cmpxchg_li64_noret : GCCBuiltin<"__atomic_cmpxchg_li64_noret">,
+    TernaryAtomicLongNoRet;
+def int_AMDIL_atomic_min_li64_noret : GCCBuiltin<"__atomic_min_li64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_max_li64_noret : GCCBuiltin<"__atomic_max_li64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_and_li64_noret : GCCBuiltin<"__atomic_and_li64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_or_li64_noret : GCCBuiltin<"__atomic_or_li64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_mskor_li64_noret : GCCBuiltin<"__atomic_mskor_li64_noret">,
+    TernaryAtomicLongNoRet;
+def int_AMDIL_atomic_xor_li64_noret : GCCBuiltin<"__atomic_xor_li64_noret">,
+    BinaryAtomicLongNoRet;
+
+/// Signed 64 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ri64_noret : GCCBuiltin<"__atomic_add_ri64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_sub_ri64_noret : GCCBuiltin<"__atomic_sub_ri64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_rsub_ri64_noret : GCCBuiltin<"__atomic_rsub_ri64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_xchg_ri64_noret : GCCBuiltin<"__atomic_xchg_ri64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_inc_ri64_noret : GCCBuiltin<"__atomic_inc_ri64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_dec_ri64_noret : GCCBuiltin<"__atomic_dec_ri64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_cmpxchg_ri64_noret : GCCBuiltin<"__atomic_cmpxchg_ri64_noret">,
+    TernaryAtomicLongNoRet;
+def int_AMDIL_atomic_min_ri64_noret : GCCBuiltin<"__atomic_min_ri64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_max_ri64_noret : GCCBuiltin<"__atomic_max_ri64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_and_ri64_noret : GCCBuiltin<"__atomic_and_ri64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_or_ri64_noret : GCCBuiltin<"__atomic_or_ri64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_mskor_ri64_noret : GCCBuiltin<"__atomic_mskor_ri64_noret">,
+    TernaryAtomicLongNoRet;
+def int_AMDIL_atomic_xor_ri64_noret : GCCBuiltin<"__atomic_xor_ri64_noret">,
+    BinaryAtomicLongNoRet;
+
+
+
+/// Unsigned 64 bit integer atomics for local address space
+def int_AMDIL_atomic_add_lu64_noret : GCCBuiltin<"__atomic_add_lu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_sub_lu64_noret : GCCBuiltin<"__atomic_sub_lu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_rsub_lu64_noret : GCCBuiltin<"__atomic_rsub_lu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_xchg_lu64_noret : GCCBuiltin<"__atomic_xchg_lu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_inc_lu64_noret : GCCBuiltin<"__atomic_inc_lu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_dec_lu64_noret : GCCBuiltin<"__atomic_dec_lu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_cmpxchg_lu64_noret : GCCBuiltin<"__atomic_cmpxchg_lu64_noret">,
+    TernaryAtomicLongNoRet;
+def int_AMDIL_atomic_min_lu64_noret : GCCBuiltin<"__atomic_min_lu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_max_lu64_noret : GCCBuiltin<"__atomic_max_lu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_and_lu64_noret : GCCBuiltin<"__atomic_and_lu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_or_lu64_noret : GCCBuiltin<"__atomic_or_lu64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_mskor_lu64_noret : GCCBuiltin<"__atomic_mskor_lu64_noret">,
+    TernaryAtomicLongNoRet;
+def int_AMDIL_atomic_xor_lu64_noret : GCCBuiltin<"__atomic_xor_lu64_noret">,
+    BinaryAtomicLongNoRet;
+
+/// Unsigned 64 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ru64_noret : GCCBuiltin<"__atomic_add_ru64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_sub_ru64_noret : GCCBuiltin<"__atomic_sub_ru64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_rsub_ru64_noret : GCCBuiltin<"__atomic_rsub_ru64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_xchg_ru64_noret : GCCBuiltin<"__atomic_xchg_ru64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_inc_ru64_noret : GCCBuiltin<"__atomic_inc_ru64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_dec_ru64_noret : GCCBuiltin<"__atomic_dec_ru64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_cmpxchg_ru64_noret : GCCBuiltin<"__atomic_cmpxchg_ru64_noret">,
+    TernaryAtomicLongNoRet;
+def int_AMDIL_atomic_min_ru64_noret : GCCBuiltin<"__atomic_min_ru64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_max_ru64_noret : GCCBuiltin<"__atomic_max_ru64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_and_ru64_noret : GCCBuiltin<"__atomic_and_ru64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_or_ru64_noret : GCCBuiltin<"__atomic_or_ru64_noret">,
+    BinaryAtomicLongNoRet;
+def int_AMDIL_atomic_mskor_ru64_noret : GCCBuiltin<"__atomic_mskor_ru64_noret">,
+    TernaryAtomicLongNoRet;
+def int_AMDIL_atomic_xor_ru64_noret : GCCBuiltin<"__atomic_xor_ru64_noret">,
+    BinaryAtomicLongNoRet;
+
+/// Intrinsics for atomic instructions with a return value
+/// Signed 64 bit integer atomics for local address space
+def int_AMDIL_atomic_add_li64 : GCCBuiltin<"__atomic_add_li64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_sub_li64 : GCCBuiltin<"__atomic_sub_li64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_rsub_li64 : GCCBuiltin<"__atomic_rsub_li64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_xchg_li64 : GCCBuiltin<"__atomic_xchg_li64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_inc_li64 : GCCBuiltin<"__atomic_inc_li64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_dec_li64 : GCCBuiltin<"__atomic_dec_li64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_cmpxchg_li64 : GCCBuiltin<"__atomic_cmpxchg_li64">,
+    TernaryAtomicLong;
+def int_AMDIL_atomic_min_li64 : GCCBuiltin<"__atomic_min_li64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_max_li64 : GCCBuiltin<"__atomic_max_li64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_and_li64 : GCCBuiltin<"__atomic_and_li64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_or_li64 : GCCBuiltin<"__atomic_or_li64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_mskor_li64 : GCCBuiltin<"__atomic_mskor_li64">,
+    TernaryAtomicLong;
+def int_AMDIL_atomic_xor_li64 : GCCBuiltin<"__atomic_xor_li64">,
+    BinaryAtomicLong;
+
+/// Signed 64 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ri64 : GCCBuiltin<"__atomic_add_ri64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_sub_ri64 : GCCBuiltin<"__atomic_sub_ri64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_rsub_ri64 : GCCBuiltin<"__atomic_rsub_ri64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_xchg_ri64 : GCCBuiltin<"__atomic_xchg_ri64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_inc_ri64 : GCCBuiltin<"__atomic_inc_ri64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_dec_ri64 : GCCBuiltin<"__atomic_dec_ri64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_cmpxchg_ri64 : GCCBuiltin<"__atomic_cmpxchg_ri64">,
+    TernaryAtomicLong;
+def int_AMDIL_atomic_min_ri64 : GCCBuiltin<"__atomic_min_ri64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_max_ri64 : GCCBuiltin<"__atomic_max_ri64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_and_ri64 : GCCBuiltin<"__atomic_and_ri64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_or_ri64 : GCCBuiltin<"__atomic_or_ri64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_mskor_ri64 : GCCBuiltin<"__atomic_mskor_ri64">,
+    TernaryAtomicLong;
+def int_AMDIL_atomic_xor_ri64 : GCCBuiltin<"__atomic_xor_ri64">,
+    BinaryAtomicLong;
+
+/// Unsigned 64 bit integer atomics for local address space
+def int_AMDIL_atomic_add_lu64 : GCCBuiltin<"__atomic_add_lu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_sub_lu64 : GCCBuiltin<"__atomic_sub_lu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_rsub_lu64 : GCCBuiltin<"__atomic_rsub_lu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_xchg_lu64 : GCCBuiltin<"__atomic_xchg_lu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_inc_lu64 : GCCBuiltin<"__atomic_inc_lu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_dec_lu64 : GCCBuiltin<"__atomic_dec_lu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_cmpxchg_lu64 : GCCBuiltin<"__atomic_cmpxchg_lu64">,
+    TernaryAtomicLong;
+def int_AMDIL_atomic_min_lu64 : GCCBuiltin<"__atomic_min_lu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_max_lu64 : GCCBuiltin<"__atomic_max_lu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_and_lu64 : GCCBuiltin<"__atomic_and_lu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_or_lu64 : GCCBuiltin<"__atomic_or_lu64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_mskor_lu64 : GCCBuiltin<"__atomic_mskor_lu64">,
+    TernaryAtomicLong;
+def int_AMDIL_atomic_xor_lu64 : GCCBuiltin<"__atomic_xor_lu64">,
+    BinaryAtomicLong;
+
+/// Unsigned 64 bit integer atomics for region address space
+def int_AMDIL_atomic_add_ru64 : GCCBuiltin<"__atomic_add_ru64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_sub_ru64 : GCCBuiltin<"__atomic_sub_ru64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_rsub_ru64 : GCCBuiltin<"__atomic_rsub_ru64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_xchg_ru64 : GCCBuiltin<"__atomic_xchg_ru64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_inc_ru64 : GCCBuiltin<"__atomic_inc_ru64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_dec_ru64 : GCCBuiltin<"__atomic_dec_ru64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_cmpxchg_ru64 : GCCBuiltin<"__atomic_cmpxchg_ru64">,
+    TernaryAtomicLong;
+def int_AMDIL_atomic_min_ru64 : GCCBuiltin<"__atomic_min_ru64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_max_ru64 : GCCBuiltin<"__atomic_max_ru64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_and_ru64 : GCCBuiltin<"__atomic_and_ru64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_or_ru64 : GCCBuiltin<"__atomic_or_ru64">,
+    BinaryAtomicLong;
+def int_AMDIL_atomic_mskor_ru64 : GCCBuiltin<"__atomic_mskor_ru64">,
+    TernaryAtomicLong;
+def int_AMDIL_atomic_xor_ru64 : GCCBuiltin<"__atomic_xor_ru64">,
+    BinaryAtomicLong;
+}
+let TargetPrefix="AMDIL", isTarget = 1 in {
+def int_AMDIL_min3_i32 : GCCBuiltin<"__amdil_imin3">,
+    TernaryIntInt;
+def int_AMDIL_min3_u32 : GCCBuiltin<"__amdil_umin3">,
+    TernaryIntInt;
+def int_AMDIL_min3     : GCCBuiltin<"__amdil_min3">,
+    TernaryIntFloat;
+def int_AMDIL_max3_i32 : GCCBuiltin<"__amdil_imax3">,
+    TernaryIntInt;
+def int_AMDIL_max3_u32 : GCCBuiltin<"__amdil_umax3">,
+    TernaryIntInt;
+def int_AMDIL_max3     : GCCBuiltin<"__amdil_max3">,
+    TernaryIntFloat;
+def int_AMDIL_med3_i32 : GCCBuiltin<"__amdil_imed3">,
+    TernaryIntInt;
+def int_AMDIL_med3_u32 : GCCBuiltin<"__amdil_umed3">,
+    TernaryIntInt;
+def int_AMDIL_med3     : GCCBuiltin<"__amdil_med3">,
+    TernaryIntFloat;
+  def int_AMDIL_class : GCCBuiltin<"__amdil_class">,
+      Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+  def int_AMDIL_trig_preop_f64 : GCCBuiltin<"__amdil_trig_preop">,
+      Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_float_ty], []>;
+
+def int_AMDIL_frexp_exp : GCCBuiltin<"__amdil_frexp_exp">,
+    ConvertIntFTOI;
+def int_AMDIL_frexp_mant : GCCBuiltin<"__amdil_frexp_mant">,
+    UnaryIntFloat;
+def int_AMDIL_media_sad16 : GCCBuiltin<"__amdil_sad16">,
+    TernaryIntInt;
+def int_AMDIL_media_sad32 : GCCBuiltin<"__amdil_sad32">,
+    TernaryIntInt;
+
+def int_AMDIL_media_msad     : GCCBuiltin<"__amdil_msad">,
+    TernaryIntInt;
+def int_AMDIL_media_qsad     : GCCBuiltin<"__amdil_qsad">,
+    Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty]>;
+def int_AMDIL_media_mqsad     : GCCBuiltin<"__amdil_mqsad">,
+    Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty]>;
+/// Rounding and flushing control intrinsics
+/// Floating point addition - round to even
+def int_AMDIL_add_rte       : GCCBuiltin<"__amdil_add_rte">, 
+    BinaryIntFloat;
+/// Floating point addition - round to positive infinity
+def int_AMDIL_add_rtp       : GCCBuiltin<"__amdil_add_rtp">, 
+    BinaryIntFloat;
+/// Floating point addition - round to negative infinity
+def int_AMDIL_add_rtn       : GCCBuiltin<"__amdil_add_rtn">, 
+    BinaryIntFloat;
+/// Floating point addition - round to zero
+def int_AMDIL_add_rtz       : GCCBuiltin<"__amdil_add_rtz">, 
+    BinaryIntFloat;
+/// Floating point subtraction - round to even
+def int_AMDIL_sub_rte       : GCCBuiltin<"__amdil_sub_rte">, 
+    BinaryIntFloat;
+/// Floating point subtraction - round to positive infinity
+def int_AMDIL_sub_rtp       : GCCBuiltin<"__amdil_sub_rtp">, 
+    BinaryIntFloat;
+/// Floating point subtraction - round to negative infinity
+def int_AMDIL_sub_rtn       : GCCBuiltin<"__amdil_sub_rtn">, 
+    BinaryIntFloat;
+/// Floating point subtraction - round to zero
+def int_AMDIL_sub_rtz       : GCCBuiltin<"__amdil_sub_rtz">, 
+    BinaryIntFloat;
+/// Floating point multiplication - round to even
+def int_AMDIL_mul_rte       : GCCBuiltin<"__amdil_mul_rte">, 
+    BinaryIntFloat;
+/// Floating point multiplication - round to positive infinity
+def int_AMDIL_mul_rtp       : GCCBuiltin<"__amdil_mul_rtp">, 
+    BinaryIntFloat;
+/// Floating point multiplication - round to negative infinity
+def int_AMDIL_mul_rtn       : GCCBuiltin<"__amdil_mul_rtn">, 
+    BinaryIntFloat;
+/// Floating point multiplication - round to zero
+def int_AMDIL_mul_rtz       : GCCBuiltin<"__amdil_mul_rtz">, 
+    BinaryIntFloat;
+/// Floating point mad - round to even
+def int_AMDIL_mad_rte       : GCCBuiltin<"__amdil_mad_rte">, 
+    TernaryIntFloat;
+/// Floating point mad - round to positive infinity
+def int_AMDIL_mad_rtp       : GCCBuiltin<"__amdil_mad_rtp">, 
+    TernaryIntFloat;
+/// Floating point mad - round to negative infinity
+def int_AMDIL_mad_rtn       : GCCBuiltin<"__amdil_mad_rtn">, 
+    TernaryIntFloat;
+/// Floating point mad - round to zero
+def int_AMDIL_mad_rtz       : GCCBuiltin<"__amdil_mad_rtz">, 
+    TernaryIntFloat;
+/// Floating point fused multiply-add - round to even
+def int_AMDIL_fma_rte       : GCCBuiltin<"__amdil_fma_rte">, 
+    TernaryIntFloat;
+/// Floating point fused multiply-add - round to positive infinity
+def int_AMDIL_fma_rtp       : GCCBuiltin<"__amdil_fma_rtp">, 
+    TernaryIntFloat;
+/// Floating point fused multiply-add - round to negative infinity
+def int_AMDIL_fma_rtn       : GCCBuiltin<"__amdil_fma_rtn">, 
+    TernaryIntFloat;
+/// Floating point fused multiply-add - round to zero
+def int_AMDIL_fma_rtz       : GCCBuiltin<"__amdil_fma_rtz">, 
+    TernaryIntFloat;
+
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,105 @@
+//===-- AMDILKernel.h -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of an AMDILKernel object and the various subclasses that are
+// used.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_KERNEL_H_
+#define _AMDIL_KERNEL_H_
+#include "AMDIL.h"
+#include "llvm/Value.h"
+#include "llvm/Constant.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineFunction.h"
+namespace llvm
+{
+class AMDILSubtarget;
+class AMDILTargetMachine;
+/// Structure that holds information for a single local/region address-space array.
+typedef struct _AMDILArrayMemRec {
+  uint32_t vecSize; // size of each vector in the array
+  uint32_t offset;  // offset into the local/region memory section
+  uint32_t align;   // alignment in bytes
+  bool isHW;        // true when hardware memory backs the array, false for software emulation
+  bool isRegion;    // true when GDS (region memory) is used for this array
+} AMDILArrayMem;
+
+/// Structure that holds information about a constant address space
+/// pointer (see isArgument for whether it arrived as a kernel argument).
+typedef struct _AMDILConstPtrRec {
+  const llvm::Value *base; // IR value this constant pointer originates from
+  uint32_t size; // size of the pointed-to constant data
+  uint32_t offset; // offset into the constant memory section
+  uint32_t align; // alignment in bytes
+  uint32_t cbNum; // hardware constant-buffer index; value of 0 means that it does not use hw CB
+  bool isArray; // flag to specify that this is an array
+  bool isArgument; // flag to specify that this is for a kernel argument
+  bool usesHardware; // flag to specify if a hardware constant buffer is used or not
+  std::string name; // name of the pointer/argument
+} AMDILConstPtr;
+
+/// Structure that holds information for all local/region address
+/// arrays declared in a single kernel.
+typedef struct _AMDILLocalArgRec {
+  llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS> local; // every local/region array used by the kernel
+  std::string name; // Kernel name
+} AMDILLocalArg;
+
+
+
+/// Structure that holds per-kernel launch attributes and argument info (typedef'd AMDILKernelAttr).
+typedef struct _AMDILkernelArgRec {
+  uint32_t reqGroupSize[3]; // x,y,z sizes from reqd_work_group_size.
+  uint32_t reqRegionSize[3]; // x,y,z sizes from reqd_work_region_size.
+  llvm::SmallVector<uint32_t, DEFAULT_VEC_SLOTS> argInfo; // information about arguments.
+  bool mHasRWG; // true if reqd_work_group_size is specified.
+  bool mHasRWR; // true if reqd_work_region_size is specified.
+} AMDILKernelAttr;
+
+/// Per-kernel bookkeeping: memory-section sizes, attributes, and image usage.
+class AMDILKernel
+{
+public:
+  AMDILKernel(const std::string& name, bool clKernel) : // all section sizes start at zero
+    curSize(0),
+    curRSize(0),
+    curHWSize(0),
+    curHWRSize(0),
+    constSize(0),
+    mKernel(clKernel),
+    mName(name),
+    sgv(NULL),
+    lvgv(NULL),
+    rvgv(NULL) {
+    memset(constSizes, 0, sizeof(constSizes)); // per-constant-buffer sizes also start at zero
+  }
+  uint32_t curSize; // Current size of local memory, hardware + software emulated
+  uint32_t curRSize; // Current size of region memory, hardware + software emulated
+  uint32_t curHWSize; // Current size of hardware local memory
+  uint32_t curHWRSize; // Current size of hardware region memory
+  uint32_t constSize; // Current size of software constant memory
+  bool mKernel; // Flag to specify if this represents an OpenCL kernel or not
+  std::string mName; // Name of current kernel
+  AMDILKernelAttr *sgv; // pointer to kernel attributes (ownership not shown here -- TODO confirm)
+  AMDILLocalArg *lvgv; // pointer to local attributes
+  AMDILLocalArg *rvgv; // pointer to region attributes
+  llvm::SmallVector<struct _AMDILConstPtrRec, DEFAULT_VEC_SLOTS> constPtr; // vector containing constant pointer information
+  uint32_t constSizes[HW_MAX_NUM_CB]; // Size of each hardware constant buffer
+  llvm::SmallSet<uint32_t, OPENCL_MAX_READ_IMAGES> readOnly; // set that specifies the read-only images for the kernel
+  llvm::SmallSet<uint32_t, OPENCL_MAX_WRITE_IMAGES> writeOnly; // set that specifies the write-only images for the kernel
+  llvm::SmallVector<std::pair<uint32_t, const llvm::Constant *>,
+       DEFAULT_VEC_SLOTS> CPOffsets; // Vector of constant pool offsets
+  typedef llvm::SmallVector<struct _AMDILConstPtrRec, DEFAULT_VEC_SLOTS>::iterator constptr_iterator; // iterator for constant pointers
+  typedef llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS>::iterator arraymem_iterator; // iterator for the memory array
+}; // AMDILKernel
+} // end llvm namespace
+#endif // _AMDIL_KERNEL_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1451 @@
+//===-- AMDILKernelManager.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILKernelManager.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILAsmPrinter.h"
+#include "AMDILDeviceInfo.h"
+#include "AMDILDevices.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILKernel.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdio>
+#include <ostream>
+#include <algorithm>
+#include <string>
+#include <queue>
+#include <list>
+#include <utility>
+using namespace llvm;
+#define NUM_EXTRA_SLOTS_PER_IMAGE 1
+
+// Print 'regname.swizzle,' for the given register.  The swizzle suffix is
+// implied by which register-class range 'reg' falls into; for the two
+// component classes (xy / zw) it further depends on whether the operand is
+// a destination, and whether a source should duplicate ('dupe') or
+// zero-fill the unused components.
+void
+printRegName(AMDILAsmPrinter *RegNames, unsigned reg, OSTREAM_TYPE &O, bool dst, bool dupe = false)
+{
+  const char *Suffix = ",";
+  if (reg >= AMDIL::Rx1 && reg < AMDIL::Rxy1) {
+    Suffix = ".x,";
+  } else if (reg >= AMDIL::Ry1 && reg < AMDIL::Rz1) {
+    Suffix = ".y,";
+  } else if (reg >= AMDIL::Rz1 && reg < AMDIL::Rzw1) {
+    Suffix = ".z,";
+  } else if (reg >= AMDIL::Rw1 && reg < AMDIL::Rx1) {
+    Suffix = ".w,";
+  } else if (reg >= AMDIL::Rxy1 && reg < AMDIL::Ry1) {
+    Suffix = dst ? ".xy__," : (dupe ? ".xyxy," : ".xy00,");
+  } else if (reg >= AMDIL::Rzw1 && reg < AMDIL::SDP) {
+    Suffix = dst ? ".__zw," : (dupe ? ".zwzw," : ".00zw,");
+  }
+  O << RegNames->getRegisterName(reg) << Suffix;
+}
+// Return the swizzle used to read the first component(s) of 'reg' from a
+// constant buffer.  'fcall' selects among packing variants (the 1090-1093
+// cases appear to correspond to sub-dword conversion intrinsics — see the
+// ishr/ishl sequences emitted by printConstantToRegMapping).
+const char*
+getFirstComponent(unsigned reg, unsigned fcall)
+{
+  // All four single-component register classes read a plain '.x'.
+  if ((reg >= AMDIL::Rx1 && reg < AMDIL::Rxy1) ||
+      (reg >= AMDIL::Ry1 && reg < AMDIL::Rz1) ||
+      (reg >= AMDIL::Rz1 && reg < AMDIL::Rzw1) ||
+      (reg >= AMDIL::Rw1 && reg < AMDIL::Rx1)) {
+    return ".x";
+  }
+  if (reg >= AMDIL::Rxy1 && reg < AMDIL::Ry1) {
+    switch (fcall) {
+    case 1090:
+    case 1091:
+    case 1092:
+      return ".xx";
+    default:
+      return ".xy";
+    }
+  }
+  if (reg >= AMDIL::Rzw1 && reg < AMDIL::SDP) {
+    switch (fcall) {
+    case 1090:
+    case 1091:
+    case 1092:
+      return ".00xx";
+    default:
+      return ".00xy";
+    }
+  }
+  switch (fcall) {
+  case 1090:
+  case 1091:
+    return ".xxxx";
+  case 1092:
+  case 1093:
+    return ".xxyy";
+  default:
+    return ".xyzw";
+  }
+}
+// Emit one compiler diagnostic line.  Messages whose text begins with 'E'
+// are reported as errors; everything else is a warning.  Always returns
+// false so it can be driven by binaryForEach.
+static bool errorPrint(const char *ptr, OSTREAM_TYPE &O)
+{
+  const char *prefix = (ptr[0] == 'E') ? ";error:" : ";warning:";
+  O << prefix << ptr << "\n";
+  return false;
+}
+// Declare one semaphore resource id.  Returns false for binaryForEach.
+static bool semaPrint(uint32_t val, OSTREAM_TYPE &O)
+{
+  O << "dcl_semaphore_id(" << val << ")"
+    << "\n";
+  return false;
+}
+// Declare an arena UAV, but only for ids inside the reserved
+// arena-segment range.  Returns false for binaryForEach.
+static bool arenaPrint(uint32_t val, OSTREAM_TYPE &O)
+{
+  if (val < ARENA_SEGMENT_RESERVED_UAVS) {
+    return false;
+  }
+  O << "dcl_arena_uav_id(" << val << ")\n";
+  return false;
+}
+
+// Declare a raw UAV for ids below 8, or for the default raw uav id 11.
+// Returns false for binaryForEach.
+static bool uavPrint(uint32_t val, OSTREAM_TYPE &O)
+{
+  bool isRawId = (val < 8) || (val == 11);
+  if (isRawId) {
+    O << "dcl_raw_uav_id(" << val << ")\n";
+  }
+  return false;
+}
+
+// SI-generation devices declare typeless UAVs with an explicit stride,
+// length and access mode.  Returns false for binaryForEach.
+static bool uavPrintSI(uint32_t val, OSTREAM_TYPE &O)
+{
+  O << "dcl_typeless_uav_id(" << val
+    << ")_stride(4)_length(4)_access(read_write)\n";
+  return false;
+}
+
+// Emit one ";printf_fmt:" metadata record: id, operand count, per-operand
+// sizes, format-string length, then the string itself with CR/LF escaped.
+// Returns false for binaryForEach.
+static bool
+printfPrint(std::pair<const std::string, PrintfInfo *> &data, OSTREAM_TYPE &O)
+{
+  PrintfInfo *info = data.second;
+  O << ";printf_fmt:" << info->getPrintfID();
+  O << ":" << info->getNumOperands();
+  for (size_t i = 0, e = info->getNumOperands(); i < e; ++i) {
+    // getOperandID appears to encode the size in bits; '>> 3' yields
+    // bytes — TODO confirm against PrintfInfo.
+    O << ":" << (info->getOperandID(i) >> 3);
+  }
+  // The stored key carries one trailing character (presumably a NUL)
+  // that is trimmed from the reported length.
+  uint32_t size = data.first.size() - 1;
+  O << ":" << size << ":";
+  const char *ptr = data.first.c_str();
+  for (size_t i = 0; i < size; ++i) {
+    switch (ptr[i]) {
+    case '\r':
+      O << "\\r";
+      break;
+    case '\n':
+      O << "\\n";
+      break;
+    default:
+      O << ptr[i];
+      break;
+    }
+  }
+  O << ";\n";
+  return false;
+}
+
+
+// Emit the ";pointer:" metadata record for one pointer kernel argument and
+// record its resource usage (UAV / constant / LDS / GDS / scratch) in the
+// machine-function info.
+//
+//   Ip             - the pointer argument being described.
+//   numWriteImages - write-image count (currently unused here; kept for
+//                    interface compatibility with callers).
+//   raw_uav_buffer - default raw UAV id (currently unused here).
+//   counter        - the argument's constant-buffer slot; slots are 16
+//                    bytes, hence the 'counter * 16' offset below.
+//   isKernel       - true for OpenCL kernels; only kernels may map
+//                    constant pointers onto hardware constant buffers.
+//   F              - owning function; must not be NULL.
+void AMDILKernelManager::updatePtrArg(Function::const_arg_iterator Ip,
+                                      int numWriteImages, int raw_uav_buffer,
+                                      int counter, bool isKernel,
+                                      const Function *F)
+{
+  assert(F && "Cannot pass a NULL Pointer to F!");
+  assert(Ip->getType()->isPointerTy() &&
+         "Argument must be a pointer to be passed into this function!\n");
+  std::string ptrArg(";pointer:");
+  const char *symTab = "NoSymTab";
+  uint32_t ptrID = getUAVID(Ip);
+  PointerType *PT = cast<PointerType>(Ip->getType());
+  uint32_t Align = 4;
+  const char *MemType = "uav";
+  if (PT->getElementType()->isSized()) {
+    // Use the element's allocation size, rounded up to a power of two.
+    Align = mTM->getTargetData()->getTypeAllocSize(PT->getElementType());
+    if ((Align & (Align - 1))) Align = NextPowerOf2(Align);
+  }
+  ptrArg += Ip->getName().str() + ":" + getTypeName(PT, symTab, mMFI,
+            mMFI->isSignedIntType(Ip)) + ":1:1:" +
+            itostr(counter * 16) + ":";
+  if (mSTM->overridesFlatAS()) {
+    MemType = "flat";
+    ptrID = 0;
+  } else {
+    switch (PT->getAddressSpace()) {
+    case AMDILAS::FLAT_ADDRESS:
+      if (!mSTM->device()->isSupported(AMDILDeviceInfo::FlatMem)) {
+        mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_FLAT_SUPPORT]);
+      }
+      MemType = "flat";
+      ptrID = 0;
+      break;
+    case AMDILAS::ADDRESS_NONE:
+      //O << "No Address space qualifier!";
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+      // Fixed: this was 'assert(1)', which can never fire; the intent is
+      // clearly to trap a missing address-space qualifier in debug builds.
+      assert(0 && "Found a pointer without an address space qualifier!");
+      break;
+    case AMDILAS::GLOBAL_ADDRESS:
+      if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+        // Arena-segment ids in the reserved range collapse onto uav 8.
+        if (ptrID >= ARENA_SEGMENT_RESERVED_UAVS) {
+          ptrID = 8;
+        }
+      }
+      mMFI->uav_insert(ptrID);
+      break;
+    case AMDILAS::CONSTANT_ADDRESS: {
+      if (isKernel && mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+        const AMDILKernel* t = mAMI->getKernel(F->getName());
+        if (mAMI->usesHWConstant(t, Ip->getName())) {
+          // Hardware constant buffer.
+          MemType = /*(isSI) ? "uc\0" :*/ "hc\0";
+          ptrID = mAMI->getConstPtrCB(t, Ip->getName());
+          mMFI->setUsesConstant();
+        } else {
+          // Software-emulated constant memory goes through a UAV.
+          MemType = "c\0";
+          mMFI->uav_insert(ptrID);
+        }
+      } else {
+        MemType = "c\0";
+        mMFI->uav_insert(ptrID);
+      }
+      break;
+    }
+    default:
+    case AMDILAS::PRIVATE_ADDRESS:
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+        MemType = (mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV))
+                  ? "up\0" : "hp\0";
+        mMFI->setUsesScratch();
+      } else {
+        MemType = "p\0";
+        mMFI->uav_insert(ptrID);
+      }
+      break;
+    case AMDILAS::REGION_ADDRESS:
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+        MemType = "hr\0";
+        ptrID = 0;
+        mMFI->setUsesGDS();
+      } else {
+        MemType = "r\0";
+        mMFI->uav_insert(ptrID);
+      }
+      break;
+    case AMDILAS::LOCAL_ADDRESS:
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+        MemType = "hl\0";
+        ptrID = 1;
+        mMFI->setUsesLDS();
+      } else {
+        MemType = "l\0";
+        mMFI->uav_insert(ptrID);
+      }
+      break;
+    };
+  }
+  ptrArg += std::string(MemType) + ":";
+  ptrArg += itostr(ptrID) + ":";
+  ptrArg += itostr(Align) + ":";
+  const Value* ptr = Ip;
+  // Access qualifier: read-only if every use is a read.
+  if (mMFI->read_ptr_count(ptr)) {
+    ptrArg += "RO";
+    // FIXME: add write-only pointer detection.
+    //} else if (mMFI->write_ptr_count(ptr)) {
+    //  ptrArg += "WO";
+  } else {
+    ptrArg += "RW";
+  }
+  ptrArg += (mMFI->isVolatilePointer(Ip)) ? ":1" : ":0";
+  ptrArg += (mMFI->isRestrictPointer(Ip)) ? ":1" : ":0";
+  mMFI->addMetadata(ptrArg, true);
+}
+
+// Construct a kernel manager bound to one target machine.  Per-function
+// state (mMF/mMFI/mAMI) stays NULL until setMF() is called.
+AMDILKernelManager::AMDILKernelManager(AMDILTargetMachine *TM)
+{
+  mTM = TM;
+  mSTM = TM->getSubtargetImpl();
+  mMF = NULL;
+  mMFI = NULL;
+  mAMI = NULL;
+  clear();
+}
+
+// The manager owns no dynamic resources; just reset the bookkeeping.
+AMDILKernelManager::~AMDILKernelManager()
+{
+  clear();
+}
+
+// Bind the manager to a new machine function and cache its
+// AMDIL-specific function and module info objects.
+void
+AMDILKernelManager::setMF(MachineFunction *MF)
+{
+  mMF = MF;
+  mAMI = &(MF->getMMI().getObjFileInfo<AMDILModuleInfo>());
+  mMFI = MF->getInfo<AMDILMachineFunctionInfo>();
+}
+
+// Reset per-kernel bookkeeping to its initial state.
+void AMDILKernelManager::clear()
+{
+  mUniqueID = 0;
+  mWasKernel = mHasImageWrite = mHasOutputInst = false;
+}
+
+// A compiler-generated write is needed only on a kernel's RETURN when no
+// image write or output instruction has been emitted.
+bool AMDILKernelManager::useCompilerWrite(const MachineInstr *MI)
+{
+  if (MI->getOpcode() != AMDIL::RETURN) {
+    return false;
+  }
+  return wasKernel() && !mHasImageWrite && !mHasOutputInst;
+}
+
+// Walk every argument of the current function and record its metadata via
+// mMFI->addMetadata(): scalar/vector values get ";value:", the opaque
+// OpenCL builtin object types get ";image:" / ";counter:" / ";sema:", and
+// all other pointers go through updatePtrArg().  For kernels the image
+// resource declarations are also printed directly to O.  mCBSize tracks
+// the running 16-byte constant-buffer slot index.  'buf' is unused here.
+void AMDILKernelManager::processArgMetadata(OSTREAM_TYPE &O,
+    uint32_t buf,
+    bool isKernel)
+{
+  const Function *F = mMF->getFunction();
+  const char * symTab = "NoSymTab";
+  Function::const_arg_iterator Ip = F->arg_begin();
+  Function::const_arg_iterator Ep = F->arg_end();
+
+  // An sret argument is not a real kernel argument; skip it (and flag it,
+  // since kernels should not have one).
+  if (F->hasStructRetAttr()) {
+    assert(Ip != Ep && "Invalid struct return function!");
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+    ++Ip;
+  }
+  uint32_t mCBSize = 0;
+  int raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+  bool MultiUAV = mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV);
+  bool ArenaSegment =
+    mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment);
+  int numWriteImages = mMFI->get_num_write_images();
+  // When all write-image slots are in use, or MultiUAV/ArenaSegment is
+  // active, pre-HD7XXX devices fall back to the arena UAV.
+  if (numWriteImages == OPENCL_MAX_WRITE_IMAGES || MultiUAV || ArenaSegment) {
+    if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+      raw_uav_buffer = mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+    }
+  }
+  uint32_t CounterNum = 0;
+  uint32_t SemaNum = 0;
+  uint32_t ROArg = 0;
+  uint32_t WOArg = 0;
+  uint32_t NumArg = 0;
+  while (Ip != Ep) {
+    Type *cType = Ip->getType();
+    if (cType->isIntOrIntVectorTy() || cType->isFPOrFPVectorTy()) {
+      std::string argMeta(";value:");
+      argMeta += Ip->getName().str() + ":" + getTypeName(cType, symTab, mMFI
+                 , mMFI->isSignedIntType(Ip)) + ":";
+      int bitsize = cType->getPrimitiveSizeInBits();
+      int numEle = 1;
+      if (cType->getTypeID() == Type::VectorTyID) {
+        numEle = cast<VectorType>(cType)->getNumElements();
+      }
+      argMeta += itostr(numEle) + ":1:" + itostr(mCBSize << 4);
+      mMFI->addMetadata(argMeta, true);
+
+      // Convert the bit size into a count of 16-byte slots (minimum 1).
+      // FIXME: simplify
+      if ((bitsize / numEle) < 32) {
+        bitsize = numEle >> 2;
+      } else {
+        bitsize >>= 7;
+      }
+      if (!bitsize) {
+        bitsize = 1;
+      }
+
+      mCBSize += bitsize;
+    } else if (const PointerType *PT = dyn_cast<PointerType>(cType)) {
+      Type *CT = PT->getElementType();
+      const StructType *ST = dyn_cast<StructType>(CT);
+      if (ST && ST->isOpaque()) {
+        // Opaque structs encode the OpenCL builtin object types by name.
+        StringRef name = ST->getName();
+        bool i1d  = name.startswith( "struct._image1d_t" );
+        bool i1da = name.startswith( "struct._image1d_array_t" );
+        bool i1db = name.startswith( "struct._image1d_buffer_t" );
+        bool i2d  = name.startswith( "struct._image2d_t" );
+        bool i2da = name.startswith( "struct._image2d_array_t" );
+        bool i3d  = name.startswith( "struct._image3d_t" );
+        bool c32  = name.startswith( "struct._counter32_t" );
+        bool c64  = name.startswith( "struct._counter64_t" );
+        bool sema = name.startswith( "struct._sema_t" );
+        // Fixed: 'i2d | i2da' used bitwise OR; logical OR is what was
+        // intended (same truth value on bools, but the typo hid intent).
+        if (i1d || i1da || i1db || i2d || i2da || i3d) {
+          if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+            std::string imageArg(";image:");
+            imageArg += Ip->getName().str() + ":";
+            if (i1d)       imageArg += "1D:";
+            else if (i1da) imageArg += "1DA:";
+            else if (i1db) imageArg += "1DB:";
+            else if (i2d)  imageArg += "2D:";
+            else if (i2da) imageArg += "2DA:";
+            else if (i3d)  imageArg += "3D:";
+
+            if (isKernel) {
+              if (mAMI->isReadOnlyImage (mMF->getFunction()->getName(),
+                                         (ROArg + WOArg))) {
+                imageArg += "RO:" + itostr(ROArg);
+                O << "dcl_resource_id(" << ROArg << ")_type(";
+                if (i1d)       O << "1d";
+                else if (i1da) O << "1darray";
+                else if (i1db) O << "buffer";
+                else if (i2d)  O << "2d";
+                else if (i2da) O << "2darray";
+                else if (i3d)  O << "3d";
+                O << ")_fmtx(unknown)_fmty(unknown)"
+                  << "_fmtz(unknown)_fmtw(unknown)\n";
+                ++ROArg;
+              } else if (mAMI->isWriteOnlyImage(mMF->getFunction()->getName(),
+                                                (ROArg + WOArg))) {
+                // Write images are declared as UAVs; ids wrap at 8.
+                uint32_t offset = 0;
+                offset += WOArg;
+                imageArg += "WO:" + itostr(offset & 0x7);
+                O << "dcl_uav_id(" << ((offset) & 0x7) << ")_type(";
+                if (i1d)       O << "1d";
+                else if (i1da) O << "1darray";
+                else if (i1db) O << "buffer";
+                else if (i2d)  O << "2d";
+                else if (i2da) O << "2darray";
+                else if (i3d)  O << "3d";
+                O << ")_fmtx(uint)\n";
+                ++WOArg;
+              } else {
+                imageArg += "RW:" + itostr(ROArg + WOArg);
+              }
+            }
+            imageArg += ":1:" + itostr(mCBSize * 16);
+            mMFI->addMetadata(imageArg, true);
+            mMFI->addi32Literal(mCBSize);
+            mCBSize += NUM_EXTRA_SLOTS_PER_IMAGE + 1;
+          } else {
+            mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
+          }
+        } else if (c32 || c64) {
+          std::string counterArg(";counter:");
+          counterArg += Ip->getName().str() + ":"
+                        + itostr(c32 ? 32 : 64) + ":"
+                        + itostr(CounterNum++) + ":1:" + itostr(mCBSize * 16);
+          mMFI->addMetadata(counterArg, true);
+          ++mCBSize;
+        } else if (sema) {
+          std::string semaArg(";sema:");
+          semaArg += Ip->getName().str() + ":" + itostr(SemaNum++)
+                     + ":1:" + itostr(mCBSize * 16);
+          mMFI->addMetadata(semaArg, true);
+          ++mCBSize;
+        } else {
+          // Any other opaque struct pointer is a plain pointer argument.
+          updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel,
+                       F);
+          ++mCBSize;
+        }
+      } else if (CT->getTypeID() == Type::StructTyID
+                 && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        // By-value private structs are packed into whole 16-byte slots.
+        const TargetData *td = mTM->getTargetData();
+        const StructLayout *sl = td->getStructLayout(dyn_cast<StructType>(CT));
+        int bytesize = sl->getSizeInBytes();
+        int reservedsize = (bytesize + 15) & ~15;
+        int numSlots = reservedsize >> 4;
+        if (!numSlots) {
+          numSlots = 1;
+        }
+        std::string structArg(";value:");
+        structArg += Ip->getName().str() + ":struct:"
+                     + itostr(bytesize) + ":1:" + itostr(mCBSize * 16);
+        mMFI->addMetadata(structArg, true);
+        mCBSize += numSlots;
+      } else if (CT->isIntOrIntVectorTy()
+                 || CT->isFPOrFPVectorTy()
+                 || CT->getTypeID() == Type::ArrayTyID
+                 || CT->getTypeID() == Type::PointerTyID
+                 || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+        updatePtrArg(Ip, numWriteImages, raw_uav_buffer, mCBSize, isKernel, F);
+        ++mCBSize;
+      } else {
+        assert(0 && "Cannot process current pointer argument");
+        mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+      }
+    } else {
+      assert(0 && "Cannot process current kernel argument");
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+    }
+    // Arguments declared constant get an extra ";constarg:" record.
+    if (mMFI->isConstantArgument(Ip)) {
+      std::string constArg(";constarg:");
+      constArg += itostr(NumArg) + ":" + Ip->getName().str();
+      mMFI->addMetadata(constArg, true);
+    }
+    ++NumArg;
+    ++Ip;
+  }
+}
+
+// Emit the kernel's function header: the "func <id>" line plus the
+// initialization of the SDP (from cb0[8]) and SP registers.
+void AMDILKernelManager::printHeader(AMDILAsmPrinter *AsmPrinter,
+                                     OSTREAM_TYPE &O,
+                                     const std::string &name)
+{
+  mName = name;
+  // Apple builds wrap the name in the __OpenCL_<name>_kernel scheme.
+  std::string kernelName = mSTM->isApple()
+                           ? "__OpenCL_" + name + "_kernel"
+                           : name;
+  int kernelId = mAMI->getOrCreateFunctionID(kernelName);
+  O << "func " << kernelId << " ; " << kernelName << "\n";
+  // 64-bit targets load both components of the SDP pair.
+  const char *SDPName = AsmPrinter->getRegisterName(AMDIL::SDP);
+  if (mSTM->is64bit()) {
+    O << "mov " << SDPName << ", cb0[8].xy\n";
+  } else {
+    O << "mov " << SDPName << ", cb0[8].x\n";
+  }
+  O << "mov " << AsmPrinter->getRegisterName(AMDIL::SP) << ", l1.0000\n";
+}
+
+// Emit the workgroup-size declaration for the current kernel, then the
+// LDS (local) and GDS (region) size declarations where the device uses
+// hardware-backed memory for them.
+void AMDILKernelManager::printGroupSize(OSTREAM_TYPE& O)
+{
+  // The HD4XXX generation of hardware does not support a 3D launch, so we need
+  // to use dcl_num_thread_per_group to specify the launch size. If the launch
+  // size is specified via a kernel attribute, we print it here. Otherwise we
+  // use the default size.
+  const AMDILKernel *kernel = mAMI->getKernel(mName);
+  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    if (kernel && kernel->sgv
+        && (kernel->sgv->mHasRWG
+            || !mMFI->usesLDS())) {
+      // if the user has specified what the required workgroup size is then we
+      // need to compile for that size and that size only.  Otherwise we compile
+      // for the max workgroup size that is passed in as an option to the
+      // backend.
+      O << "dcl_num_thread_per_group "
+        << kernel->sgv->reqGroupSize[0] << ", "
+        << kernel->sgv->reqGroupSize[1] << ", "
+        << kernel->sgv->reqGroupSize[2] << "          \n";
+    } else {
+      // If the kernel uses local memory, then the kernel is being
+      // compiled in single wavefront mode. So we have to generate code slightly
+      // different.
+      O << "dcl_num_thread_per_group "
+        << mSTM->device()->getWavefrontSize()
+        << ", 1, 1       \n";
+    }
+  } else {
+    // Otherwise we generate for devices that support 3D launch natively.  If
+    // the reqd_workgroup_size attribute was specified, then we can specify the
+    // exact launch dimensions.
+    if (kernel && kernel->sgv) {
+      if (kernel->sgv->mHasRWG) {
+        O << "dcl_num_thread_per_group "
+          << kernel->sgv->reqGroupSize[0] << ", "
+          << kernel->sgv->reqGroupSize[1] << ", "
+          << kernel->sgv->reqGroupSize[2] << "          \n";
+      } else {
+        // Otherwise we specify the largest workgroup size that can be launched.
+        O << "dcl_max_thread_per_group " <<
+          kernel->sgv->reqGroupSize[0]
+          * kernel->sgv->reqGroupSize[1]
+          * kernel->sgv->reqGroupSize[2] << " \n";
+      }
+    } else {
+      O << "dcl_max_thread_per_group " << mSTM->device()->getWavefrontSize() << "\n";
+    }
+  }
+  // Now that we have specified the workgroup size, lets declare the local
+  // memory size. If we are using hardware and we know the value at compile
+  // time, then we need to declare the correct value. Otherwise we should just
+  // declare the maximum size.
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+    // Fixed: 'kernel' may be NULL (the branches above guard it); treat a
+    // missing kernel record as zero hardware local usage rather than
+    // dereferencing a NULL pointer.  Sizes are rounded up to 4 bytes.
+    size_t kernelLocalSize = kernel ? ((kernel->curHWSize + 3) & ~3) : 0;
+    if (kernelLocalSize > mSTM->device()->getMaxLDSSize()) {
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_LOCAL_RESOURCES]);
+    }
+    // If there is a local pointer as a kernel argument, we don't know the size
+    // at compile time, so we reserve all of the space.
+    if (mMFI->usesLDS() && (mMFI->hasLDSArg() || !kernelLocalSize)) {
+      O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") "
+        << mSTM->device()->getMaxLDSSize() << "\n";
+      mMFI->setUsesMem(AMDILDevice::LDS_ID);
+    } else if (kernelLocalSize) {
+      // We know the size, so lets declare it correctly.
+      O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") "
+        << kernelLocalSize << "\n";
+      mMFI->setUsesMem(AMDILDevice::LDS_ID);
+    }
+  }
+  // If the device supports the region memory extension, which maps to our
+  // hardware GDS memory, then lets declare it so we can use it later on.
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
+    // Fixed: same NULL guard as for the LDS size above.
+    size_t kernelGDSSize = kernel ? ((kernel->curHWRSize + 3) & ~3) : 0;
+    if (kernelGDSSize > mSTM->device()->getMaxGDSSize()) {
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_REGION_RESOURCES]);
+    }
+    // If there is a region pointer as a kernel argument, we don't know the size
+    // at compile time, so we reserve all of the space.
+    if (mMFI->usesGDS() && (mMFI->hasGDSArg() || !kernelGDSSize)) {
+      O << "dcl_gds_id(" << DEFAULT_GDS_ID <<
+        ") " << mSTM->device()->getMaxGDSSize() << "\n";
+      mMFI->setUsesMem(AMDILDevice::GDS_ID);
+    } else if (kernelGDSSize) {
+      // We know the size, so lets declare it.
+      O << "dcl_gds_id(" << DEFAULT_GDS_ID <<
+        ") " << kernelGDSSize << "\n";
+      mMFI->setUsesMem(AMDILDevice::GDS_ID);
+    }
+  }
+}
+
+// Emit the resource declarations (raw/arena/typeless UAVs, semaphores)
+// required by the current kernel, then chain to getIntrinsicSetup() to
+// emit the thread/group id prologue.  The declaration order is
+// significant: MultiUAV, arena segments, semaphores, then the default
+// raw and arena UAVs.
+void
+AMDILKernelManager::printDecls(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O)
+{
+  // If we are a HD4XXX generation device, then we only support a single uav
+  // surface, so we declare it and leave.
+  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    O << "dcl_raw_uav_id("
+      << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+      << ")\n";
+    mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+    getIntrinsicSetup(AsmPrinter, O);
+    return;
+  }
+  // If we are supporting multiple uav's via the MultiUAV capability, then we
+  // need to print out the declarations here. MultiUAV conflicts with write
+  // images, so they only use 8 - NumWriteImages uav's. Therefore only pointers
+  // with ID's < 8 will get printed.
+  if (mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+    binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrint, O);
+    mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+  }
+  // If arena segments are supported, then we should emit them now.  Arena
+  // segments are similar to MultiUAV, except ArenaSegments are virtual and up
+  // to 1024 of them can coexist. These are more compiler hints for CAL and thus
+  // cannot overlap in any form.  Each ID maps to a separate piece of memory and
+  // CAL determines whether the load/stores should go to the fast path/slow path
+  // based on the usage and instruction.
+  if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+    binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), arenaPrint, O);
+  }
+
+  // Semaphores require hardware support; otherwise their use is an error.
+  if (mMFI->sema_size() && !mSTM->device()->usesHardware(AMDILDeviceInfo::Semaphore)) {
+    mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_SEMAPHORE_SUPPORT]);
+  } else {
+    binaryForEach(mMFI->sema_begin(), mMFI->sema_end(), semaPrint, O);
+  }
+  // Now that we have printed out all of the arena and multi uav declarations,
+  // we must print out the default raw uav id. This always exists on HD5XXX
+  // and HD6XXX hardware. The reason is that the hardware supports 12 UAV's and
+  // 11 are taken up by MultiUAV/Write Images and Arena.  However, if we do not
+  // have UAV 11 as the raw UAV and there are 8 write images, we must revert
+  // everything to the arena and not print out the default raw uav id.
+  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD5XXX
+      || mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX) {
+    if ((mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) < 11 &&
+         mMFI->get_num_write_images()
+         != OPENCL_MAX_WRITE_IMAGES
+         && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV))
+        || mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) {
+      if (!mMFI->usesMem(AMDILDevice::RAW_UAV_ID)
+          && mMFI->uav_count(mSTM->device()->
+                             getResourceID(AMDILDevice::RAW_UAV_ID))) {
+        O << "dcl_raw_uav_id("
+          << mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+        O << ")\n";
+        mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+      }
+    }
+    // If we have not printed out the arena ID yet, then do so here.
+    if (!mMFI->usesMem(AMDILDevice::ARENA_UAV_ID)
+        && mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaUAV)) {
+      O << "dcl_arena_uav_id("
+        << mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID) << ")\n";
+      mMFI->setUsesMem(AMDILDevice::ARENA_UAV_ID);
+    }
+  } else if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX
+             && !mSTM->overridesFlatAS()) {
+    // Post-HD6XXX (SI and later) devices declare typeless UAVs instead,
+    // unless the flat address space override is in effect.
+    binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), uavPrintSI, O);
+    mMFI->setUsesMem(AMDILDevice::RAW_UAV_ID);
+  }
+  getIntrinsicSetup(AsmPrinter, O);
+}
+
+// Emit the kernel prologue that materializes the OpenCL work-item ids:
+// local id (r1022), group id (r1023), global id (r1021), the flat group
+// id, and the software local/private/region base pointers where the
+// device emulates those memories in software.
+void AMDILKernelManager::getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter,
+    OSTREAM_TYPE &O)
+{
+  O << "mov r0.__z_, vThreadGrpIdFlat0.x\n"
+    << "mov r1022.xyz0, vTidInGrp0.xyz\n";
+  if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD4XXX) {
+    O << "mov r1023.xyz0, vThreadGrpId0.xyz\n";
+  } else {
+    // HD4XXX has no 3D group id register; reconstruct the 3D group id
+    // from the flat id (r0.z) and the group counts in cb0[2].
+    O << "imul r0.___w, cb0[2].x, cb0[2].y\n"
+      // Calculates the local id.
+      // Calculates the group id.
+      << "umod r1023.x___, r0.z, cb0[2].x\n"
+      << "udiv r1023._y__, r0.z, cb0[2].x\n"
+      << "umod r1023._y__, r1023.y, cb0[2].y\n"
+      << "udiv r1023.__z_, r0.z, r0.w\n";
+  }
+  // Calculates the global id.
+  const AMDILKernel *kernel = mAMI->getKernel(mName);
+  if (kernel && kernel->sgv && kernel->sgv->mHasRWG) {
+    // Anytime we declare a literal, we need to reserve it, if it is not emitted
+    // in emitLiterals.
+    O << "dcl_literal l9, "
+      << kernel->sgv->reqGroupSize[0] << ", "
+      << kernel->sgv->reqGroupSize[1] << ", "
+      << kernel->sgv->reqGroupSize[2] << ", "
+      << "0xFFFFFFFF\n";
+    O << "imad r1021.xyz0, r1023.xyzz, l9.xyzz, r1022.xyzz\n";
+  } else {
+    O << "dcl_literal l9, "
+      << mSTM->getDefaultSize(0) << ", "
+      << mSTM->getDefaultSize(1) << ", "
+      << mSTM->getDefaultSize(2) << ", "
+      << "0xFFFFFFFF\n";
+    // This umin is added so that on SI or later architectures, the
+    // ISA generator can do value range analysis to determine that cb0[1]
+    // is a positive value or not.
+    if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+      // NOTE(review): the second umin clamps against l2.x while the
+      // clamp literal declared above is l9 (with mask in l9.w) —
+      // confirm l2 is the intended literal here.
+      O << "umin r1023.xyz0, r1023.xyzz, l9.w\n";
+      O << "umin r1021.xyz0, cb0[1].xyzz, l2.x\n";
+      O << "imad r1021.xyz0, r1023.xyzz, r1021.xyzz, r1022.xyzz\n";
+    } else {
+      O << "imad r1021.xyz0, r1023.xyzz, cb0[1].xyzz, r1022.xyzz\n";
+    }
+  }
+
+  // These umin's are added so that on SI or later architectures, the
+  // ISA generator can do value range analysis to determine that the
+  // cb0 offsets are positive values.  (Original comment said "umax";
+  // the emitted instructions are umin.)
+  // Add the global/group offset for multi-launch support.
+  if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+    O << "umin r1024.xyz0, cb0[6].xyzz, l9.w\n"
+      << "iadd r1021.xyz0, r1021.xyz0, r1024.xyz0\n"
+      << "umin r1024.xyz0, cb0[7].xyzz, l9.w\n"
+      << "iadd r1023.xyz0, r1023.xyz0, r1024.xyz0\n";
+  } else {
+    O << "iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0\n"
+      << "iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0\n";
+  }
+  // moves the flat group id.
+  O << "mov r1023.___w, r0.z\n";
+  // Software-emulated local memory: compute the per-group base pointer
+  // (T2) from the flat group id and the cb0[4] descriptor.
+  if (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) {
+    if (mSTM->is64bit()) {
+      O << "umul " << AsmPrinter->getRegisterName(AMDIL::T2)
+        << ".x0__, r1023.w, cb0[4].z\n"
+        << "i64add " << AsmPrinter->getRegisterName(AMDIL::T2)
+        << ".xy__, " << AsmPrinter->getRegisterName(AMDIL::T2)
+        << ".xyyy, cb0[4].xyyy\n";
+
+    } else {
+      O << "imad " << AsmPrinter->getRegisterName(AMDIL::T2)
+        << ".x___, r1023.w, cb0[4].y, cb0[4].x\n";
+    }
+  }
+  // Shift the flat group id to be in bytes instead of dwords.
+  O << "ishl r1023.___w, r1023.w, l0.z\n";
+  // Software-emulated private memory: compute the per-thread base
+  // pointer (T1) from the absolute flat thread id and cb0[3].
+  if (mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) {
+    if (mSTM->is64bit()) {
+      O << "umul " << AsmPrinter->getRegisterName(AMDIL::T1)
+        << ".x0__, vAbsTidFlat.x, cb0[3].z\n"
+        << "i64add " << AsmPrinter->getRegisterName(AMDIL::T1)
+        << ".xy__, " << AsmPrinter->getRegisterName(AMDIL::T1)
+        << ".xyyy, cb0[3].xyyy\n";
+
+    } else {
+      O << "imad " << AsmPrinter->getRegisterName(AMDIL::T1)
+        << ".x___, vAbsTidFlat.x, cb0[3].y, cb0[3].x\n";
+    }
+  } else {
+    O << "mov " << AsmPrinter->getRegisterName(AMDIL::T1) << ".x___, l0.0000\n";
+  }
+  // Region (GDS) memory: compute region-relative ids in r1024/r1025.
+  if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+    O << "udiv r1024.xyz_, r1021.xyzz, cb0[10].xyzz\n";
+    if (kernel && kernel->sgv && kernel->sgv->mHasRWR) {
+      O << "dcl_literal l10,"
+        << kernel->sgv->reqRegionSize[0] << ", "
+        << kernel->sgv->reqRegionSize[1] << ", "
+        << kernel->sgv->reqRegionSize[2] << ", "
+        << "0\n"
+        << "imad r1025.xyz0, r1023.xyzz, l10.xyzz, r1022.xyzz\n";
+    } else {
+      O << "imad r1025.xyz0, r1023.xyzz, cb0[10].xyzz, r1022.xyzz\n";
+    }
+  }
+}
+
+// Close the kernel body: emit "ret" and the matching "endfunc" line,
+// using the Apple naming scheme when appropriate.
+void AMDILKernelManager::printFooter(OSTREAM_TYPE &O)
+{
+  std::string label = mSTM->isApple()
+                      ? "__OpenCL_" + mName + "_kernel"
+                      : mName;
+  O << "ret\n";
+  O << "endfunc ; " << label << "\n";
+}
+
+// Print the kernel-argument metadata.  For kernels the unique id is
+// temporarily switched to the kernel's function id and 'id' is recorded
+// as a called function while printKernelArgs runs; both are restored
+// afterwards.
+void
+AMDILKernelManager::printMetaData(OSTREAM_TYPE &O, uint32_t id, bool kernel)
+{
+  if (kernel) {
+    std::string fname = mSTM->isApple()
+                        ? "__OpenCL_" + mName + "_kernel"
+                        : mName;
+    mMFI->addCalledFunc(id);
+    mUniqueID = mAMI->getOrCreateFunctionID(fname);
+    mIsKernel = true;
+  }
+  printKernelArgs(O);
+  if (kernel) {
+    mIsKernel = false;
+    mMFI->eraseCalledFunc(id);
+    mUniqueID = id;
+  }
+}
+
+// Record whether the current function is a kernel.  mWasKernel latches:
+// once a kernel has been seen it stays true until clear().
+void AMDILKernelManager::setKernel(bool kernel)
+{
+  mIsKernel = kernel;
+  mWasKernel = mWasKernel || kernel;
+}
+
+// Record the unique function id used for subsequent metadata emission.
+void AMDILKernelManager::setID(uint32_t id)
+{
+  mUniqueID = id;
+}
+
+// Record the name of the kernel currently being processed.
+void AMDILKernelManager::setName(const std::string &name)
+{
+  mName = name;
+}
+
+// True when setKernel(true) has been called since the last clear().
+bool AMDILKernelManager::wasKernel()
+{
+  return mWasKernel;
+}
+
+// Note that the current kernel performs an image write (suppresses the
+// compiler-generated write — see useCompilerWrite).
+void AMDILKernelManager::setImageWrite()
+{
+  mHasImageWrite = true;
+}
+
+// Note that the current kernel emits an output instruction (suppresses
+// the compiler-generated write — see useCompilerWrite).
+void AMDILKernelManager::setOutputInst()
+{
+  mHasOutputInst = true;
+}
+
+// Emits the IL "mov" (plus any follow-up shift) instructions that copy a
+// kernel argument from constant buffer `Buffer` into its argument
+// register(s), starting at argument-register index LII and CB slot Counter.
+//   n       - number of 128-bit CB slots to copy for this argument.
+//   lit     - optional literal component (e.g. "l3.x"/"l3.y") used to
+//             sign-extend sub-dword scalars via an ishl/ishr pair.
+//   fcall   - selects the unpack sequence for packed byte/short vectors
+//             (1090-1093); 0 means no unpacking.  NOTE(review): the exact
+//             meaning of each magic number is fixed by the call sites in
+//             printArgCopies - confirm before changing.
+//   isImage - the argument is an image: its value comes from the literal
+//             table rather than a CB slot, and NUM_EXTRA_SLOTS_PER_IMAGE
+//             extra slots are skipped per image.
+//   isHWCB  - the argument lives in a hardware constant buffer; a single
+//             mov from l5.x is emitted instead of a CB read.
+// Both LII and Counter are advanced for the caller.
+void AMDILKernelManager::printConstantToRegMapping(
+  AMDILAsmPrinter *RegNames,
+  unsigned &LII,
+  OSTREAM_TYPE &O,
+  uint32_t &Counter,
+  uint32_t Buffer,
+  uint32_t n,
+  const char *lit,
+  uint32_t fcall,
+  bool isImage,
+  bool isHWCB)
+{
+  // TODO: This needs to be enabled or SC will never statically index into the
+  // CB when a pointer is used.
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem) && isHWCB) {
+    O << "mov ";
+    printRegName(RegNames, mMFI->getArgReg(LII), O, true);
+    O << " l5.x\n";
+    ++LII;
+    Counter++;
+    return;
+  }
+  for (uint32_t x = 0; x < n; ++x) {
+    uint32_t reg = mMFI->getArgReg(LII);
+    O << "mov ";
+    if (isImage) {
+      printRegName(RegNames, mMFI->getArgReg(LII), O, true);
+      O << " l" << mMFI->getIntLits(Counter++) << "\n";
+    } else {
+      printRegName(RegNames, mMFI->getArgReg(LII), O, true);
+      O << " cb" <<Buffer<< "[" <<Counter++<< "]"
+        << getFirstComponent(mMFI->getArgReg(LII), fcall) << "\n";
+    }
+    // Unpack packed short (1092/1093) or byte (1090/1091) vector elements
+    // into separate components; without a literal, follow up with a
+    // shl/shr pair to sign-extend each element in place.
+    switch(fcall) {
+    case 1093:
+    case 1092:
+      O << "ishr ";
+      printRegName(RegNames, mMFI->getArgReg(LII), O, true);
+      O << " ";
+      printRegName(RegNames, mMFI->getArgReg(LII), O, false);
+      O << " l3.0y0y\n";
+      if (!lit) {
+        O << "ishl " << RegNames->getRegisterName(reg) << ", ";
+        O << RegNames->getRegisterName(reg)<< ", l3.z\n";
+        O << "ishr " << RegNames->getRegisterName(reg) << ", ";
+        O << RegNames->getRegisterName(reg)<< ", l3.z\n";
+      }
+      break;
+    case 1091:
+      O << "ishr ";
+      printRegName(RegNames, mMFI->getArgReg(LII), O, true);
+      O << " ";
+      printRegName(RegNames, mMFI->getArgReg(LII), O, false);
+      O << " l3.0zyx\n";
+      if (!lit) {
+        O << "ishl " << RegNames->getRegisterName(reg) << ", ";
+        O << RegNames->getRegisterName(reg)<< ", l3.x\n";
+        O << "ishr " << RegNames->getRegisterName(reg) << ", ";
+        O << RegNames->getRegisterName(reg)<< ", l3.x\n";
+      }
+      break;
+    case 1090:
+      O << "ishr ";
+      printRegName(RegNames, mMFI->getArgReg(LII), O, true);
+      O << " ";
+      printRegName(RegNames, mMFI->getArgReg(LII), O, false);
+      O << " l3.0z0z\n";
+      if (!lit) {
+        O << "ishl " << RegNames->getRegisterName(reg) << ", ";
+        O << RegNames->getRegisterName(reg)<< ", l3.x\n";
+        O << "ishr " << RegNames->getRegisterName(reg) << ", ";
+        O << RegNames->getRegisterName(reg)<< ", l3.x\n";
+      }
+      break;
+    default:
+      break;
+    };
+    // Sign-extend a sub-dword scalar through the given literal shift amount.
+    if (lit) {
+      O << "ishl " ;
+      printRegName(RegNames, mMFI->getArgReg(LII), O, true);
+      O << " ";
+      printRegName(RegNames, mMFI->getArgReg(LII), O, false, true);
+      O << " " << lit << "\nishr ";
+      printRegName(RegNames, mMFI->getArgReg(LII), O, true);
+      O << " ";
+      printRegName(RegNames, mMFI->getArgReg(LII), O, false, true);
+      O << " " << lit << "\n";
+    }
+    ++LII;
+    if (isImage) {
+      Counter += NUM_EXTRA_SLOTS_PER_IMAGE;
+    }
+  }
+}
+
+// Emits the instructions that copy a by-value struct argument of
+// `stackSize` bytes from constant buffer `Buffer` (starting at slot
+// `Counter`) into private/scratch memory addressed through r0.x.  One
+// 128-bit store is emitted per 16-byte chunk, and r0.x is advanced by the
+// literal l<mLitIdx>.z after each store.  Counter is advanced for the
+// caller.  NOTE(review): the `ST` parameter is currently unused here.
+void
+AMDILKernelManager::printCopyStructPrivate(const StructType *ST,
+    OSTREAM_TYPE &O,
+    size_t stackSize,
+    uint32_t Buffer,
+    uint32_t mLitIdx,
+    uint32_t &Counter)
+{
+  // Round the byte size up to whole 16-byte (128-bit) chunks.
+  size_t n = ((stackSize + 15) & ~15) >> 4;
+  for (size_t x = 0; x < n; ++x) {
+    if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateUAV)) {
+      O << "uav_raw_store_id(" <<
+        mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
+        << ") mem0, r0.x, cb" << Buffer << "[" << Counter++ << "]\n";
+    } else if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
+      // Indexed-temp scratch: convert the byte offset to an element index
+      // first (shift by l0.x).
+      O << "ishr r0.y, r0.x, l0.x\n";
+      O << "mov x" << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
+        <<"[r0.y], cb" << Buffer << "[" << Counter++ << "]\n";
+    } else {
+      // No hardware private memory: spill through the global UAV instead.
+      O << "uav_raw_store_id(" <<
+        mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID)
+        << ") mem0, r0.x, cb" << Buffer << "[" << Counter++ << "]\n";
+    }
+    O << "iadd r0.x, r0.x, l" << mLitIdx << ".z\n";
+  }
+}
+
+// Emits the ;ARGSTART ... ;ARGEND metadata block describing the current
+// kernel/function to the runtime: metadata version, device, unique ID,
+// memory sizes (private/local/region), required work-group/region sizes,
+// per-argument metadata, samplers, printf data, uav/private resource IDs
+// and argument-type reflection strings.
+// Fixes vs. the original: reflection globals obtained via dyn_cast_or_null
+// are now checked for null / missing initializer before dereferencing, and
+// a stray double semicolon was removed.
+void AMDILKernelManager::printKernelArgs(OSTREAM_TYPE &O)
+{
+  std::string version(";version:");
+  version += itostr(mSTM->supportMetadata30() ? AMDIL_MAJOR_VERSION : 2) + ":"
+             + itostr(AMDIL_MINOR_VERSION) + ":"
+             + itostr(mSTM->supportMetadata30()
+                      ? AMDIL_REVISION_NUMBER : AMDIL_20_REVISION_NUMBER);
+  // Apple builds register kernels under the mangled wrapper symbol.
+  const AMDILKernel *kernel = mAMI->getKernel(
+                                (mSTM->isApple() && !mIsKernel)
+                                ?  "__OpenCL_" + mName + "_kernel" : mName);
+  bool isKernel = (kernel) ? kernel->mKernel : false;
+  if (mSTM->isApple()) {
+    if (isKernel) {
+      O << ";ARGSTART:__OpenCL_" <<mName<< "_kernel\n";
+    } else {
+      O << ";ARGSTART:" <<mName<< "\n";
+    }
+  } else {
+    O << ";ARGSTART:" <<mName<< "\n";
+  }
+  if (isKernel) {
+    O << version << "\n";
+    O << ";device:" <<mSTM->getDeviceName() << "\n";
+  }
+  O << ";uniqueid:" <<mUniqueID<< "\n";
+
+  if (kernel) {
+    // Memory size metadata: sizes are rounded up to dword multiples; when
+    // the memory type is software-emulated the hw and sw sizes are summed.
+    size_t region = kernel->curRSize;
+    size_t hwregion = ((kernel->curHWRSize + 3) & (~0x3));
+    bool usehwregion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
+    if (!mSTM->overridesFlatAS()) {
+      size_t local = kernel->curSize;
+      size_t hwlocal = ((kernel->curHWSize + 3) & (~0x3));
+      bool usehwlocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
+      bool usehwprivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
+      bool useuavprivate = mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV);
+      if (isKernel) {
+        O << ";memory:" << ((usehwprivate) ?
+                    (useuavprivate) ? "uav" : "hw" : "" ) << "private:"
+          <<(((mMFI->getStackSize() + 15) & (~0xF)))<< "\n";
+      }
+      O << ";memory:" << ((usehwlocal) ? "hw" : "") << "local:"
+        << ((usehwlocal) ? hwlocal : hwlocal + local) << "\n";
+    }
+    if (mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
+      O << ";memory:" << ((usehwregion) ? "hw" : "") << "region:"
+        << ((usehwregion) ? hwregion : hwregion + region) << "\n";
+    }
+
+    // Required work-group (cws) and region (crs) sizes, if the source
+    // carried the corresponding attributes.  `kernel` is already known to
+    // be non-null inside this scope, so only the flag and sgv are checked.
+    if (isKernel && kernel->sgv) {
+      if (kernel->sgv->mHasRWG) {
+        O << ";cws:"
+          << kernel->sgv->reqGroupSize[0] << ":"
+          << kernel->sgv->reqGroupSize[1] << ":"
+          << kernel->sgv->reqGroupSize[2] << "\n";
+      }
+      if (kernel->sgv->mHasRWR) {
+        O << ";crs:"
+          << kernel->sgv->reqRegionSize[0] << ":"
+          << kernel->sgv->reqRegionSize[1] << ":"
+          << kernel->sgv->reqRegionSize[2] << "\n";
+      }
+    }
+  }
+  // Pre-formatted per-argument metadata collected during processArgMetadata.
+  if (isKernel) {
+    for (std::vector<std::string>::iterator ib = mMFI->kernel_md_begin(),
+         ie = mMFI->kernel_md_end(); ib != ie; ++ib) {
+      O << (*ib) << "\n";
+    }
+  }
+  for (std::set<std::string>::iterator ib = mMFI->func_md_begin(),
+       ie = mMFI->func_md_end(); ib != ie; ++ib) {
+    O << (*ib) << "\n";
+  }
+  if (!mMFI->func_empty()) {
+    O << ";function:" << mMFI->func_size();
+    binaryForEach(mMFI->func_begin(), mMFI->func_end(), commaPrint, O);
+    O << "\n";
+  }
+
+  if (!mSTM->device()->isSupported(AMDILDeviceInfo::MacroDB)
+      && !mMFI->intr_empty()) {
+    O << ";intrinsic:" << mMFI->intr_size();
+    binaryForEach(mMFI->intr_begin(), mMFI->intr_end(), commaPrint, O);
+    O << "\n";
+  }
+
+  if (!isKernel) {
+    binaryForEach(mMFI->printf_begin(), mMFI->printf_end(), printfPrint, O);
+    mMF->getMMI().getObjFileInfo<AMDILModuleInfo>().add_printf_offset(
+      mMFI->printf_size());
+  } else {
+    // Sampler metadata: the third field is 1 when an explicit value was
+    // given, the fourth carries that value (0 otherwise).
+    for (StringMap<SamplerInfo>::iterator
+         smb = mMFI->sampler_begin(),
+         sme = mMFI->sampler_end(); smb != sme; ++ smb) {
+      O << ";sampler:" << (*smb).second.name << ":" << (*smb).second.idx
+        << ":" << ((*smb).second.val == (uint32_t)-1 ? 0 : 1)
+        << ":" << ((*smb).second.val != (uint32_t)-1 ? (*smb).second.val : 0)
+        << "\n";
+    }
+  }
+  if (mSTM->is64bit()) {
+    O << ";memory:64bitABI\n";
+  }
+
+  if (!mMFI->errors_empty()) {
+    binaryForEach(mMFI->errors_begin(), mMFI->errors_end(), errorPrint, O);
+  }
+  // This has to come last
+  if (isKernel && !mSTM->overridesFlatAS()) {
+    // Pick the default uav id, depending on generation, arena support and
+    // which uavs the function actually touches.
+    uint32_t id = (mMFI->uav_size() ? *(mMFI->uav_begin()) : 0);
+    if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+      if (mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) >
+          mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+        if (mMFI->uav_size() == 1) {
+          if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
+              && *(mMFI->uav_begin()) >= ARENA_SEGMENT_RESERVED_UAVS) {
+            id = mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+          }
+        } else if (mMFI->uav_count(mSTM->device()->
+                                   getResourceID(AMDILDevice::RAW_UAV_ID))) {
+          id = mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+        } else {
+          id = mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+        }
+      } else if ((mMFI->get_num_write_images()) !=
+                 OPENCL_MAX_WRITE_IMAGES
+                 && !mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
+                 && mMFI->uav_count(mSTM->device()->
+                                    getResourceID(AMDILDevice::RAW_UAV_ID))) {
+        id = mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+      } else if (mMFI->uav_size() > 1) {
+        id = mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+      }
+    } else if (mSTM->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+      if (!mSTM->overridesFlatAS()) {
+        id = mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID);
+        if (mMFI->uav_size() && !mMFI->uav_count(id)) {
+          id = (*mMFI->uav_begin());
+        }
+      }
+    }
+    O << ";uavid:" << id  << "\n";
+  }
+  if (isKernel) {
+    O << ";privateid:" << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
+      << "\n";
+  }
+  if (isKernel) {
+    // Argument-type reflection: read the names stashed by clang in the
+    // llvm.argtypename.annotations.<kernel> global.
+    std::string argKernel = "llvm.argtypename.annotations.";
+    argKernel.append(mName);
+    GlobalVariable *GV = mMF->getFunction()->getParent()
+                         ->getGlobalVariable(argKernel);
+    if (GV && GV->hasInitializer()) {
+      const ConstantArray *nameArray
+      = dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+      if (nameArray) {
+        for (unsigned x = 0, y = nameArray->getNumOperands(); x < y; ++x) {
+          const GlobalVariable *gV= dyn_cast_or_null<GlobalVariable>(
+                                      nameArray->getOperand(x)->getOperand(0));
+          // dyn_cast_or_null may return null, and getInitializer() is only
+          // valid when an initializer exists; the previous code dereferenced
+          // gV unconditionally and could crash here.
+          if (!gV || !gV->hasInitializer()) {
+            continue;
+          }
+          const ConstantDataArray *argName =
+            dyn_cast_or_null<ConstantDataArray>(gV->getInitializer());
+          if (!argName) {
+            continue;
+          }
+          // Drop the trailing NUL of the C string before printing.
+          std::string argStr = argName->getAsString();
+          O << ";reflection:" << x << ":" << argStr.substr(0, argStr.length()-1) << "\n";
+        }
+      }
+    }
+  }
+  if (mSTM->isApple()) {
+    if (isKernel) {
+      O << ";ARGEND:__OpenCL_" << mName << "_kernel\n";
+    } else {
+      O << ";ARGEND:" << mName << "\n";
+    }
+  } else {
+    O << ";ARGEND:" << mName << "\n";
+  }
+}
+
+// Emits the kernel prologue that (1) declares the argument constant buffer
+// cb1, (2) declares and initializes private/scratch memory when the device
+// uses hardware private memory, (3) copies every kernel argument from cb1
+// into its argument registers (scalars, vectors, images, by-value structs
+// and pointers are each handled separately), and (4) declares any hardware
+// constant buffers the kernel uses.
+void AMDILKernelManager::printArgCopies(OSTREAM_TYPE &O,
+                                        AMDILAsmPrinter *RegNames)
+{
+  Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
+  Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
+  uint32_t Counter = 0;
+
+  // Arguments live in constant buffer 1, one 128-bit slot per 16 bytes.
+  if (mMFI->getArgSize()) {
+    O << "dcl_cb cb1";
+    O << "[" << (mMFI->getArgSize() >> 4) << "]\n";
+    mMFI->setUsesMem(AMDILDevice::CONSTANT_ID);
+  }
+  const Function *F = mMF->getFunction();
+  // Get the stack size
+  uint32_t stackSize = mMFI->getStackSize();
+  uint32_t privateSize = mMFI->getScratchSize();
+  uint32_t stackOffset = (privateSize + 15) & (~0xF);
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem) && !mSTM->overridesFlatAS()) {
+    // TODO: If the size is too large, we need to fall back to software emulated
+    // instead of using the hardware capability.
+    // Size in 16-byte chunks; debug builds reserve one extra chunk.
+    int size = (((((stackSize != privateSize) ? stackSize + privateSize :  stackSize)
+                  + 15) & (~0xF)) >> 4)
+               + (mSTM->device()->isSupported(AMDILDeviceInfo::Debug) ? 1 : 0);
+    if (size > 4096) {
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_PRIVATE_RESOURCES]);
+    }
+    if (size) {
+      // For any stack variables, we need to declare the literals for them so that
+      // we can use them when we copy our data to the stack.
+      // Anytime we declare a literal, we need to reserve it, if it is not emitted
+      // in emitLiterals.
+      uint32_t resid =  mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+      if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateUAV)) {
+        O << "dcl_typeless_uav_id(" << resid
+          << ")_stride(4)_length(" << (size << 4 )<< ")_access(private)\n";
+        if (mSTM->device()->isSupported(AMDILDeviceInfo::Debug)) {
+          // Debug: store r1023 into the reserved last chunk.
+          int newSize = (size - 1) << 4;
+          mMFI->addReservedLiterals(1);
+          O << "dcl_literal l" << mMFI->getNumLiterals() << ", "
+            << newSize << ","
+            << newSize << ","
+            << newSize << ","
+            << newSize << "\n";
+          O << "uav_raw_store_id(" << resid << ") mem0, l"
+            << mMFI->getNumLiterals()<< ", r1023\n";
+        }
+      } else {
+        O << "dcl_indexed_temp_array x"
+          << mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "["
+          << size << "]\n";
+        if (mSTM->device()->isSupported(AMDILDeviceInfo::Debug)) {
+          O << "mov x" << resid << "[" << size - 1 << "], r1023\n";
+        }
+      }
+      // Literal layout: x = stack size, y = private size, z = 16 (chunk
+      // stride), w = offset of the stack region past the private region.
+      mMFI->addReservedLiterals(1);
+      O << "dcl_literal l" << mMFI->getNumLiterals() << ", " << stackSize << ", "
+        << privateSize << ", 16, " << ((stackSize == privateSize) ? 0 : stackOffset) << "\n"
+        << "iadd r0.x, " << RegNames->getRegisterName(AMDIL::T1) << ".x, l"
+        << mMFI->getNumLiterals() << ".w\n";
+
+      O << "mov " << RegNames->getRegisterName(AMDIL::FP)
+        << ", l" << mMFI->getNumLiterals() << ".0\n";
+    }
+  }
+  // Per-argument copies from cb1 into the argument registers.
+  I = mMF->getFunction()->arg_begin();
+  unsigned curReg = 0;
+  for (I = mMF->getFunction()->arg_begin(); I != Ie; ++I) {
+    Type *curType = I->getType();
+    unsigned int Buffer = 1;
+    O << "; Kernel arg setup: " << I->getName().str() << "\n";
+    if (curType->isIntegerTy() || curType->isFloatingPointTy()) {
+      // Scalars: sub-dword types get a sign-extending literal shift.
+      switch (curType->getPrimitiveSizeInBits()) {
+      default:
+        printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+        break;
+      case 16:
+        printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1,
+                                  "l3.y" );
+        break;
+      case 8:
+        printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, "l3.x" );
+        break;
+      }
+    } else if (const VectorType *VT = dyn_cast<VectorType>(curType)) {
+      // Vectors: packed byte/short elements use the magic fcall codes
+      // (1090-1093) understood by printConstantToRegMapping.
+      Type *ET = VT->getElementType();
+      int numEle = VT->getNumElements();
+      switch (ET->getPrimitiveSizeInBits()) {
+      default:
+        printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+                                  (numEle+2) >> 2);
+        break;
+      case 64:
+        if (numEle == 3) {
+          // 3 x 64-bit spans one and a half CB slots: copy xy, zw, xy.
+          O << "mov ";
+          printRegName(RegNames, mMFI->getArgReg(curReg++), O, true);
+          O << " cb" << Buffer << "[" << Counter << "].xy\n";
+          O << "mov ";
+          printRegName(RegNames, mMFI->getArgReg(curReg++), O, true);
+          O << " cb" << Buffer << "[" << Counter << "].zw\n";
+          ++Counter;
+          O << "mov ";
+          printRegName(RegNames, mMFI->getArgReg(curReg++), O, true);
+          O << " cb" << Buffer << "[" << Counter << "].xy\n";
+          Counter++;
+        } else {
+          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+                                    (numEle) >> 1);
+        }
+        break;
+      case 16: {
+        switch (numEle) {
+        default:
+          printConstantToRegMapping(RegNames, curReg, O, Counter,
+                                    Buffer, (numEle+2) >> 2, "l3.y", 1093);
+          break;
+        case 2:
+          printConstantToRegMapping(RegNames, curReg, O, Counter,
+                                    Buffer, 1, "l3.y", 1092);
+          break;
+        }
+        break;
+      }
+      case 8: {
+        switch (numEle) {
+        default:
+          printConstantToRegMapping(RegNames, curReg, O, Counter,
+                                    Buffer, (numEle+2) >> 2, "l3.x", 1091);
+          break;
+        case 2:
+          printConstantToRegMapping(RegNames, curReg, O, Counter,
+                                    Buffer, 1, "l3.x", 1090);
+          break;
+        }
+        break;
+      }
+      }
+    } else if (const PointerType *PT = dyn_cast<PointerType>(curType)) {
+      Type *CT = PT->getElementType();
+      const StructType *ST = dyn_cast<StructType>(CT);
+      // Opaque struct pointers: OpenCL image types are recognized by name.
+      if (ST && ST->isOpaque()) {
+        bool i1d  = ST->getName().startswith("struct._image1d_t");
+        bool i1da = ST->getName().startswith("struct._image1d_array_t");
+        bool i1db = ST->getName().startswith("struct._image1d_buffer_t");
+        bool i2d  = ST->getName().startswith("struct._image2d_t");
+        bool i2da = ST->getName().startswith("struct._image2d_array_t");
+        bool i3d  = ST->getName().startswith("struct._image3d_t");
+        bool is_image = i1d || i1da || i1db || i2d || i2da || i3d;
+        if (is_image) {
+          if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+            printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+                                      1, NULL, 0, is_image);
+          } else {
+            mMFI->addErrorMsg(
+              amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
+            ++curReg;
+          }
+        } else {
+          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+        }
+      } else if (CT->isStructTy()
+                 && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        // Private struct pointer: by-value struct passed on the stack.
+        // NOTE(review): the image-name checks are repeated here even though
+        // a non-opaque struct is presumably never an image - confirm.
+        StructType *ST = dyn_cast<StructType>(CT);
+        bool i1d  = ST->getName().startswith("struct._image1d_t");
+        bool i1da = ST->getName().startswith("struct._image1d_array_t");
+        bool i1db = ST->getName().startswith("struct._image1d_buffer_t");
+        bool i2d  = ST->getName().startswith("struct._image2d_t");
+        bool i2da = ST->getName().startswith("struct._image2d_array_t");
+        bool i3d  = ST->getName().startswith("struct._image3d_t");
+        bool is_image = i1d || i1da || i1db || i2d || i2da || i3d;
+        if (is_image) {
+          if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+            printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+                                      1, NULL, 0, is_image);
+          } else {
+            mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
+            ++curReg;
+          }
+        } else {
+          // Copy the struct contents into scratch and hand the argument
+          // register the current stack address (r0.x).
+          const TargetData* TD = mTM->getTargetData();
+          size_t structSize
+          = TD->RoundUpAlignment(TD->getTypeAllocSize(ST), 16);
+
+          stackOffset += structSize;
+          O << "mov ";
+          printRegName(RegNames, mMFI->getArgReg(curReg), O, true);
+          O << " r0.x\n";
+          printCopyStructPrivate(ST, O, structSize, Buffer, mMFI->getNumLiterals(),
+                                 Counter);
+          ++curReg;
+        }
+      } else if (CT->isIntOrIntVectorTy()
+                 || CT->isFPOrFPVectorTy()
+                 || CT->isArrayTy()
+                 || CT->isPointerTy()
+                 || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+        // All other pointers: copy the base offset from the CB slot.
+        if (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
+          const AMDILKernel* krnl = mAMI->getKernel(F->getName());
+          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer,
+                                    1, NULL, 0, false,
+                                    mAMI->usesHWConstant(krnl, I->getName()));
+        } else if (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
+          // TODO: If we are region address space, the first region pointer, no
+          // array pointers exist, and hardware RegionMem is enabled then we can
+          // zero out register as the initial offset is zero.
+          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+        } else if (PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
+          // TODO: If we are local address space, the first local pointer, no
+          // array pointers exist, and hardware LocalMem is enabled then we can
+          // zero out register as the initial offset is zero.
+          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+        } else {
+          printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1);
+        }
+      } else {
+        assert(0 && "Current type is not supported!");
+        mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+        ++curReg;
+      }
+    } else {
+      assert(0 && "Current type is not supported!");
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+      ++curReg;
+    }
+  }
+  // Declare the hardware constant buffers actually used by this kernel.
+  // NOTE(review): krnl is dereferenced without a null check below; getKernel
+  // may return null for unknown names - confirm callers guarantee a kernel.
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+    const AMDILKernel* krnl = mAMI->getKernel(F->getName());
+    uint32_t constNum = 0;
+    for (uint32_t x = 0; x < mSTM->device()->getMaxNumCBs(); ++x) {
+      if (krnl->constSizes[x]) {
+        O << "dcl_cb cb" << x + CB_BASE_OFFSET;
+        O << "[" << (((krnl->constSizes[x] + 15) & ~15) >> 4) << "]\n";
+        ++constNum;
+        mMFI->setUsesMem(AMDILDevice::CONSTANT_ID);
+      }
+    }
+    // TODO: If we run out of constant resources, we need to push some of the
+    // constant pointers to the software emulated section.
+    if (constNum > mSTM->device()->getMaxNumCBs()) {
+      assert(0 && "Max constant buffer limit passed!");
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_CONSTANT_RESOURCES]);
+    }
+  }
+}
+
+// Declares every literal the function uses.  Each IL literal is a 128-bit
+// value printed as four 32-bit hex components; the trailing comment records
+// the original decimal value(s).
+void AMDILKernelManager::emitLiterals(OSTREAM_TYPE &O)
+{
+  char buf[256];
+  // 32-bit literals: the scalar is replicated into all four components.
+  for (std::map<uint32_t, uint32_t>::iterator I = mMFI->begin_32(),
+       E = mMFI->end_32(); I != E; ++I) {
+    uint32_t val = I->first;
+    O << "dcl_literal l" << I->second << ", ";
+    sprintf(buf, "0x%08X, 0x%08X, 0x%08X, 0x%08X", val, val, val, val);
+    O << buf << "; f32:i32 " << I->first << "\n";
+  }
+  // 64-bit literals: the low/high dwords fill .xy and repeat in .zw.
+  for (std::map<uint64_t, uint32_t>::iterator I = mMFI->begin_64(),
+       E = mMFI->end_64(); I != E; ++I) {
+    uint32_t halves[2];
+    uint64_t val = I->first;
+    memcpy(halves, &val, sizeof(uint64_t));
+    O << "dcl_literal l" << I->second << ", ";
+    sprintf(buf, "0x%08X, 0x%08X, 0x%08X, 0x%08X; f64:i64 ",
+            halves[0], halves[1], halves[0], halves[1]);
+    O << buf << I->first << "\n";
+  }
+  // 128-bit literals: two 64-bit halves, each split into two dwords.
+  for (std::map<std::pair<uint64_t, uint64_t>, uint32_t>::iterator
+       I = mMFI->begin_128(), E = mMFI->end_128(); I != E; ++I) {
+    uint32_t words[2][2];
+    uint64_t lo = I->first.first;
+    uint64_t hi = I->first.second;
+    memcpy(words[0], &lo, sizeof(uint64_t));
+    memcpy(words[1], &hi, sizeof(uint64_t));
+    O << "dcl_literal l" << I->second << ", ";
+    sprintf(buf, "0x%08X, 0x%08X, 0x%08X, 0x%08X; f128:i128 ",
+            words[0][0], words[0][1], words[1][0], words[1][1]);
+    O << buf << I->first.first << I->first.second << "\n";
+  }
+}
+
+// If the value is not known, then the uav is set, otherwise the mValueIDMap
+// is used.
+// Associates a UAV ID with a pointer value.  A null value is ignored; the
+// default UAV is then selected by getUAVID's fallback path instead.
+void AMDILKernelManager::setUAVID(const Value *value, uint32_t ID)
+{
+  if (!value) {
+    return;
+  }
+  mValueIDMap[value] = ID;
+}
+
+// Returns the UAV ID recorded for `value` by setUAVID, or the device's
+// default UAV (arena on HD6XXX and earlier, raw on later generations) when
+// no mapping exists.
+uint32_t AMDILKernelManager::getUAVID(const Value *value)
+{
+  // Single lookup instead of find() followed by operator[] (which would
+  // traverse the map a second time).
+  std::map<const Value *, uint32_t>::const_iterator It =
+    mValueIDMap.find(value);
+  if (It != mValueIDMap.end()) {
+    return It->second;
+  }
+
+  if (mSTM->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+    return mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+  }
+  return mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,171 @@
+//===-- AMDILKernelManager.h ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Class that handles the metadata/ABI management for AMDIL kernels.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILKERNELMANAGER_H_
+#define _AMDILKERNELMANAGER_H_
+#include "AMDIL.h"
+#include "AMDILDevice.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <string>
+#include <set>
+#include <map>
+#define IMAGETYPE_2D 0
+#define IMAGETYPE_3D 1
+#define RESERVED_LIT_COUNT 6
+
+namespace llvm
+{
+class AMDILSubtarget;
+class AMDILMachineFunctionInfo;
+class AMDILModuleInfo;
+class AMDILTargetMachine;
+class AMDILAsmPrinter;
+class StructType;
+class Value;
+class TypeSymbolTable;
+class MachineFunction;
+class MachineInstr;
+class ConstantFP;
+class PrintfInfo;
+
+
+/// Manages the kernel/function metadata and ABI emission for the AMDIL
+/// backend: argument copies, literal declarations, memory/resource metadata
+/// and UAV ID assignment.
+class AMDILKernelManager
+{
+public:
+  /// When an error message should be reported.
+  typedef enum {
+    RELEASE_ONLY,
+    DEBUG_ONLY,
+    ALWAYS
+  } ErrorMsgEnum;
+  AMDILKernelManager(AMDILTargetMachine *TM);
+  virtual ~AMDILKernelManager();
+
+  /// Clear the state of the KernelManager putting it in its most initial state.
+  void clear();
+  /// Binds the manager to the machine function about to be emitted.
+  void setMF(MachineFunction *MF);
+
+  /// Process the specific kernel parsing out the parameter information for the
+  /// kernel.
+  void processArgMetadata(OSTREAM_TYPE &O,
+                          uint32_t buf, bool kernel);
+
+
+  /// Prints the header for the kernel which includes the groupsize declaration
+  /// and calculation of the local/group/global id's.
+  void printHeader(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O,
+                   const std::string &name);
+
+  /// Prints the resource/memory declarations needed by the function.
+  virtual void printDecls(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O);
+  /// Prints the work-group size declaration.
+  virtual void printGroupSize(OSTREAM_TYPE &O);
+
+  /// Copies the data from the runtime setup constant buffers into registers so
+  /// that the program can correctly access memory or data that was set by the
+  /// host program.
+  void printArgCopies(OSTREAM_TYPE &O, AMDILAsmPrinter* RegNames);
+
+  /// Prints out the end of the function ("ret"/"endfunc" trailer).
+  void printFooter(OSTREAM_TYPE &O);
+
+  /// Prints out the metadata for the specific function depending if it is a
+  /// kernel or not.
+  void printMetaData(OSTREAM_TYPE &O, uint32_t id, bool isKernel = false);
+
+  /// Set bool value on whether to consider the function a kernel or a normal
+  /// function.  Setting true also latches the wasKernel() flag.
+  void setKernel(bool kernel);
+
+  /// Set the unique ID of the kernel/function.
+  void setID(uint32_t id);
+
+  /// Set the name of the kernel/function.
+  void setName(const std::string &name);
+
+  /// Flag that specifies whether this function has a kernel wrapper.
+  bool wasKernel();
+
+  /// Emits the setup code needed by the target intrinsics.
+  void getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O);
+
+  // Returns whether a compiler needs to insert a write to memory or not.
+  bool useCompilerWrite(const MachineInstr *MI);
+
+  // Set the flag that there exists an image write.
+  void setImageWrite();
+  // Set the flag that there exists an output instruction.
+  void setOutputInst();
+
+  /// Declares all literal values collected for the current function.
+  void emitLiterals(OSTREAM_TYPE &O);
+
+  // Set the uav id for the specific pointer value.  If value is NULL, then the
+  // ID sets the default ID.
+  void setUAVID(const Value *value, uint32_t ID);
+
+  // Get the UAV id for the specific pointer value.
+  uint32_t getUAVID(const Value *value);
+
+private:
+
+  /// Helper function that prints the actual metadata and should only be called
+  /// by printMetaData.
+  void printKernelArgs(OSTREAM_TYPE &O);
+  /// Emits the stores that copy a by-value struct argument into scratch.
+  void printCopyStructPrivate(const StructType *ST,
+                              OSTREAM_TYPE &O,
+                              size_t stackSize,
+                              uint32_t Buffer,
+                              uint32_t mLitIdx,
+                              uint32_t &counter);
+  /// Emits the moves/shifts that copy one argument from a constant buffer
+  /// slot into its argument register(s); see AMDILKernelManager.cpp.
+  virtual void
+  printConstantToRegMapping(AMDILAsmPrinter *RegNames,
+                            unsigned &LII,
+                            OSTREAM_TYPE &O,
+                            uint32_t &counter,
+                            uint32_t Buffer,
+                            uint32_t n,
+                            const char *lit = NULL,
+                            uint32_t fcall = 0,
+                            bool isImage = false,
+                            bool isHWCB = false);
+  /// Updates the metadata for a pointer argument.
+  void updatePtrArg(llvm::Function::const_arg_iterator Ip,
+                    int numWriteImages,
+                    int raw_uav_buffer,
+                    int counter,
+                    bool isKernel,
+                    const Function *F);
+  /// Name of the current kernel.
+  std::string mName;
+  /// Unique ID emitted in the ";uniqueid:" metadata.
+  uint32_t mUniqueID;
+  /// True while the current function is treated as a kernel.
+  bool mIsKernel;
+  /// Latched true once any kernel has been processed (see setKernel).
+  bool mWasKernel;
+  /// Whether a compiler-inserted memory write is required.
+  bool mCompilerWrite;
+  /// Flag to specify if an image write has occurred or not in order to not
+  /// add a compiler specific write if no other writes to memory occurred.
+  bool mHasImageWrite;
+  /// Whether the function contains an output instruction.
+  bool mHasOutputInst;
+
+  /// Map from const Value * to UAV ID.
+  std::map<const Value *, uint32_t> mValueIDMap;
+
+  AMDILTargetMachine * mTM;
+  const AMDILSubtarget * mSTM;
+  /// Machine function currently being emitted.  NOTE(review): the original
+  /// comment here ("global offset of the printf string id's") does not match
+  /// this member - confirm intended placement.
+  MachineFunction *mMF;
+  AMDILMachineFunctionInfo *mMFI;
+  AMDILModuleInfo *mAMI;
+}; // class AMDILKernelManager
+
+} // llvm namespace
+#endif // _AMDILKERNELMANAGER_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMPC.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMPC.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMPC.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMPC.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,22 @@
+//===-- AMDILLLVMPC.h -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_LLVM_PC_H_
+#define _AMDIL_LLVM_PC_H_
+#include <string>
+// Identifier/return-type hooks used when building the AMDIL asm printer
+// for this platform.  NOTE(review): semantics inferred from the names -
+// confirm against AMDILAsmPrinter.cpp usage.
+#define ASMPRINTER_KERNEL_NAME kernelName
+#define ASMPRINTER_RETURN_TYPE static AsmPrinter*
+// Sampler-state bit masks.  NOTE(review): exact bit meanings are not
+// visible in this header - confirm at the use sites.
+#define LINEAR_MASK 0x20
+#define NORM_MASK 0x1
+
+#endif // _AMDIL_LLVM_PC_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMVersion.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMVersion.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMVersion.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLLVMVersion.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,24 @@
+//===-- AMDILLLVMVersion.h ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_LLVM_VERSION_H_
+#define _AMDIL_LLVM_VERSION_H_
+
+// Stream type used by all AMDIL metadata/asm printers; kept as a macro so
+// it can track the stream type of the targeted LLVM version.
+#define OSTREAM_TYPE llvm::raw_ostream
+
+// Constructor parameter list and matching argument list forwarded by
+// AMDILAsmPrinter.cpp; macros so they can track AsmPrinter signature
+// changes across LLVM versions.
+#define AMDIL_ASM_PRINTER_ARGUMENTS TargetMachine& TM, MCStreamer &Streamer
+#define ASM_PRINTER_ARGUMENTS TM, Streamer
+
+#endif // _AMDIL_LLVM_VERSION_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,148 @@
+//===-- AMDILLiteralManager.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "literal_manager"
+#include "AMDIL.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+
+// AMDIL Literal Manager traverses through all of the LOADCONST instructions and
+// converts them from an immediate value to the literal index. The literal index
+// is valid IL, but the immediate values are not. The Immediate values must be
+// aggregated and declared for clarity and to reduce the number of literals that
+// are used. It is also illegal to declare the same literal twice, so this keeps
+// that from occurring.
+
+namespace
+{
+// Machine-function pass that rewrites immediate operands of LOADCONST /
+// VCREATE instructions into indices of the per-function literal table
+// maintained by AMDILMachineFunctionInfo (see trackLiterals below).
+class AMDILLiteralManager : public MachineFunctionPass
+{
+public:
+  static char ID;
+  AMDILLiteralManager(TargetMachine &tm, CodeGenOpt::Level OL);
+  virtual const char *getPassName() const;
+
+  bool runOnMachineFunction(MachineFunction &MF);
+private:
+  // Visitor invoked once per instruction; returns false so iteration
+  // continues (see safeNestedForEach in runOnMachineFunction).
+  bool trackLiterals(MachineBasicBlock::iterator *bbb);
+  TargetMachine &TM;
+  const AMDILSubtarget *mSTM;   // subtarget of the function being compiled
+  AMDILKernelManager *mKM;      // kernel manager owned by the subtarget
+  AMDILMachineFunctionInfo *mMFI; // literal tables live here
+  int32_t mLitIdx;              // NOTE(review): never read in this file
+  bool mChanged;                // whether any operand was rewritten
+};
+char AMDILLiteralManager::ID = 0;
+}
+
+namespace llvm
+{
+// Factory entry point used by the AMDIL target to add this pass to the
+// codegen pipeline.  Ownership of the returned pass transfers to the
+// pass manager.
+FunctionPass *
+createAMDILLiteralManager(TargetMachine &tm, CodeGenOpt::Level OL)
+{
+  return new AMDILLiteralManager(tm, OL);
+}
+
+}
+
+// Construct the pass.  All per-function state (mSTM, mKM, mMFI, mLitIdx,
+// mChanged) is explicitly zero-initialized here instead of being left
+// indeterminate until runOnMachineFunction() assigns it; reading an
+// uninitialized member before that point would be undefined behavior.
+// The OL parameter is accepted for interface symmetry with the other
+// AMDIL passes but is not used by this pass.
+AMDILLiteralManager::AMDILLiteralManager(TargetMachine &tm,
+    CodeGenOpt::Level OL)
+  : MachineFunctionPass(ID),
+    TM(tm),
+    mSTM(0),
+    mKM(0),
+    mMFI(0),
+    mLitIdx(-1),
+    mChanged(false)
+{
+}
+
+// Walk every instruction of MF through trackLiterals, converting literal
+// immediates into literal-table indices.  Returns whether the function was
+// modified.
+bool AMDILLiteralManager::runOnMachineFunction(MachineFunction &MF)
+{
+  mChanged = false;
+  mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
+  const AMDILTargetMachine *amdtm =
+    reinterpret_cast<const AMDILTargetMachine *>(&TM);
+  // NOTE(review): dynamic_cast requires RTTI, which LLVM normally builds
+  // without; a static_cast would match the reinterpret_cast above -- confirm.
+  mSTM = dynamic_cast<const AMDILSubtarget *>(amdtm->getSubtargetImpl());
+  mKM = const_cast<AMDILKernelManager *>(mSTM->getKernelManager());
+  // Visit every instruction in every block; trackLiterals always returns
+  // false, so the traversal never terminates early.
+  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+                    std::bind1st(std::mem_fun(&AMDILLiteralManager::trackLiterals), this));
+  return mChanged;
+}
+
+// Rewrite the immediate operand of literal-producing instructions into the
+// literal-table index allocated by AMDILMachineFunctionInfo.  Always returns
+// false so that safeNestedForEach continues the traversal; the pass-level
+// mChanged flag records whether any operand was rewritten.
+bool AMDILLiteralManager::trackLiterals(MachineBasicBlock::iterator *bbb)
+{
+  MachineInstr *MI = *bbb;
+  uint32_t Opcode = MI->getOpcode();
+  switch(Opcode) {
+  default:
+    // Not a literal-carrying instruction; nothing to do.
+    return false;
+  case AMDIL::VCREATE_v2i8:
+  case AMDIL::VCREATE_v2i16:
+  case AMDIL::VCREATE_v2i32:
+  case AMDIL::VCREATE_v2i64:
+  case AMDIL::VCREATE_v2f32:
+  case AMDIL::VCREATE_v2f64:
+  case AMDIL::VCREATE_v4i8:
+  case AMDIL::VCREATE_v4i16:
+  case AMDIL::VCREATE_v4i32:
+  case AMDIL::VCREATE_v4f32:
+  case AMDIL::LOADCONST_i8:
+  case AMDIL::LOADCONST_i16:
+  case AMDIL::LOADCONST_i32:
+  case AMDIL::LOADCONST_i64:
+  case AMDIL::LOADCONST_f32:
+  case AMDIL::LOADCONST_f64:
+    break;
+  }
+  MachineOperand &dstOp = MI->getOperand(0);
+  MachineOperand &litOp = MI->getOperand(1);
+  if (!litOp.isImm() && !litOp.isFPImm()) {
+    return false;
+  }
+  if (!dstOp.isReg()) {
+    return false;
+  }
+  // Change the literal to the correct index for each literal that is found.
+  if (litOp.isImm()) {
+    int64_t immVal = litOp.getImm();
+    // 64-bit literals live in a separate pool; everything narrower goes into
+    // the 32-bit pool (addi32Literal sign-extends i8/i16 based on Opcode).
+    uint32_t idx = MI->getOpcode() == AMDIL::LOADCONST_i64
+                   ? mMFI->addi64Literal(immVal)
+                   : mMFI->addi32Literal(static_cast<int>(immVal), Opcode);
+    litOp.ChangeToImmediate(idx);
+    // BUGFIX: record that the function was modified; the original never set
+    // mChanged, so runOnMachineFunction() always reported "no change" even
+    // though operands were rewritten.
+    mChanged = true;
+    return false;
+  }
+
+  if (litOp.isFPImm()) {
+    const ConstantFP *fpVal = litOp.getFPImm();
+    uint32_t idx = MI->getOpcode() == AMDIL::LOADCONST_f64
+                   ? mMFI->addf64Literal(fpVal)
+                   : mMFI->addf32Literal(fpVal);
+    litOp.ChangeToImmediate(idx);
+    mChanged = true;
+    return false;
+  }
+
+  return false;
+}
+
+// Human-readable pass name shown by -debug-pass output and timing reports.
+const char* AMDILLiteralManager::getPassName() const
+{
+  static const char *const Name = "AMDIL Literal Manager";
+  return Name;
+}
+
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,109 @@
+//===-- AMDILMCAsmInfo.cpp ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILMCAsmInfo.h"
+#include "AMDILLLVMPC.h"
+#ifndef NULL
+#define NULL 0
+#endif
+
+using namespace llvm;
+// Configure the assembly dialect emitted for AMD IL.  IL uses ';' comments,
+// '@' global prefixes and has no real data/align directives, so most MCAsmInfo
+// knobs are either disabled or pointed at IL-comment spellings.
+AMDILMCAsmInfo::AMDILMCAsmInfo(const Triple &Triple) : MCAsmInfo()
+{
+  //===------------------------------------------------------------------===//
+  HasSubsectionsViaSymbols = true;
+  HasMachoZeroFillDirective = false;
+  HasMachoTBSSDirective = false;
+  HasStaticCtorDtorReferenceInStaticMode = false;
+  LinkerRequiresNonEmptyDwarfLines = true;
+  // IL "instructions" are text; 16 is the assumed worst-case encoded length.
+  MaxInstLength = 16;
+  PCSymbol = "$";
+  SeparatorString = "\n";
+  CommentColumn = 40;
+  CommentString = ";";
+  LabelSuffix = ":";
+  GlobalPrefix = "@";
+  // Private symbols are hidden behind the IL comment marker.
+  PrivateGlobalPrefix = ";.";
+  LinkerPrivateGlobalPrefix = "!";
+  InlineAsmStart = ";#ASMSTART";
+  InlineAsmEnd = ";#ASMEND";
+  AssemblerDialect = 0;
+  AllowQuotesInName = false;
+  AllowNameToStartWithDigit = false;
+  AllowPeriodsInName = false;
+
+  //===--- Data Emission Directives -------------------------------------===//
+  ZeroDirective = ".zero";
+  AsciiDirective = ".ascii\t";
+  AscizDirective = ".asciz\t";
+  Data8bitsDirective = ".byte\t";
+  Data16bitsDirective = ".short\t";
+  Data32bitsDirective = ".long\t";
+  Data64bitsDirective = ".quad\t";
+  GPRel32Directive = NULL;
+  SunStyleELFSectionSwitchSyntax = true;
+  UsesELFSectionDirectiveForBSS = true;
+  HasMicrosoftFastStdCallMangling = false;
+
+  //===--- Alignment Information ----------------------------------------===//
+  // Alignment is emitted as an IL comment (leading ';').
+  AlignDirective = ";.align\t";
+  AlignmentIsInBytes = true;
+  TextAlignFillValue = 0;
+
+  //===--- Global Variable Emission Directives --------------------------===//
+  GlobalDirective = ".global";
+  ExternDirective = ".extern";
+  HasSetDirective = false;
+  // TODO: This makes the symbol definition have the math instead
+  // of the symbol use. This could be disabled and handled as it
+  // would simplify the patching code in AMDILMDParser.cpp.
+  HasAggressiveSymbolFolding = true;
+  LCOMMDirectiveType = LCOMM::None;
+  COMMDirectiveAlignmentIsInBytes = false;
+  // TODO: This generates .type @__OpenCL_<name>_kernel, at function
+  // and .size @__OpenCL_<name>_kernel, ;.<tmp>- at __OpenCL_<name>_kernel,
+  // which is not handled in AMDILMDParser.cpp.
+  HasDotTypeDotSizeDirective = false;
+  HasSingleParameterDotFile = true;
+  HasNoDeadStrip = true;
+  HasSymbolResolver = false;
+  WeakRefDirective = ".weakref\t";
+  WeakDefDirective = ".weakdef\t";
+  LinkOnceDirective = NULL;
+  HiddenVisibilityAttr = MCSA_Hidden;
+  HiddenDeclarationVisibilityAttr = MCSA_Hidden;
+  ProtectedVisibilityAttr = MCSA_Protected;
+
+  //===--- Dwarf Emission Directives -----------------------------------===//
+  HasLEB128 = true;
+  SupportsDebugInformation = true;
+  ExceptionsType = ExceptionHandling::None;
+  DwarfUsesInlineInfoSection = false;
+  DwarfSectionOffsetDirective = ".offset";
+  //DwarfUsesLabelOffsetForRanges = true;
+
+  //===--- CBE Asm Translation Table -----------------------------------===//
+  //AsmTransCBE = NULL;
+}
+// AMDIL emits no address-space-specific data directives: the original
+// switch returned NULL from its default case, from case 0, and from the
+// unreachable code after the switch.  Collapsed to a single return; the
+// parameters are kept (and silenced) to preserve the declared interface.
+const char*
+AMDILMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
+{
+  (void)Size;
+  (void)AS;
+  return NULL;
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,31 @@
+//===-- AMDILMCAsmInfo.h --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILMCASMINFO_H_
+#define AMDILMCASMINFO_H_
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "AMDILLLVMPC.h"
+namespace llvm
+{
+class Triple;
+
+// Assembly-dialect description for the AMD IL target (comment strings,
+// directive spellings, feature flags).  See AMDILMCAsmInfo.cpp for the
+// concrete settings.
+class AMDILMCAsmInfo : public MCAsmInfo
+{
+public:
+  AMDILMCAsmInfo(const Triple &Triple);
+  // Returns the data directive for the given size/address space; the
+  // implementation returns NULL for every address space.
+  const char*
+  getDataASDirective(unsigned int Size, unsigned int AS) const;
+};
+} // namespace llvm
+#endif // AMDILMCASMINFO_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,164 @@
+//===-- AMDILMCCodeEmitter.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "amdil-emitter"
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+
+
+using namespace llvm;
+#if 0
+namespace
+{
+// Skeleton MC code emitter for AMDIL.  NOTE: this entire translation unit's
+// implementation is compiled out by the surrounding `#if 0`; it is kept as a
+// template for a future binary encoder.
+class AMDILMCCodeEmitter : public MCCodeEmitter
+{
+  AMDILMCCodeEmitter(const AMDILMCCodeEmitter &);// DO NOT IMPLEMENT
+  void operator=(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT
+  const TargetMachine &TM;
+  const TargetInstrInfo &TII;
+  MCContext &Ctx;
+  bool Is64BitMode;
+public:
+  AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit);
+  ~AMDILMCCodeEmitter();
+  // Number of target-specific fixup kinds (currently none).
+  unsigned getNumFixupKinds() const;
+  const MCFixupKindInfo& getFixupKindInfo(MCFixupKind Kind) const;
+  static unsigned GetAMDILRegNum(const MCOperand &MO);
+  // Low-level byte/constant serialization helpers; CurByte tracks the
+  // current output offset for fixup placement.
+  void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const;
+  void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+                    raw_ostream &OS) const;
+  void EmitImmediate(const MCOperand &Disp, unsigned ImmSize,
+                     MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &os,
+                     SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
+
+  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                         SmallVectorImpl<MCFixup> &Fixups) const;
+
+}; // class AMDILMCCodeEmitter
+}; // anonymous namespace
+
+namespace llvm
+{
+// Factory for the (currently disabled) AMDIL MC code emitter; always
+// constructs the 32-bit-mode variant.
+MCCodeEmitter *createAMDILMCCodeEmitter(const Target &,
+                                        TargetMachine &TM, MCContext &Ctx)
+{
+  return new AMDILMCCodeEmitter(TM, Ctx, false);
+}
+}
+
+// Cache the target machine, its instruction info and the MC context; the
+// 64-bit flag is stored for future encoding decisions.
+AMDILMCCodeEmitter::AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx
+                                       , bool is64Bit)
+  : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx)
+{
+  Is64BitMode = is64Bit;
+}
+
+AMDILMCCodeEmitter::~AMDILMCCodeEmitter()
+{
+}
+
+// AMDIL defines no target-specific fixups.
+unsigned
+AMDILMCCodeEmitter::getNumFixupKinds() const
+{
+  return 0;
+}
+
+// With zero target fixup kinds, every valid Kind is generic and is
+// delegated to the base class; a target-range Kind trips the assert.
+const MCFixupKindInfo &
+AMDILMCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const
+{
+//  const static MCFixupKindInfo Infos[] = {};
+  if (Kind < FirstTargetFixupKind) {
+    return MCCodeEmitter::getFixupKindInfo(Kind);
+  }
+  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+         "Invalid kind!");
+  return MCCodeEmitter::getFixupKindInfo(Kind);
+// return Infos[Kind - FirstTargetFixupKind];
+
+}
+
+// Write one raw byte to the stream and advance the running byte counter.
+void
+AMDILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte,
+                             raw_ostream &OS) const
+{
+  OS << static_cast<char>(C);
+  ++CurByte;
+}
+// Serialize `Val` as `Size` bytes, least-significant byte first
+// (little-endian), routed through EmitByte so CurByte stays in sync.
+void
+AMDILMCCodeEmitter::EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+                                 raw_ostream &OS) const
+{
+  for (unsigned Remaining = Size; Remaining != 0; --Remaining) {
+    EmitByte(static_cast<unsigned char>(Val & 0xFF), CurByte, OS);
+    Val >>= 8;
+  }
+}
+// Emit a displacement operand.  Plain integer immediates are serialized
+// directly; symbolic expressions are recorded as a fixup plus `ImmSize`
+// placeholder zero bytes to be patched later.
+void
+AMDILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned ImmSize,
+                                  MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
+                                  SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const
+{
+  // If this is a simple integer displacement that doesn't require a relocation
+  // emit it now.
+  if (DispOp.isImm()) {
+    EmitConstant(DispOp.getImm() + ImmOffset, ImmSize, CurByte, OS);
+    // BUGFIX: the original fell through and called getExpr() on an
+    // immediate operand, which is invalid; an immediate needs no fixup.
+    return;
+  }
+
+  // If we have an immoffset, add it to the expression
+  const MCExpr *Expr = DispOp.getExpr();
+
+  if (ImmOffset) {
+    Expr = MCBinaryExpr::CreateAdd(Expr,
+                                   MCConstantExpr::Create(ImmOffset, Ctx), Ctx);
+  }
+  // Emit a symbolic constant as a fixup and 4 zeros.
+  Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
+  // Placeholder bytes reserve space for the value the fixup will patch in.
+  EmitConstant(0, ImmSize, CurByte, OS);
+}
+
+// Encode a single MCInst.  The body is itself disabled with `#if 0` (inside
+// the file-wide `#if 0`): it sketches the intended flow -- read the opcode
+// descriptor, walk operands emitting bytes, and abort in debug builds if any
+// operand was left unencoded -- but currently emits nothing.
+void
+AMDILMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                                      SmallVectorImpl<MCFixup> &Fixups) const
+{
+#if 0
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = TII.get(Opcode);
+  unsigned TSFlags = Desc.TSFlags;
+
+  // Keep track of the current byte being emitted.
+  unsigned CurByte = 0;
+
+  unsigned NumOps = Desc.getNumOperands();
+  unsigned CurOp = 0;
+
+  unsigned char BaseOpcode = 0;
+#ifndef NDEBUG
+  // FIXME: Verify.
+  if (// !Desc.isVariadic() &&
+    CurOp != NumOps) {
+    errs() << "Cannot encode all operands of: ";
+    MI.dump();
+    errs() << '\n';
+    abort();
+  }
+#endif
+#endif
+}
+#endif

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,739 @@
+//===-- AMDILMachineFunctionInfo.cpp --------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cstdio>
+#include <ostream>
+#include <algorithm>
+#include <string>
+#include <queue>
+#include <list>
+#include <utility>
+using namespace llvm;
+
+// Linear search of the kernel's constant-pointer table for an entry whose
+// name matches `arg`; returns NULL when the kernel is absent or no entry
+// matches.
+static const AMDILConstPtr *getConstPtr(const AMDILKernel *krnl, const std::string &arg)
+{
+  if (!krnl) {
+    return NULL;
+  }
+  llvm::SmallVector<AMDILConstPtr, DEFAULT_VEC_SLOTS>::const_iterator begin, end;
+  for (begin = krnl->constPtr.begin(), end = krnl->constPtr.end();
+       begin != end; ++begin) {
+    // NOTE(review): strcmp on name.data() assumes the buffer is
+    // NUL-terminated -- confirm the type of AMDILConstPtr::name guarantees
+    // that (a StringRef's data() need not be).
+    if (!strcmp(begin->name.data(),arg.c_str())) {
+      return &(*begin);
+    }
+  }
+  return NULL;
+}
+
+// Record the size of printf operand `idx`, growing (or shrinking) the
+// operand table so the slot exists.  resize() is applied unconditionally,
+// matching the original semantics exactly.
+void PrintfInfo::addOperand(size_t idx, uint32_t size)
+{
+  const unsigned slot = (unsigned)idx;
+  mOperands.resize(slot + 1);
+  mOperands[slot] = size;
+}
+
+// Unique ID assigned to this printf format string.
+uint32_t PrintfInfo::getPrintfID()
+{
+  return mPrintfID;
+}
+
+void PrintfInfo::setPrintfID(uint32_t id)
+{
+  mPrintfID = id;
+}
+
+// Number of operand slots recorded so far.
+size_t PrintfInfo::getNumOperands()
+{
+  return mOperands.size();
+}
+
+// Size entry for operand `idx`; no bounds checking is performed.
+uint32_t PrintfInfo::getOperandID(uint32_t idx)
+{
+  return mOperands[idx];
+}
+
+// Default constructor: no MachineFunction attached (mMF/mKernel NULL); the
+// lazily-computed sizes are set to the -1 "not yet computed" sentinel.
+// 11 literal slots are reserved up front (see mReservedLits).
+AMDILMachineFunctionInfo::AMDILMachineFunctionInfo()
+  : CalleeSavedFrameSize(0), BytesToPopOnReturn(0),
+    DecorationStyle(None), ReturnAddrIndex(0),
+    TailCallReturnAddrDelta(0),
+    SRetReturnReg(0), mReservedLits(11),
+    UsesLDS(false), LDSArg(false),
+    UsesGDS(false), GDSArg(false),
+    UsesScratch(false), ScratchArg(false),
+    UsesConstant(false), ConstantArg(false)
+{
+  memset(mUsedMem, 0, sizeof(mUsedMem));
+  mMF = NULL;
+  mKernel = NULL;
+  // -1 marks getScratchSize/getArgSize/getStackSize results as uncached.
+  mScratchSize = -1;
+  mArgSize = -1;
+  mStackSize = -1;
+}
+
+// Per-MachineFunction constructor: processes the enclosing module's AMDIL
+// metadata, assigns an ID to every function in the module, and looks up this
+// function's kernel record (NULL if it is not a kernel).
+AMDILMachineFunctionInfo::AMDILMachineFunctionInfo(MachineFunction& MF)
+  : CalleeSavedFrameSize(0), BytesToPopOnReturn(0),
+    DecorationStyle(None), ReturnAddrIndex(0),
+    TailCallReturnAddrDelta(0),
+    SRetReturnReg(0), mReservedLits(11),
+    UsesLDS(false), LDSArg(false),
+    UsesGDS(false), GDSArg(false),
+    UsesScratch(false), ScratchArg(false),
+    UsesConstant(false), ConstantArg(false)
+{
+  memset(mUsedMem, 0, sizeof(mUsedMem));
+  const Function *F = MF.getFunction();
+  mMF = &MF;
+  MachineModuleInfo &mmi = MF.getMMI();
+  const AMDILTargetMachine *TM =
+    reinterpret_cast<const AMDILTargetMachine*>(&MF.getTarget());
+  AMDILModuleInfo *AMI = &(mmi.getObjFileInfo<AMDILModuleInfo>());
+  AMI->processModule(mmi.getModule(), TM);
+  for (Module::const_iterator I = F->getParent()->begin(),
+       E = F->getParent()->end(); I != E; ++I) {
+    // Map all the known names to a unique number
+    AMI->getOrCreateFunctionID(I->getName());
+  }
+  mSTM = TM->getSubtargetImpl();
+  mKernel = AMI->getKernel(F->getName());
+
+  // -1 marks the lazily-computed sizes as uncached.
+  mScratchSize = -1;
+  mArgSize = -1;
+  mStackSize = -1;
+}
+
+// Release the PrintfInfo objects owned (via raw pointers) by mPrintfMap.
+AMDILMachineFunctionInfo::~AMDILMachineFunctionInfo()
+{
+  typedef std::map<std::string, PrintfInfo*>::iterator PrintfIter;
+  for (PrintfIter It = printf_begin(), End = printf_end(); It != End; ++It) {
+    delete It->second;
+  }
+}
+// --- Trivial accessors for the calling-convention bookkeeping fields. ---
+
+// Bytes of callee-saved registers spilled in the prologue.
+unsigned int
+AMDILMachineFunctionInfo::getCalleeSavedFrameSize() const
+{
+  return CalleeSavedFrameSize;
+}
+void
+AMDILMachineFunctionInfo::setCalleeSavedFrameSize(unsigned int bytes)
+{
+  CalleeSavedFrameSize = bytes;
+}
+// Bytes the callee pops on return (callee-cleanup conventions).
+unsigned int
+AMDILMachineFunctionInfo::getBytesToPopOnReturn() const
+{
+  return BytesToPopOnReturn;
+}
+void
+AMDILMachineFunctionInfo::setBytesToPopOnReturn(unsigned int bytes)
+{
+  BytesToPopOnReturn = bytes;
+}
+// Symbol-name decoration style for this function.
+NameDecorationStyle
+AMDILMachineFunctionInfo::getDecorationStyle() const
+{
+  return DecorationStyle;
+}
+void
+AMDILMachineFunctionInfo::setDecorationStyle(NameDecorationStyle style)
+{
+  DecorationStyle = style;
+}
+// Frame index of the return-address slot.
+int
+AMDILMachineFunctionInfo::getRAIndex() const
+{
+  return ReturnAddrIndex;
+}
+void
+AMDILMachineFunctionInfo::setRAIndex(int index)
+{
+  ReturnAddrIndex = index;
+}
+// Return-address displacement used by tail calls.
+int
+AMDILMachineFunctionInfo::getTCReturnAddrDelta() const
+{
+  return TailCallReturnAddrDelta;
+}
+void
+AMDILMachineFunctionInfo::setTCReturnAddrDelta(int delta)
+{
+  TailCallReturnAddrDelta = delta;
+}
+// Virtual register holding the sret pointer to return, 0 if none.
+unsigned int
+AMDILMachineFunctionInfo::getSRetReturnReg() const
+{
+  return SRetReturnReg;
+}
+void
+AMDILMachineFunctionInfo::setSRetReturnReg(unsigned int reg)
+{
+  SRetReturnReg = reg;
+}
+
+// Whether the named constant pointer is backed by hardware constant
+// buffers for the current kernel.  Unknown names report false.
+bool
+AMDILMachineFunctionInfo::usesHWConstant(std::string name) const
+{
+  const AMDILConstPtr *entry = getConstPtr(mKernel, name);
+  return entry ? entry->usesHardware : false;
+}
+
+// Return the work-group size for dimension `dim` (0-2), or the flattened
+// total (x*y*z) for dim == 3.  A kernel carrying a required-group-size
+// attribute (mKernel->sgv) overrides the subtarget defaults; any other
+// dimension value falls back to the defaults (or 1).
+// Cleanup: removed the `break` statements after `return` and the trailing
+// `return 1`, all unreachable, plus the stray `;` after each switch.
+uint32_t
+AMDILMachineFunctionInfo::getLocal(uint32_t dim)
+{
+  if (mKernel && mKernel->sgv) {
+    AMDILKernelAttr *sgv = mKernel->sgv;
+    switch (dim) {
+    default:
+      // Unknown dimension: fall through to the subtarget defaults below.
+      break;
+    case 0:
+    case 1:
+    case 2:
+      return sgv->reqGroupSize[dim];
+    case 3:
+      return sgv->reqGroupSize[0] * sgv->reqGroupSize[1] * sgv->reqGroupSize[2];
+    }
+  }
+  switch (dim) {
+  default:
+    return 1;
+  case 3:
+    return mSTM->getDefaultSize(0) *
+           mSTM->getDefaultSize(1) *
+           mSTM->getDefaultSize(2);
+  case 2:
+  case 1:
+  case 0:
+    return mSTM->getDefaultSize(dim);
+  }
+}
+// True when this function has a kernel record and that record is flagged
+// as an actual kernel entry point.
+bool
+AMDILMachineFunctionInfo::isKernel() const
+{
+  return mKernel != NULL && mKernel->mKernel;
+}
+
+// Kernel record for this function; NULL for non-kernel functions.
+AMDILKernel*
+AMDILMachineFunctionInfo::getKernel()
+{
+  return mKernel;
+}
+
+// Name of the underlying IR function, or "" when no MachineFunction is
+// attached (default-constructed info object).
+std::string
+AMDILMachineFunctionInfo::getName()
+{
+  if (!mMF) {
+    return "";
+  }
+  return mMF->getFunction()->getName();
+}
+
+// Compute (once, then cache in mArgSize) the kernel argument buffer size in
+// bytes.  Arguments are counted in 16-byte slots: scalars take one slot,
+// vectors are packed by element width, images take two slots when image
+// support is present, and by-value private structs take their 16-byte
+// rounded allocation size.
+uint32_t
+AMDILMachineFunctionInfo::getArgSize()
+{
+  if (mArgSize == -1) {
+    const AMDILTargetMachine *TM =
+      reinterpret_cast<const AMDILTargetMachine*>(&mMF->getTarget());
+    Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
+    Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
+    uint32_t Counter = 0;
+    while (I != Ie) {
+      Type* curType = I->getType();
+      if (curType->isIntegerTy() || curType->isFloatingPointTy()) {
+        ++Counter;
+      } else if (const VectorType *VT = dyn_cast<VectorType>(curType)) {
+        Type *ET = VT->getElementType();
+        int numEle = VT->getNumElements();
+        switch (ET->getPrimitiveSizeInBits()) {
+        default:
+          // 32-bit elements: one slot per 4 elements, but vec3 still takes
+          // one full slot.
+          if (numEle == 3) {
+            Counter++;
+          } else {
+            Counter += ((numEle + 2) >> 2);
+          }
+          break;
+        case 64:
+          if (numEle == 3) {
+            Counter += 2;
+          } else {
+            Counter += (numEle >> 1);
+          }
+          break;
+        case 16:
+        case 8:
+          switch (numEle) {
+          default:
+            // NOTE(review): no `break` here, so the default case ALSO runs
+            // the case-2 `Counter++` below -- confirm this fallthrough is
+            // intentional and not a missing break.
+            Counter += ((numEle + 2) >> 2);
+          case 2:
+            Counter++;
+            break;
+          }
+          break;
+        }
+      } else if (const PointerType *PT = dyn_cast<PointerType>(curType)) {
+        Type *CT = PT->getElementType();
+        const StructType *ST = dyn_cast<StructType>(CT);
+        if (ST && ST->isOpaque()) {
+          // Opaque struct pointers are OpenCL image handles when their name
+          // matches one of the builtin image struct spellings.
+          bool i1d  = ST->getName().startswith("struct._image1d_t");
+          bool i1da = ST->getName().startswith("struct._image1d_array_t");
+          bool i1db = ST->getName().startswith("struct._image1d_buffer_t");
+          bool i2d  = ST->getName().startswith("struct._image2d_t");
+          bool i2da = ST->getName().startswith("struct._image2d_array_t");
+          bool i3d  = ST->getName().startswith("struct._image3d_t");
+          bool is_image = i1d || i1da || i1db || i2d || i2da || i3d;
+          if (is_image) {
+            if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
+              Counter += 2;
+            } else {
+              addErrorMsg(amd::CompilerErrorMessage[NO_IMAGE_SUPPORT]);
+            }
+          } else {
+            Counter++;
+          }
+        } else if (CT->isStructTy()
+                   && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+          // By-value private struct: slots = allocation size rounded up to
+          // 16 bytes, divided by 16.
+          StructType *ST = dyn_cast<StructType>(CT);
+          const TargetData* TD = TM->getTargetData();
+          Counter += TD->RoundUpAlignment(TD->getTypeAllocSize(ST), 16) >> 4;
+        } else if (CT->isIntOrIntVectorTy()
+                   || CT->isFPOrFPVectorTy()
+                   || CT->isArrayTy()
+                   || CT->isPointerTy()
+                   || PT->getAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+          ++Counter;
+        } else {
+          assert(0 && "Current type is not supported!");
+          addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+        }
+      } else {
+        assert(0 && "Current type is not supported!");
+        addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
+      }
+      ++I;
+    }
+    // Convert from slots to bytes by multiplying by 16(shift by 4).
+    mArgSize = Counter << 4;
+  }
+  return (uint32_t)mArgSize;
+}
+// Compute (once, then cache in mScratchSize) the scratch-memory footprint of
+// the function's arguments: each argument's allocation size rounded up to a
+// 16-byte boundary, summed over all arguments.
+uint32_t
+AMDILMachineFunctionInfo::getScratchSize()
+{
+  const AMDILTargetMachine *TM =
+    reinterpret_cast<const AMDILTargetMachine*>(&mMF->getTarget());
+  if (mScratchSize == -1) {
+    mScratchSize = 0;
+    Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
+    Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
+    while (I != Ie) {
+      Type *curType = I->getType();
+      const TargetData* TD = TM->getTargetData();
+      mScratchSize += TD->RoundUpAlignment(TD->getTypeAllocSize(curType), 16);
+      ++I;
+    }
+    // NOTE(review): `+=` here roughly doubles the total instead of aligning
+    // it -- an align-up would be `mScratchSize = (mScratchSize + 15) & ~15`.
+    // The sum above is already 16-byte aligned, so this looks suspicious;
+    // confirm whether the doubling is intentional padding.
+    mScratchSize += ((mScratchSize + 15) & ~15);
+  }
+  return (uint32_t)mScratchSize;
+}
+
+// Compute (once, then cache in mStackSize) the private stack size: the frame
+// size plus offset adjustment, plus the argument scratch area when compiling
+// at -O0 or when any argument is a pointer to a private-address-space struct.
+uint32_t
+AMDILMachineFunctionInfo::getStackSize()
+{
+  if (mStackSize == -1) {
+    uint32_t privSize = 0;
+    const MachineFrameInfo *MFI = mMF->getFrameInfo();
+    privSize = MFI->getOffsetAdjustment() + MFI->getStackSize();
+    const AMDILTargetMachine *TM =
+      reinterpret_cast<const AMDILTargetMachine*>(&mMF->getTarget());
+    bool addStackSize = TM->getOptLevel() == CodeGenOpt::None;
+    Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
+    Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
+    while (I != Ie) {
+      Type *curType = I->getType();
+      ++I;
+      // Cleanup: the original called dyn_cast<PointerType> three times on
+      // the same value; cast once and reuse the result.
+      if (const PointerType *PT = dyn_cast<PointerType>(curType)) {
+        Type *CT = PT->getElementType();
+        if (CT->isStructTy()
+            && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+          addStackSize = true;
+        }
+      }
+    }
+    if (addStackSize) {
+      privSize += getScratchSize();
+    }
+    mStackSize = privSize;
+  }
+  return (uint32_t)mStackSize;
+
+}
+
+// Map a 32-bit (or narrower) integer literal to its literal-table index,
+// allocating a new index on first use.
+//
+// Since we have emulated 16/8/1 bit register types with a 32bit real
+// register, we need to sign extend the constants to 32bits in order for
+// comparisons against the constants to work correctly, this fixes some issues
+// we had in conformance failing for saturation.
+uint32_t
+AMDILMachineFunctionInfo::addi32Literal(uint32_t val, int Opcode)
+{
+  if (Opcode == AMDIL::LOADCONST_i16) {
+    val = (((int32_t)val << 16) >> 16);
+  } else if (Opcode == AMDIL::LOADCONST_i8) {
+    val = (((int32_t)val << 24) >> 24);
+  }
+  if (mIntLits.find(val) == mIntLits.end()) {
+    // BUGFIX: compute the new index *before* operator[] inserts the entry.
+    // In `mIntLits[val] = getNumLiterals()` the evaluation order of the two
+    // sides is unspecified (pre-C++17), so getNumLiterals() could already
+    // count the freshly inserted element and yield an off-by-one index.
+    uint32_t idx = (uint32_t)getNumLiterals();
+    mIntLits[val] = idx;
+  }
+  return mIntLits[val];
+}
+
+// Map a 64-bit integer literal to its literal-table index, allocating a new
+// index on first use.
+uint32_t
+AMDILMachineFunctionInfo::addi64Literal(uint64_t val)
+{
+  if (mLongLits.find(val) == mLongLits.end()) {
+    // BUGFIX: compute the index before operator[] inserts the entry; the
+    // original's unspecified evaluation order could include the new element
+    // in getNumLiterals() and produce an off-by-one index.
+    uint32_t idx = (uint32_t)getNumLiterals();
+    mLongLits[val] = idx;
+  }
+  return mLongLits[val];
+}
+
+// Map a 128-bit literal (as a low/high 64-bit pair) to its literal-table
+// index, allocating a new index on first use.
+uint32_t
+AMDILMachineFunctionInfo::addi128Literal(uint64_t val_lo, uint64_t val_hi)
+{
+  std::pair<uint64_t, uint64_t> a(val_lo, val_hi);
+  if (mVecLits.find(a) == mVecLits.end()) {
+    // BUGFIX: compute the index before operator[] inserts the entry (the
+    // original's unspecified evaluation order risked an off-by-one index).
+    uint32_t idx = (uint32_t)getNumLiterals();
+    mVecLits[a] = idx;
+  }
+  return mVecLits[a];
+}
+
+// Map a 32-bit float literal (raw bit pattern) to its literal-table index.
+// Floats share the 32-bit integer pool since only the bits matter.
+uint32_t
+AMDILMachineFunctionInfo::addf32Literal(uint32_t val)
+{
+  if (mIntLits.find(val) == mIntLits.end()) {
+    // BUGFIX: compute the index before operator[] inserts the entry (the
+    // original's unspecified evaluation order risked an off-by-one index).
+    uint32_t idx = (uint32_t)getNumLiterals();
+    mIntLits[val] = idx;
+  }
+  return mIntLits[val];
+}
+
+// Overload taking the ConstantFP directly; extracts the IEEE-754 bit
+// pattern and shares the 32-bit pool as above.
+uint32_t
+AMDILMachineFunctionInfo::addf32Literal(const ConstantFP *CFP)
+{
+  uint32_t val = (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+  if (mIntLits.find(val) == mIntLits.end()) {
+    uint32_t idx = (uint32_t)getNumLiterals();
+    mIntLits[val] = idx;
+  }
+  return mIntLits[val];
+}
+
+// Map a 64-bit double literal (raw bit pattern) to its literal-table index.
+// Doubles share the 64-bit integer pool since only the bits matter.
+uint32_t
+AMDILMachineFunctionInfo::addf64Literal(uint64_t val)
+{
+  if (mLongLits.find(val) == mLongLits.end()) {
+    // BUGFIX: compute the index before operator[] inserts the entry (the
+    // original's unspecified evaluation order risked an off-by-one index).
+    uint32_t idx = (uint32_t)getNumLiterals();
+    mLongLits[val] = idx;
+  }
+  return mLongLits[val];
+}
+
+// Overload taking the ConstantFP directly.  A single-precision constant is
+// first widened to double so that both precisions key the same pool by the
+// double's bit pattern (extracted via the union type-pun below).
+uint32_t
+AMDILMachineFunctionInfo::addf64Literal(const ConstantFP *CFP)
+{
+  union dtol_union {
+    double d;
+    uint64_t ul;
+  } dval;
+  const APFloat &APF = CFP->getValueAPF();
+  if (&APF.getSemantics() == (const llvm::fltSemantics *)&APFloat::IEEEsingle) {
+    float fval = APF.convertToFloat();
+    dval.d = (double)fval;
+  } else {
+    dval.d = APF.convertToDouble();
+  }
+  if (mLongLits.find(dval.ul) == mLongLits.end()) {
+    // BUGFIX: index computed before insertion, as in the other add*Literal
+    // methods, to avoid the unspecified-evaluation-order off-by-one.
+    uint32_t idx = (uint32_t)getNumLiterals();
+    mLongLits[dval.ul] = idx;
+  }
+  return mLongLits[dval.ul];
+}
+
+// Lookup of an already-registered 32-bit literal's table index.
+// NOTE(review): operator[] default-inserts index 0 when the literal was
+// never added -- callers are assumed to query only registered literals.
+uint32_t
+AMDILMachineFunctionInfo::getIntLits(uint32_t offset)
+{
+  return mIntLits[offset];
+}
+
+// Lookup of an already-registered 64-bit literal's table index (same
+// default-insert caveat as getIntLits).
+uint32_t
+AMDILMachineFunctionInfo::getLongLits(uint64_t offset)
+{
+  return mLongLits[offset];
+}
+
+// Lookup of an already-registered 128-bit literal's table index (same
+// default-insert caveat as getIntLits).
+uint32_t
+AMDILMachineFunctionInfo::getVecLits(uint64_t low64, uint64_t high64)
+{
+  return mVecLits[std::pair<uint64_t, uint64_t>(low64, high64)];
+}
+
+// Total number of literal slots allocated so far: every 32-bit, 64-bit and
+// 128-bit literal plus the reserved block at the front of the table.
+size_t
+AMDILMachineFunctionInfo::getNumLiterals() const
+{
+  size_t Count = mReservedLits;
+  Count += mIntLits.size();
+  Count += mLongLits.size();
+  Count += mVecLits.size();
+  return Count;
+}
+
+// Grow the reserved (pre-allocated) region of the literal table.
+void
+AMDILMachineFunctionInfo::addReservedLiterals(uint32_t size)
+{
+  mReservedLits += size;
+}
+
+// Register (or update) a sampler by name and return its stable index.
+// An existing sampler keeps its original index when its value changes;
+// a new sampler gets the next dense index (current table size).
+uint32_t
+AMDILMachineFunctionInfo::addSampler(std::string name, uint32_t val)
+{
+  if (mSamplerMap.find(name) != mSamplerMap.end()) {
+    // Cleanup: update the entry in place through a reference; the original
+    // copied the whole SamplerInfo out, modified it, stored it back and
+    // then looked it up again (four lookups plus two copies).
+    SamplerInfo &entry = mSamplerMap[name];
+    entry.val = val;
+    return entry.idx;
+  } else {
+    SamplerInfo curVal;
+    curVal.name = name;
+    curVal.val = val;
+    // Index is the table size before insertion, giving dense 0..N-1 indices.
+    curVal.idx = mSamplerMap.size();
+    mSamplerMap[name] = curVal;
+    return curVal.idx;
+  }
+}
+
+// Mark memory/resource `id` as referenced by this function.
+void
+AMDILMachineFunctionInfo::setUsesMem(unsigned id)
+{
+  assert(id < AMDILDevice::MAX_IDS &&
+         "Must set the ID to be less than MAX_IDS!");
+  mUsedMem[id] = true;
+}
+
+// Query whether memory/resource `id` was marked as referenced.
+bool
+AMDILMachineFunctionInfo::usesMem(unsigned id)
+{
+  assert(id < AMDILDevice::MAX_IDS &&
+         "Must set the ID to be less than MAX_IDS!");
+  return mUsedMem[id];
+}
+
+// Queue a compiler diagnostic.  `val` gates the message by build type:
+// DEBUG_ONLY messages are recorded only in debug builds, RELEASE_ONLY only
+// in release builds, ALWAYS unconditionally.  Duplicate messages are
+// collapsed by the set.
+void
+AMDILMachineFunctionInfo::addErrorMsg(const char *msg, ErrorMsgEnum val)
+{
+  if (val == DEBUG_ONLY) {
+#if defined(DEBUG) || defined(_DEBUG)
+    mErrors.insert(msg);
+#endif
+  }  else if (val == RELEASE_ONLY) {
+#if !defined(DEBUG) && !defined(_DEBUG)
+    mErrors.insert(msg);
+#endif
+  } else if (val == ALWAYS) {
+    mErrors.insert(msg);
+  }
+}
+
+// Register a printf format string and return its ID.  A string already in
+// the table keeps its existing ID; a new one gets (table size + offset).
+// The PrintfInfo object is owned by mPrintfMap and freed in the destructor.
+uint32_t
+AMDILMachineFunctionInfo::addPrintfString(std::string &name, unsigned offset)
+{
+  // Cleanup: single find() instead of the original's find()-then-operator[]
+  // double lookup on the existing-entry path.
+  std::map<std::string, PrintfInfo*>::iterator it = mPrintfMap.find(name);
+  if (it != mPrintfMap.end()) {
+    return it->second->getPrintfID();
+  }
+  PrintfInfo *info = new PrintfInfo;
+  info->setPrintfID(mPrintfMap.size() + offset);
+  mPrintfMap[name] = info;
+  return info->getPrintfID();
+}
+
+// Record the size of operand `idx` for the named printf string.
+// NOTE(review): if `name` was never passed to addPrintfString, operator[]
+// default-inserts a NULL PrintfInfo* and the ->addOperand call dereferences
+// NULL -- confirm callers always register the string first.
+void
+AMDILMachineFunctionInfo::addPrintfOperand(std::string &name,
+    size_t idx,
+    uint32_t size)
+{
+  mPrintfMap[name]->addOperand(idx, size);
+}
+
+// Convenience overload: forward a C-string to the std::string version.
+void
+AMDILMachineFunctionInfo::addMetadata(const char *md, bool kernelOnly)
+{
+  addMetadata(std::string(md), kernelOnly);
+}
+
+// Record one metadata line.  Kernel-only metadata preserves insertion order
+// (vector); function-level metadata is de-duplicated (set).
+void
+AMDILMachineFunctionInfo::addMetadata(std::string md, bool kernelOnly)
+{
+  if (!kernelOnly) {
+    mMetadataFunc.insert(md);
+  } else {
+    mMetadataKernel.push_back(md);
+  }
+}
+
+size_t
+AMDILMachineFunctionInfo::get_num_write_images()
+{
+  // Total number of write-only images of every supported flavor.
+  size_t total = 0;
+  total += write_image1d_size();
+  total += write_image1d_array_size();
+  total += write_image1d_buffer_size();
+  total += write_image2d_size();
+  total += write_image2d_array_size();
+  total += write_image3d_size();
+  return total;
+}
+
+// Shared implementation for the annotation queries below.  Checks whether
+// the name of 'ptr' is listed in the llvm annotation global variable named
+// <varName> + <current function name>.  Returns false when there is no
+// machine function, no such annotation variable, or no matching entry.
+static bool
+nameListedInAnnotation(const MachineFunction *MF,
+                       std::string varName,
+                       const Value *ptr)
+{
+  if (!MF) return false;
+  std::string argName = ptr->getName();
+  varName += MF->getFunction()->getName();
+  const GlobalVariable *GV =
+    MF->getFunction()->getParent()->getGlobalVariable(varName);
+  if (!GV || !GV->hasInitializer()) return false;
+  const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!CA) return false;
+  for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) {
+    const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CA->getOperand(start));
+    if (!nameField) continue;
+
+    const GlobalVariable *nameGV =
+      dyn_cast<GlobalVariable>(nameField->getOperand(0));
+    if (!nameGV || !nameGV->hasInitializer()) continue;
+
+    const ConstantDataArray *nameArray =
+      dyn_cast<ConstantDataArray>(nameGV->getInitializer());
+    if (!nameArray) continue;
+
+    std::string nameStr = nameArray->getAsString();
+    // Compare everything but the last character: the annotation string
+    // carries a trailing terminator that must not take part in the match.
+    if (!nameStr.compare(0, nameStr.length()-1, argName)) return true;
+  }
+  return false;
+}
+
+bool
+AMDILMachineFunctionInfo::isSignedIntType(const Value* ptr)
+{
+  // Without 3.0 metadata support every integer type is treated as signed.
+  if (!mSTM->supportMetadata30()) return true;
+  return nameListedInAnnotation(mMF,
+      "llvm.signedOrSignedpointee.annotations.", ptr);
+}
+
+bool
+AMDILMachineFunctionInfo::isVolatilePointer(const Value* ptr)
+{
+  if (!mSTM->supportMetadata30()) return false;
+  return nameListedInAnnotation(mMF,
+      "llvm.volatilepointer.annotations.", ptr);
+}
+
+bool
+AMDILMachineFunctionInfo::isRestrictPointer(const Value* ptr)
+{
+  if (!mSTM->supportMetadata30()) return false;
+  return nameListedInAnnotation(mMF,
+      "llvm.restrictpointer.annotations.", ptr);
+}
+
+bool
+AMDILMachineFunctionInfo::isConstantArgument(const Value* ptr)
+{
+  if (!mSTM->supportMetadata30()) return false;
+  return nameListedInAnnotation(mMF,
+      "llvm.argtypeconst.annotations.", ptr);
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,722 @@
+//===-- AMDILMachineFunctionInfo.h ----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AMDIL-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILMACHINEFUNCTIONINFO_H_
+#define _AMDILMACHINEFUNCTIONINFO_H_
+#include "AMDIL.h"
+#include "AMDILDevice.h"
+#include "AMDILKernel.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <string>
+#include <set>
+#include <map>
+namespace llvm
+{
+class AMDILSubtarget;
+// Bookkeeping for one printf format string used by the kernel.
+class PrintfInfo
+{
+  uint32_t mPrintfID; // Unique ID assigned via setPrintfID().
+  // Per-operand data recorded by addOperand() -- presumably the operand
+  // sizes; confirm against the .cpp implementation.
+  SmallVector<uint32_t, DEFAULT_VEC_SLOTS> mOperands;
+public:
+  void addOperand(size_t idx, uint32_t size);
+  uint32_t getPrintfID();
+  void setPrintfID(uint32_t idx);
+  size_t getNumOperands();
+  uint32_t getOperandID(uint32_t idx);
+}; // class PrintfInfo
+
+// How a function's symbol name must be decorated; used on Windows for the
+// stdcall and fastcall calling conventions.
+enum NameDecorationStyle {
+  None,
+  StdCall,
+  FastCall
+};
+// Describes one sampler referenced by the kernel.  Emitted as metadata in
+// the IL stream so the runtime can perform hardware sampler setup.
+typedef struct SamplerInfoRec {
+  std::string name; // The name of the sampler
+  uint32_t val; // The value of the sampler
+  uint32_t idx; // The sampler resource id
+} SamplerInfo;
+// Some typedefs that will help with using the various iterators
+// of the machine function info class.
+// Iterators over the 32/64/128-bit literal pools.
+typedef std::map<uint32_t, uint32_t>::iterator lit32_iterator;
+typedef std::map<uint64_t, uint32_t>::iterator lit64_iterator;
+typedef std::map<std::pair<uint64_t, uint64_t>, uint32_t>::iterator
+lit128_iterator;
+// Iterator over the sampler name -> SamplerInfo map.
+typedef StringMap<SamplerInfo>::iterator sampler_iterator;
+// Iterators over the various uint32_t ID sets (called functions,
+// intrinsics, UAVs, semaphores, and read/write image resource IDs).
+typedef DenseSet<uint32_t>::iterator func_iterator;
+typedef DenseSet<uint32_t>::iterator intr_iterator;
+typedef DenseSet<uint32_t>::iterator uav_iterator;
+typedef DenseSet<uint32_t>::iterator sema_iterator;
+typedef DenseSet<uint32_t>::iterator read_image1d_iterator;
+typedef DenseSet<uint32_t>::iterator write_image1d_iterator;
+typedef DenseSet<uint32_t>::iterator read_image1d_array_iterator;
+typedef DenseSet<uint32_t>::iterator write_image1d_array_iterator;
+typedef DenseSet<uint32_t>::iterator read_image1d_buffer_iterator;
+typedef DenseSet<uint32_t>::iterator write_image1d_buffer_iterator;
+typedef DenseSet<uint32_t>::iterator read_image2d_iterator;
+typedef DenseSet<uint32_t>::iterator write_image2d_iterator;
+typedef DenseSet<uint32_t>::iterator read_image2d_array_iterator;
+typedef DenseSet<uint32_t>::iterator write_image2d_array_iterator;
+typedef DenseSet<uint32_t>::iterator read_image3d_iterator;
+typedef DenseSet<uint32_t>::iterator write_image3d_iterator;
+// Iterators over read-only pointers, error messages, printf strings,
+// and the function/kernel metadata containers.
+typedef DenseSet<const Value*>::iterator read_ptr_iterator;
+typedef DenseSet<const char*>::iterator error_iterator;
+typedef std::map<std::string, PrintfInfo*>::iterator printf_iterator;
+typedef std::set<std::string>::iterator func_md_iterator;
+typedef std::vector<std::string>::iterator kernel_md_iterator;
+// AMDILMachineFunctionInfo - This class is
+// derived from MachineFunction private
+// amdil target-specific information for each MachineFunction
+class AMDILMachineFunctionInfo : public MachineFunctionInfo
+{
+  // CalleeSavedFrameSize - Size of the callee-saved
+  // register portion of the
+  // stack frame in bytes.
+  unsigned int CalleeSavedFrameSize;
+  // BytesToPopOnReturn - Number of bytes function pops on return.
+  // Used on windows platform for stdcall & fastcall name decoration
+  unsigned int BytesToPopOnReturn;
+  // DecorationStyle - If the function requires additional
+  // name decoration,
+  // DecorationStyle holds the right way to do so.
+  NameDecorationStyle DecorationStyle;
+  // ReturnAddrIndex - FrameIndex for return slot.
+  int ReturnAddrIndex;
+
+  // TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved
+  // Used for creating an area before the register spill area
+  // on the stack
+  // the returnaddr can be savely move to this area
+  int TailCallReturnAddrDelta;
+
+  // SRetReturnReg - Some subtargets require that sret lowering includes
+  // returning the value of the returned struct in a register.
+  // This field holds the virtual register into which the sret
+  // argument is passed.
+  unsigned int SRetReturnReg;
+
+  // The size in bytes required to host all of the kernel arguments.
+  // -1 means this value has not been determined yet.
+  int32_t mArgSize;
+
+  // The size in bytes required to host the stack and the kernel arguments
+  // in private memory.
+  // -1 means this value has not been determined yet.
+  int32_t mScratchSize;
+
+  // The size in bytes required to host the the kernel arguments
+  // on the stack.
+  // -1 means this value has not been determined yet.
+  int32_t mStackSize;
+
+  /// A map of constant to literal mapping for all of the 32bit or
+  /// smaller literals in the current function.
+  std::map<uint32_t, uint32_t> mIntLits;
+
+  /// A map of constant to literal mapping for all of the 64bit
+  /// literals in the current function.
+  std::map<uint64_t, uint32_t> mLongLits;
+
+  /// A map of constant to literal mapping for all of the 128bit
+  /// literals in the current function.
+  std::map<std::pair<uint64_t, uint64_t>, uint32_t> mVecLits;
+
+  /// The number of literals that should be reserved.
+  /// TODO: Remove this when the wrapper emitter is added.
+  uint32_t mReservedLits;
+
+  /// A map of name to sampler information that is used to emit
+  /// metadata to the IL stream that the runtimes can use for
+  /// hardware setup.
+  StringMap<SamplerInfo> mSamplerMap;
+
+  /// Array of flags to specify if a specific memory type is used or not.
+  bool mUsedMem[AMDILDevice::MAX_IDS];
+
+  /// Set of all functions that this function calls.
+  DenseSet<uint32_t> mFuncs;
+
+  /// Set of all intrinsics that this function calls.
+  DenseSet<uint32_t> mIntrs;
+
+  /// Set of all write only 1D images.
+  DenseSet<uint32_t> mWO1D;
+  /// Set of all read only 1D images.
+  DenseSet<uint32_t> mRO1D;
+  /// Set of all write only 1D image arrays.
+  DenseSet<uint32_t> mWO1DA;
+  /// Set of all read only 1D image arrays.
+  DenseSet<uint32_t> mRO1DA;
+  /// Set of all write only 1D image buffers.
+  DenseSet<uint32_t> mWO1DB;
+  /// Set of all read only 1D image buffers.
+  DenseSet<uint32_t> mRO1DB;
+  /// Set of all write only 2D images.
+  DenseSet<uint32_t> mWO2D;
+  /// Set of all read only 2D images.
+  DenseSet<uint32_t> mRO2D;
+  /// Set of all write only 2D image arrays.
+  DenseSet<uint32_t> mWO2DA;
+  /// Set of all read only 2D image arrays.
+  DenseSet<uint32_t> mRO2DA;
+  /// Set of all read only 3D images.
+  DenseSet<uint32_t> mRO3D;
+  /// Set of all write only 3D images.
+  DenseSet<uint32_t> mWO3D;
+  /// Set of all the raw uavs.
+  DenseSet<uint32_t> mRawUAV;
+  /// Set of all the arena uavs.
+  DenseSet<uint32_t> mArenaUAV;
+
+  /// Set of all semaphores
+  DenseSet<uint32_t> mSemaphore;
+
+  /// Set of all the read-only pointers
+  DenseSet<const Value*> mReadPtr;
+
+  /// A set of all errors that occured in the backend for this function.
+  DenseSet<const char *> mErrors;
+
+  /// A mapping of printf data and the printf string
+  std::map<std::string, PrintfInfo*> mPrintfMap;
+
+  /// A set of all of the metadata that is used for the current function.
+  std::set<std::string> mMetadataFunc;
+
+  /// A set of all of the metadata that is used for the function wrapper.
+  std::vector<std::string> mMetadataKernel;
+
+  SmallVector<unsigned, 16> mArgRegs;
+
+  /// Information about the kernel, NULL if the function is not a kernel.
+  AMDILKernel *mKernel;
+
+  /// Pointer to the machine function that this information belongs to.
+  MachineFunction *mMF;
+
+  /// Pointer to the subtarget for this function.
+  const AMDILSubtarget *mSTM;
+public:
+  AMDILMachineFunctionInfo();
+  AMDILMachineFunctionInfo(MachineFunction &MF);
+  virtual ~AMDILMachineFunctionInfo();
+  unsigned int
+  getCalleeSavedFrameSize() const;
+  void
+  setCalleeSavedFrameSize(unsigned int bytes);
+
+  unsigned int
+  getBytesToPopOnReturn() const;
+  void
+  setBytesToPopOnReturn (unsigned int bytes);
+
+  NameDecorationStyle
+  getDecorationStyle() const;
+  void
+  setDecorationStyle(NameDecorationStyle style);
+
+  int
+  getRAIndex() const;
+  void
+  setRAIndex(int Index);
+
+  int
+  getTCReturnAddrDelta() const;
+  void
+  setTCReturnAddrDelta(int delta);
+
+  unsigned int
+  getSRetReturnReg() const;
+  void
+  setSRetReturnReg(unsigned int Reg);
+
+// Declares a Uses<A>/<A>Arg flag pair with their accessors.
+// NOTE(review): Uses##A and A##Arg are not initialized here; presumably
+// the constructors zero them -- confirm in the .cpp.
+#define AS_SET_GET(A) \
+    private: \
+      bool Uses##A;\
+      bool A##Arg; \
+    public: \
+      void setUses##A() { Uses##A = true; }\
+      bool uses##A() const { return Uses##A; }\
+      void setHas##A##Arg() { A##Arg = true; setUses##A(); }\
+      bool has##A##Arg() const { return A##Arg; }
+
+  AS_SET_GET(LDS)
+  AS_SET_GET(GDS)
+  AS_SET_GET(Scratch)
+  AS_SET_GET(Constant)
+
+  bool
+  usesHWConstant(std::string name) const;
+  uint32_t
+  getLocal(uint32_t);
+  bool
+  isKernel() const;
+  AMDILKernel*
+  getKernel();
+
+  std::string
+  getName();
+
+  /// Get the size in bytes that are required to host all of
+  /// arguments based on the argument alignment rules in the AMDIL
+  /// Metadata spec.
+  uint32_t getArgSize();
+
+  /// Get the size in bytes that are required to host all of
+  /// arguments and stack memory in scratch.
+  uint32_t getScratchSize();
+
+  /// Get the size in bytes that is required to host all of
+  /// the arguments on the stack.
+  uint32_t getStackSize();
+
+  ///
+  /// @param val value to add the lookup table
+  /// @param Opcode opcode of the literal instruction
+  /// @brief adds the specified value of the type represented by the
+  /// Opcode
+  /// to the literal to integer and integer to literal mappings.
+  ///
+  /// Add a 32bit integer value to the literal table.
+  uint32_t addi32Literal(uint32_t val, int Opcode = AMDIL::LOADCONST_i32);
+
+  /// Add a 32bit floating point value to the literal table.
+  uint32_t addf32Literal(const ConstantFP *CFP);
+
+  /// Add a 32bit floating point value to the literal table.
+  uint32_t addf32Literal(uint32_t val);
+
+  /// Add a 64bit integer value to the literal table.
+  uint32_t addi64Literal(uint64_t val);
+
+  /// Add a 128 bit integer value to the literal table.
+  uint32_t addi128Literal(uint64_t val_lo, uint64_t val_hi);
+
+  /// Add a 64bit floating point literal as a 64bit integer value.
+  uint32_t addf64Literal(const ConstantFP *CFP);
+
+  /// Add a 64bit floating point literal as a 64bit integer value.
+  uint32_t addf64Literal(uint64_t val);
+
+  /// Get the number of literals that have currently been allocated.
+  size_t getNumLiterals() const;
+
+  /// Get the literal ID of an Integer literal of the given offset.
+  uint32_t getIntLits(uint32_t lit);
+
+  /// Get the literal ID of a Long literal of the given offset.
+  uint32_t getLongLits(uint64_t lit);
+
+  /// Get the literal ID of a Long literal of the given offset.
+  uint32_t getVecLits(uint64_t low64, uint64_t high64);
+
+  /// Add some literals to the number of reserved literals.
+  void addReservedLiterals(uint32_t);
+
+  // Functions that return iterators to the beginning and end
+  // of the various literal maps.
+  // Functions that return the beginning and end of the 32bit literal map
+  lit32_iterator begin_32() {
+    return mIntLits.begin();
+  }
+  lit32_iterator end_32() {
+    return mIntLits.end();
+  }
+
+  // Functions that return the beginning and end of the 64bit literal map
+  lit64_iterator begin_64() {
+    return mLongLits.begin();
+  }
+  lit64_iterator end_64() {
+    return mLongLits.end();
+  }
+
+  // Functions that return the beginning and end of the 2x64bit literal map
+  lit128_iterator begin_128() {
+    return mVecLits.begin();
+  }
+  lit128_iterator end_128() {
+    return mVecLits.end();
+  }
+
+  // Add a sampler to the set of known samplers for the current kernel.
+  uint32_t addSampler(std::string name, uint32_t value);
+
+  // Iterators that point to the beginning and end of the sampler map.
+  sampler_iterator sampler_begin() {
+    return mSamplerMap.begin();
+  }
+  sampler_iterator sampler_end() {
+    return mSamplerMap.end();
+  }
+
+
+  /// Set the flag for the memory ID to true for the current function.
+  void setUsesMem(unsigned);
+  /// Retrieve the flag for the memory ID.
+  bool usesMem(unsigned);
+
+  /// Add called functions to the set of all functions this function calls.
+  void addCalledFunc(uint32_t id) {
+    mFuncs.insert(id);
+  }
+  void eraseCalledFunc(uint32_t id) {
+    mFuncs.erase(id);
+  }
+  size_t func_size() {
+    return mFuncs.size();
+  }
+  bool func_empty() {
+    return mFuncs.empty();
+  }
+  func_iterator func_begin() {
+    return mFuncs.begin();
+  }
+  func_iterator func_end() {
+    return mFuncs.end();
+  }
+
+  /// Add called intrinsics to the set of all intrinscis this function calls.
+  void addCalledIntr(uint32_t id) {
+    mIntrs.insert(id);
+  }
+  size_t intr_size() {
+    return mIntrs.size();
+  }
+  bool intr_empty() {
+    return mIntrs.empty();
+  }
+  intr_iterator intr_begin() {
+    return mIntrs.begin();
+  }
+  intr_iterator intr_end() {
+    return mIntrs.end();
+  }
+
+  /// Add a 1D read_only image id.
+  void addROImage1D(uint32_t id) {
+    mRO1D.insert(id);
+  }
+  size_t read_image1d_size() {
+    return mRO1D.size();
+  }
+  read_image1d_iterator read_image1d_begin() {
+    return mRO1D.begin();
+  }
+  read_image1d_iterator read_image1d_end() {
+    return mRO1D.end();
+  }
+
+  /// Add a 1D write_only image id.
+  void addWOImage1D(uint32_t id) {
+    mWO1D.insert(id);
+  }
+  size_t write_image1d_size() {
+    return mWO1D.size();
+  }
+  write_image1d_iterator write_image1d_begin() {
+    return mWO1D.begin();
+  }
+  write_image1d_iterator write_image1d_end() {
+    return mWO1D.end();
+  }
+
+  /// Add a 1D read_only image array id.
+  void addROImage1DArray(uint32_t id) {
+    mRO1DA.insert(id);
+  }
+  size_t read_image1d_array_size() {
+    return mRO1DA.size();
+  }
+  read_image1d_array_iterator read_image1d_array_begin() {
+    return mRO1DA.begin();
+  }
+  read_image1d_array_iterator read_image1d_array_end() {
+    return mRO1DA.end();
+  }
+
+  /// Add a 1D write_only image array id.
+  void addWOImage1DArray(uint32_t id) {
+    mWO1DA.insert(id);
+  }
+  size_t write_image1d_array_size() {
+    return mWO1DA.size();
+  }
+  write_image1d_array_iterator write_image1d_array_begin() {
+    return mWO1DA.begin();
+  }
+  write_image1d_array_iterator write_image1d_array_end() {
+    return mWO1DA.end();
+  }
+
+  /// Add a 1D read_only image buffer id.
+  void addROImage1DBuffer(uint32_t id) {
+    mRO1DB.insert(id);
+  }
+  size_t read_image1d_buffer_size() {
+    return mRO1DB.size();
+  }
+  read_image1d_buffer_iterator read_image1d_buffer_begin() {
+    return mRO1DB.begin();
+  }
+  read_image1d_buffer_iterator read_image1d_buffer_end() {
+    return mRO1DB.end();
+  }
+
+  /// Add a 1D write_only image buffer id.
+  void addWOImage1DBuffer(uint32_t id) {
+    mWO1DB.insert(id);
+  }
+  size_t write_image1d_buffer_size() {
+    return mWO1DB.size();
+  }
+  write_image1d_buffer_iterator write_image1d_buffer_begin() {
+    return mWO1DB.begin();
+  }
+  write_image1d_buffer_iterator write_image1d_buffer_end() {
+    return mWO1DB.end();
+  }
+
+  /// Add a 2D read_only image id.
+  void addROImage2D(uint32_t id) {
+    mRO2D.insert(id);
+  }
+  size_t read_image2d_size() {
+    return mRO2D.size();
+  }
+  read_image2d_iterator read_image2d_begin() {
+    return mRO2D.begin();
+  }
+  read_image2d_iterator read_image2d_end() {
+    return mRO2D.end();
+  }
+
+  /// Add a 2D write_only image id.
+  void addWOImage2D(uint32_t id) {
+    mWO2D.insert(id);
+  }
+  size_t write_image2d_size() {
+    return mWO2D.size();
+  }
+  write_image2d_iterator write_image2d_begin() {
+    return mWO2D.begin();
+  }
+  write_image2d_iterator write_image2d_end() {
+    return mWO2D.end();
+  }
+
+  /// Add a 2D read_only image array id.
+  void addROImage2DArray(uint32_t id) {
+    mRO2DA.insert(id);
+  }
+  size_t read_image2d_array_size() {
+    return mRO2DA.size();
+  }
+  read_image2d_array_iterator read_image2d_array_begin() {
+    return mRO2DA.begin();
+  }
+  read_image2d_array_iterator read_image2d_array_end() {
+    return mRO2DA.end();
+  }
+
+  /// Add a 2D write_only image array id.
+  void addWOImage2DArray(uint32_t id) {
+    mWO2DA.insert(id);
+  }
+  size_t write_image2d_array_size() {
+    return mWO2DA.size();
+  }
+  write_image2d_array_iterator write_image2d_array_begin() {
+    return mWO2DA.begin();
+  }
+  write_image2d_array_iterator write_image2d_array_end() {
+    // Fixed: previously returned mWO2D.end(), i.e. the end iterator of
+    // the plain 2D image set, which does not pair with
+    // write_image2d_array_begin().
+    return mWO2DA.end();
+  }
+
+  /// Add a 3D read_only image id.
+  void addROImage3D(uint32_t id) {
+    mRO3D.insert(id);
+  }
+  size_t read_image3d_size() {
+    return mRO3D.size();
+  }
+  read_image3d_iterator read_image3d_begin() {
+    return mRO3D.begin();
+  }
+  read_image3d_iterator read_image3d_end() {
+    return mRO3D.end();
+  }
+
+  /// Add a 3D write_only image id.
+  void addWOImage3D(uint32_t id) {
+    mWO3D.insert(id);
+  }
+  size_t write_image3d_size() {
+    return mWO3D.size();
+  }
+  write_image3d_iterator write_image3d_begin() {
+    return mWO3D.begin();
+  }
+  write_image3d_iterator write_image3d_end() {
+    return mWO3D.end();
+  }
+
+  /// Total number of write-only images of every flavor.
+  size_t get_num_write_images();
+
+  /// Add a semaphore
+  void sema_insert(uint32_t id) {
+    mSemaphore.insert(id);
+  }
+  bool sema_count(uint32_t id) {
+    return mSemaphore.count(id);
+  }
+  size_t sema_size() {
+    return mSemaphore.size();
+  }
+  sema_iterator sema_begin() {
+    return mSemaphore.begin();
+  }
+  sema_iterator sema_end() {
+    return mSemaphore.end();
+  }
+
+  /// Add a raw uav id.
+  void uav_insert(uint32_t id) {
+    mRawUAV.insert(id);
+  }
+  bool uav_count(uint32_t id) {
+    return mRawUAV.count(id);
+  }
+  size_t uav_size() {
+    return mRawUAV.size();
+  }
+  uav_iterator uav_begin() {
+    return mRawUAV.begin();
+  }
+  uav_iterator uav_end() {
+    return mRawUAV.end();
+  }
+
+  /// Add an arena uav id.
+  void arena_insert(uint32_t id) {
+    mArenaUAV.insert(id);
+  }
+  bool arena_count(uint32_t id) {
+    return mArenaUAV.count(id);
+  }
+  size_t arena_size() {
+    return mArenaUAV.size();
+  }
+  uav_iterator arena_begin() {
+    return mArenaUAV.begin();
+  }
+  uav_iterator arena_end() {
+    return mArenaUAV.end();
+  }
+
+  /// Add a pointer to the known set of read-only pointers
+  void add_read_ptr(const Value* ptr) {
+    mReadPtr.insert(ptr);
+  }
+  bool read_ptr_count(const Value* ptr) {
+    return mReadPtr.count(ptr);
+  }
+  size_t read_size() {
+    // Fixed: was declared 'bool', which truncated the set size to 0/1;
+    // boolean callers still work via implicit conversion.
+    return mReadPtr.size();
+  }
+  read_ptr_iterator read_ptr_begin() {
+    return mReadPtr.begin();
+  }
+  read_ptr_iterator read_ptr_end() {
+    return mReadPtr.end();
+  }
+
+  // Add an error to the output for the current function.
+  typedef enum {
+    RELEASE_ONLY, ///< Only emit error message in release mode.
+    DEBUG_ONLY,   ///< Only emit error message in debug mode.
+    ALWAYS        ///< Always emit the error message.
+  } ErrorMsgEnum;
+  /// Add an error message to the set of all error messages.
+  void addErrorMsg(const char* msg, ErrorMsgEnum val = ALWAYS);
+  bool errors_empty() {
+    return mErrors.empty();
+  }
+  error_iterator errors_begin() {
+    return mErrors.begin();
+  }
+  error_iterator errors_end() {
+    return mErrors.end();
+  }
+
+  /// Add a string to the printf map
+  uint32_t addPrintfString(std::string &name, unsigned offset);
+  /// Add a operand to the printf string
+  void addPrintfOperand(std::string &name, size_t idx, uint32_t size);
+  bool printf_empty() {
+    return mPrintfMap.empty();
+  }
+  size_t printf_size() {
+    return mPrintfMap.size();
+  }
+  printf_iterator printf_begin() {
+    return mPrintfMap.begin();
+  }
+  printf_iterator printf_end() {
+    return mPrintfMap.end();
+  }
+
+  /// Add a string to the metadata set for a function/kernel wrapper
+  void addMetadata(const char *md, bool kernelOnly = false);
+  void addMetadata(std::string md, bool kernelOnly = false);
+  func_md_iterator func_md_begin() {
+    return mMetadataFunc.begin();
+  }
+  func_md_iterator func_md_end() {
+    return mMetadataFunc.end();
+  }
+  kernel_md_iterator kernel_md_begin() {
+    return mMetadataKernel.begin();
+  }
+  kernel_md_iterator kernel_md_end() {
+    return mMetadataKernel.end();
+  }
+
+  /// Query to find out if we are a signed or unsigned integer type.
+  bool isSignedIntType(const Value* ptr);
+
+  /// Query to find out if we are a volatile pointer.
+  bool isVolatilePointer(const Value* ptr);
+
+  /// Query to find out if we are a restrict pointer.
+  bool isRestrictPointer(const Value* ptr);
+
+  /// Query to find out if we are a constant argument.
+  bool isConstantArgument(const Value* ptr);
+
+  /// add/retrieve the argument registers numbers
+  void addArgReg(unsigned arg) {
+    mArgRegs.push_back(arg);
+  }
+  /// Returns the register recorded for argument 'arg', or 'arg' itself
+  /// when no register was recorded for that index.
+  unsigned getArgReg(unsigned arg) {
+    return (arg < mArgRegs.size()) ? mArgRegs[arg] : arg;
+  }
+};
+} // llvm namespace
+#endif // _AMDILMACHINEFUNCTIONINFO_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,632 @@
+//===-- AMDILMachinePeephole.cpp ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine_peephole"
+#if !defined(NDEBUG) && !defined(USE_APPLE)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+
+#include "AMDIL.h"
+#include "AMDILSubtarget.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+
+using namespace llvm;
+namespace
+{
+/// Machine-level peephole pass for the AMDIL backend.  As implemented
+/// below in this file, it (a) retargets local/region atomics to global
+/// atomics on devices that software-emulate those address spaces,
+/// (b) brackets volatile loads/stores with fences, (c) inserts
+/// sign/zero-extension sequences for software-emulated i8/i16 operands,
+/// and (d) keeps comparison results in the lower two register components.
+class AMDILMachinePeephole : public MachineFunctionPass
+{
+  // Maps an original virtual register to the register holding its
+  // extended value, so each source register is extended at most once.
+  typedef std::map<uint32_t, uint32_t> Reg2RegMap;
+public:
+  static char ID;
+  AMDILMachinePeephole(TargetMachine &tm, CodeGenOpt::Level OL);
+  // virtual ~AMDILMachinePeephole();
+  virtual const char*
+  getPassName() const;
+  virtual bool
+  runOnMachineFunction(MachineFunction &MF);
+private:
+  /// Bracket the instruction at MIB with FENCE instructions.
+  void insertFence(MachineBasicBlock::iterator &MIB);
+  /// True when the class is a software-emulated i8/i16 register class.
+  inline bool useSWByteShortReg(short opRegClassID);
+  /// Create a fresh virtual register of the given register class.
+  inline uint32_t genVReg(uint32_t regType) const;
+  /// Build a new instruction before MIB; overloads take 0, 1 or 2
+  /// source registers.
+  inline MachineInstrBuilder
+  generateMachineInst(uint32_t opcode,
+                      uint32_t dst,
+                      MachineBasicBlock::iterator &MIB) const;
+  inline MachineInstrBuilder
+  generateMachineInst(uint32_t opcode,
+                      uint32_t dst,
+                      uint32_t src1,
+                      MachineBasicBlock::iterator &MIB) const;
+  inline MachineInstrBuilder
+  generateMachineInst(uint32_t opcode,
+                      uint32_t dst,
+                      uint32_t src1,
+                      uint32_t src2,
+                      MachineBasicBlock::iterator &MIB) const;
+  /// Locate an existing sign/zero extension of `op` in the function.
+  MachineInstr* findExtendInstruction(MachineOperand& op, bool isSigned);
+  /// Look up the opcodes and shift/mask constant used to extend
+  /// registers of class `regClassID`.
+  void getExtendOpcodes(uint32_t regClassID,
+                        bool isSigned,
+                        int64_t& constVal,
+                        int& promoteOp,
+                        int& demoteOp,
+                        int& binaryAndOp,
+                        int& shlOp,
+                        int& shrOp,
+                        uint32_t& intRegClassID);
+  /// Emit a fresh extension sequence; returns the extended register.
+  uint32_t addExtendInstruction(MachineBasicBlock::iterator &MIB,
+                                uint32_t reg,
+                                bool isSigned);
+  /// Replace operand `opIdx` of the instruction at MIB with its
+  /// sign/zero-extended equivalent.
+  void extendOperand(MachineBasicBlock::iterator &MIB,
+                     uint32_t opIdx,
+                     bool isSigned);
+  void zeroExtend(MachineBasicBlock::iterator &MIB, uint32_t opIdx) {
+    extendOperand(MIB, opIdx, false);
+  }
+  void signExtend(MachineBasicBlock::iterator &MIB, uint32_t opIdx) {
+    extendOperand(MIB, opIdx, true);
+  }
+
+  TargetMachine &TM;
+  MachineFunction* MFP;
+  bool mDebug;
+  // map from a register to its sign-extension
+  Reg2RegMap sextMap;
+  // map from a register to its zero-extension
+  Reg2RegMap zextMap;
+}; // AMDILMachinePeephole
+char AMDILMachinePeephole::ID = 0;
+} // anonymous namespace
+
+namespace llvm
+{
+/// Factory used by the AMDIL target to instantiate this pass.
+FunctionPass*
+createAMDILMachinePeephole(TargetMachine &tm, CodeGenOpt::Level OL)
+{
+  AMDILMachinePeephole *peephole = new AMDILMachinePeephole(tm, OL);
+  return peephole;
+}
+} // llvm namespace
+
+/// Construct the pass.  Debug printing is enabled when the generic
+/// LLVM debug flag selects this pass' DEBUG_TYPE (see DEBUGME above).
+AMDILMachinePeephole::AMDILMachinePeephole(TargetMachine &tm, CodeGenOpt::Level OL)
+  : MachineFunctionPass(ID), TM(tm), MFP(NULL), mDebug(DEBUGME),
+    sextMap(), zextMap()
+{
+}
+
+// Walk every instruction of the function and apply the peephole
+// rewrites described case-by-case below.
+//
+// Fix: the original never set `Changed`, so the pass always reported
+// "no modification" to the pass manager even after rewriting
+// instructions.  `Changed` is now set at every mutation point.
+bool
+AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF)
+{
+  MFP = &MF;
+  // The extension caches are per-function; drop stale entries.
+  sextMap.clear();
+  zextMap.clear();
+  bool Changed = false;
+  const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
+  for (MachineFunction::iterator MBB = MF.begin(), MBE = MF.end();
+       MBB != MBE; ++MBB) {
+    MachineBasicBlock *mb = MBB;
+    for (MachineBasicBlock::iterator MIB = mb->begin(), MIE = mb->end();
+         MIB != MIE; ++MIB) {
+      MachineInstr *mi = MIB;
+      switch (mi->getOpcode()) {
+      default:
+        if (isAtomicInst(TM,mi)) {
+          // If we don't support the hardware accellerated address spaces,
+          // then the atomic needs to be transformed to the global atomic.
+          if (strstr(TM.getInstrInfo()->getName(mi->getOpcode()), "_L_")
+              && STM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) {
+            // Rebase the address register by T2 and retarget the opcode
+            // from the local (ATOM_L_*) range to the global (ATOM_G_*)
+            // range, relying on the two ranges having identical layout.
+            BuildMI(*mb, MIB, mi->getDebugLoc(),
+                    TM.getInstrInfo()->get(AMDIL::ADD_i32), AMDIL::R1011)
+            .addReg(mi->getOperand(1).getReg())
+            .addReg(AMDIL::T2);
+            mi->getOperand(1).setReg(AMDIL::R1011);
+            mi->setDesc(
+              TM.getInstrInfo()->get(
+                (mi->getOpcode() - AMDIL::ATOM_L_ADD) + AMDIL::ATOM_G_ADD));
+            Changed = true;
+          } else if (strstr(TM.getInstrInfo()->getName(mi->getOpcode()), "_R_")
+                     && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem)) {
+            assert(!"Software region memory is not supported!");
+            mi->setDesc(
+              TM.getInstrInfo()->get(
+                (mi->getOpcode() - AMDIL::ATOM_R_ADD) + AMDIL::ATOM_G_ADD));
+            Changed = true;
+          }
+        } else if ((isLoadInst(TM, mi) || isStoreInst(TM, mi)) && isVolatileInst(mi)) {
+          insertFence(MIB);
+          Changed = true;
+        }
+        continue;
+
+        // Implement software emulated i8/i16 types by sign/zero extending
+        // i8/i16 type operands of instructions.
+        // To avoid generating too many sign/zero extensions, we do this only
+        // where its needed:
+        // sign/zero-extend i8/i16 type operands if the bits in the
+        // upper bits affects the result of the instruction
+        ExpandCaseToByteShortScalarTypes(AMDIL::CONTINUEC)
+        ExpandCaseToByteShortScalarTypes(AMDIL::BREAKC)
+        // ExpandCaseToByteShortScalarTypes(AMDIL::BRANCH_COND)
+        ExpandCaseToByteShortScalarTypes(AMDIL::IFC) {
+          // we are not generating the above currently:
+          assert(0 && "unexpected instruction");
+          break;
+        }
+        ExpandCaseToByteShortScalarTypes(AMDIL::BREAK_LOGICALNZ)
+        ExpandCaseToByteShortScalarTypes(AMDIL::BREAK_LOGICALZ)
+        ExpandCaseToByteShortScalarTypes(AMDIL::CONTINUE_LOGICALNZ)
+        ExpandCaseToByteShortScalarTypes(AMDIL::CONTINUE_LOGICALZ)
+        ExpandCaseToByteShortScalarTypes(AMDIL::IF_LOGICALNZ)
+        ExpandCaseToByteShortScalarTypes(AMDIL::IF_LOGICALZ) {
+          // Control-flow conditions test the whole value: zero extend.
+          short opRegClassID = mi->getDesc().OpInfo[0].RegClass;
+          if (useSWByteShortReg(opRegClassID)) {
+            zeroExtend(MIB, 0);
+            Changed = true;
+          }
+          break;
+        }
+        ExpandCaseToByteShortScalarTypes(AMDIL::SELECTBIN)
+        ExpandCaseToByteShortTypes(AMDIL::CMOVLOG)
+        ExpandCaseToByteShortTypes(AMDIL::CMOV)
+        // ExpandCaseToByteShortTypes(AMDIL::EADD)
+        // find first hi/low bit
+        ExpandCaseToByteShortTypes(AMDIL::IFFB_HI)
+        ExpandCaseToByteShortTypes(AMDIL::IFFB_LO)
+        ExpandCaseToByteShortTypes(AMDIL::USHR)
+        ExpandCaseToByteShortTypes(AMDIL::USHRVEC) {
+          // Unsigned ops: upper garbage bits must be cleared.
+          short opRegClassID = mi->getDesc().OpInfo[1].RegClass;
+          if (useSWByteShortReg(opRegClassID)) {
+            zeroExtend(MIB, 1);
+            Changed = true;
+          }
+          break;
+        }
+        ExpandCaseToByteShortTypes(AMDIL::NEGATE)
+        ExpandCaseToByteShortTypes(AMDIL::SHR)
+        ExpandCaseToByteShortTypes(AMDIL::SHRVEC) {
+          // Signed ops: upper bits must replicate the sign bit.
+          short opRegClassID = mi->getDesc().OpInfo[1].RegClass;
+          if (useSWByteShortReg(opRegClassID)) {
+            signExtend(MIB, 1);
+            Changed = true;
+          }
+          break;
+        }
+        ExpandCaseToByteShortScalarTypes(AMDIL::MACRO__sdiv)
+        ExpandCaseToByteShortScalarTypes(AMDIL::MACRO__smod)
+        ExpandCaseToByteShortTypes(AMDIL::DIV_INF)
+        ExpandCaseToByteShortTypes(AMDIL::SMAX)
+        ExpandCaseToByteShortTypes(AMDIL::SMULHI)
+        ExpandCaseToByteShortTypes(AMDIL::SMUL) {
+          // Signed two-source ops: both sources need sign extension.
+          short opRegClassID = mi->getDesc().OpInfo[1].RegClass;
+          assert(opRegClassID == mi->getDesc().OpInfo[2].RegClass
+                 && "instruction ops have different type");
+          if (useSWByteShortReg(opRegClassID)) {
+            signExtend(MIB, 1);
+            signExtend(MIB, 2);
+            Changed = true;
+          }
+          break;
+        }
+        ExpandCaseToByteShortScalarTypes(AMDIL::MACRO__udiv)
+        ExpandCaseToByteShortScalarTypes(AMDIL::MACRO__umod)
+        ExpandCaseToByteShortTypes(AMDIL::UDIV)
+        ExpandCaseToByteShortTypes(AMDIL::UMULHI) {
+          // Unsigned two-source ops: both sources need zero extension.
+          short opRegClassID = mi->getDesc().OpInfo[1].RegClass;
+          assert(opRegClassID == mi->getDesc().OpInfo[2].RegClass
+                 && "instruction ops have different type");
+          if (useSWByteShortReg(opRegClassID)) {
+            zeroExtend(MIB, 1);
+            zeroExtend(MIB, 2);
+            Changed = true;
+          }
+          break;
+        }
+        // This works around a restriction in AMDIL where the
+        // result of a comparison can only be in the lower
+        // 2 components.
+      case AMDIL::LEQ:
+      case AMDIL::LGE:
+      case AMDIL::LLE:
+      case AMDIL::LGT:
+      case AMDIL::LLT:
+      case AMDIL::LNE:
+      case AMDIL::ULLE:
+      case AMDIL::ULGT:
+      case AMDIL::ULGE:
+      case AMDIL::ULLT: {
+        // If the result register lives in the z/w components, compute
+        // into Rxy1000 instead and copy the result out afterwards.
+        if (isZWComponentReg(mi->getOperand(0).getReg())) {
+          MachineInstr *newmi = BuildMI(MF, mi->getDebugLoc(),
+                                        TM.getInstrInfo()->get(AMDIL::MOVE_i64),
+                                        mi->getOperand(0).getReg()).addReg(AMDIL::Rxy1000);
+          mi->getOperand(0).setReg(AMDIL::Rxy1000);
+          mi->getParent()->insertAfter(MIB, newmi);
+          Changed = true;
+        }
+      }
+      break;
+      }
+    }
+  }
+  return Changed;
+}
+
+const char*
+AMDILMachinePeephole::getPassName() const
+{
+  // Human-readable name reported by the pass manager.
+  static const char *const PassName =
+    "AMDIL Generic Machine Peephole Optimization Pass";
+  return PassName;
+}
+
+// Bracket the volatile load/store at MIB with a FENCE before and a
+// FENCE after it.
+void
+AMDILMachinePeephole::insertFence(MachineBasicBlock::iterator &MIB)
+{
+  MachineInstr *MI = MIB;
+  // Fence placed immediately before the volatile memory instruction.
+  MachineInstr *fence = BuildMI(*(MI->getParent()->getParent()),
+                                MI->getDebugLoc(),
+                                TM.getInstrInfo()->get(AMDIL::FENCE)).addReg(1);
+
+  MI->getParent()->insert(MIB, fence);
+  // Second fence placed immediately after; MIB is advanced onto the new
+  // fence so the caller's loop does not revisit it.
+  fence = BuildMI(*(MI->getParent()->getParent()),
+                  MI->getDebugLoc(),
+                  TM.getInstrInfo()->get(AMDIL::FENCE)).addReg(1);
+  MIB = MI->getParent()->insertAfter(MIB, fence);
+}
+
+// returns if the given register class is software emulated byte or short
+bool AMDILMachinePeephole::useSWByteShortReg(short opRegClassID)
+{
+  const AMDILSubtarget &ST = TM.getSubtarget<AMDILSubtarget>();
+  // i16 scalar/vector classes are emulated when the device lacks native
+  // short operations.
+  const bool isShortClass = opRegClassID == AMDIL::GPRI16RegClassID
+                            || opRegClassID == AMDIL::GPRV2I16RegClassID
+                            || opRegClassID == AMDIL::GPRV4I16RegClassID;
+  if (isShortClass) {
+    return ST.device()->usesSoftware(AMDILDeviceInfo::ShortOps);
+  }
+  // i8 scalar/vector classes are emulated when the device lacks native
+  // byte operations.
+  const bool isByteClass = opRegClassID == AMDIL::GPRI8RegClassID
+                           || opRegClassID == AMDIL::GPRV2I8RegClassID
+                           || opRegClassID == AMDIL::GPRV4I8RegClassID;
+  if (isByteClass) {
+    return ST.device()->usesSoftware(AMDILDeviceInfo::ByteOps);
+  }
+  return false;
+}
+
+uint32_t AMDILMachinePeephole::genVReg(uint32_t regType) const
+{
+  // Allocate a fresh virtual register in the requested register class.
+  const TargetRegisterClass *regClass = getRegClassFromID(regType);
+  return MFP->getRegInfo().createVirtualRegister(regClass);
+}
+
+MachineInstrBuilder
+AMDILMachinePeephole::generateMachineInst(uint32_t opcode,
+    uint32_t dst,
+    MachineBasicBlock::iterator &MIB)
+const
+{
+  // Insert a new instruction immediately before MIB, reusing the debug
+  // location of the instruction at the insertion point.
+  MachineInstr* insertPt = MIB;
+  MachineBasicBlock* block = insertPt->getParent();
+  return BuildMI(*block, MIB, insertPt->getDebugLoc(),
+                 TM.getInstrInfo()->get(opcode), dst);
+}
+
+MachineInstrBuilder
+AMDILMachinePeephole::generateMachineInst(uint32_t opcode,
+    uint32_t dst,
+    uint32_t src1,
+    MachineBasicBlock::iterator &MIB)
+const
+{
+  // One-source form: delegate to the base overload, then append the
+  // source register operand.
+  MachineInstrBuilder builder = generateMachineInst(opcode, dst, MIB);
+  return builder.addReg(src1);
+}
+
+MachineInstrBuilder
+AMDILMachinePeephole::generateMachineInst(uint32_t opcode,
+    uint32_t dst,
+    uint32_t src1,
+    uint32_t src2,
+    MachineBasicBlock::iterator &MIB)
+const
+{
+  // Two-source form: delegate to the one-source overload, then append
+  // the second source register operand.
+  MachineInstrBuilder builder = generateMachineInst(opcode, dst, src1, MIB);
+  return builder.addReg(src2);
+}
+
+// Find a MachineInstr that uses the given register and has the given opcode.
+// Return NULL if not found.
+//
+// Fix: operand-kind checks now test isReg() BEFORE isUse() --
+// MachineOperand::isUse() asserts isReg(), so the original order
+// tripped the assert (debug builds) on any non-register operand.
+static inline MachineInstr* findRegUse(uint32_t reg, int opcode,
+                                       const MachineRegisterInfo& MRI)
+{
+  for (MachineRegisterInfo::use_iterator it = MRI.use_begin(reg),
+       end = MRI.use_end();
+       it != end;
+       ++it) {
+    MachineInstr& useInst = *it;
+    if (useInst.getOpcode() != opcode) {
+      continue;
+    }
+    // Confirm the instruction really reads `reg` through one of its
+    // register-use operands.
+    for (MachineInstr::mop_iterator it2 = useInst.operands_begin(),
+         end2 = useInst.operands_end();
+         it2 != end2; ++it2) {
+      MachineOperand& op = *it2;
+      if (op.isReg() && op.isUse() && op.getReg() == reg) {
+        return &useInst;
+      }
+    }
+  }
+  return NULL;
+}
+
+// Find a MachineInstr that uses the given register and immediate,
+// and has the given opcode.
+// Return NULL if not found.
+//
+// Fix: operand-kind checks now test isReg()/isImm() BEFORE isUse() --
+// MachineOperand::isUse() asserts isReg(), so the original order
+// tripped the assert (debug builds) on any immediate operand.
+static inline MachineInstr* findRegUse(uint32_t reg, int opcode, int64_t imm,
+                                       const MachineRegisterInfo& MRI)
+{
+  for (MachineRegisterInfo::use_iterator it = MRI.use_begin(reg),
+       end = MRI.use_end();
+       it != end;
+       ++it) {
+    MachineInstr& useInst = *it;
+    if (useInst.getOpcode() != opcode) {
+      continue;
+    }
+    // The instruction qualifies only if it both reads `reg` and carries
+    // the immediate `imm` somewhere in its operand list.
+    bool foundRegUse = false;
+    bool foundImmUse = false;
+    for (MachineInstr::mop_iterator it2 = useInst.operands_begin(),
+         end2 = useInst.operands_end();
+         it2 != end2; ++it2) {
+      MachineOperand& op = *it2;
+      if (op.isReg() && op.isUse() && op.getReg() == reg) {
+        foundRegUse = true;
+      } else if (op.isImm() && op.getImm() == imm) {
+        foundImmUse = true;
+      }
+    }
+    if (foundRegUse && foundImmUse) {
+      return &useInst;
+    }
+  }
+  return NULL;
+}
+
+// returns if the given MachineInstr defines exactly 1 register operand
+//
+// Fix: test isReg() BEFORE isDef() -- MachineOperand::isDef() asserts
+// isReg(), so the original order tripped the assert (debug builds) on
+// any non-register operand such as an immediate.
+static inline bool hasSingleRegDef(MachineInstr& inst)
+{
+  size_t nDefs = 0;
+  for (MachineInstr::mop_iterator it = inst.operands_begin(),
+       end = inst.operands_end();
+       it != end; ++it) {
+    MachineOperand& op = *it;
+    if (!op.isReg() || !op.isDef()) {
+      continue;
+    }
+    ++nDefs;
+    if (nDefs > 1) {
+      // Early out: more than one def already disqualifies.
+      return false;
+    }
+  }
+  return nDefs == 1;
+}
+
+// returns the first register this MachineInstr defines
+//
+// Fix: test isReg() BEFORE isDef() -- MachineOperand::isDef() asserts
+// isReg(), so the original order tripped the assert (debug builds) on
+// any non-register operand.
+static inline uint32_t firstDefReg(MachineInstr& inst)
+{
+  for (MachineInstr::mop_iterator it = inst.operands_begin(),
+       end = inst.operands_end();
+       it != end; ++it) {
+    MachineOperand& op = *it;
+    if (op.isReg() && op.isDef()) {
+      return op.getReg();
+    }
+  }
+  // Callers are expected to check hasSingleRegDef() first.
+  assert(0 && "should not reach");
+  return 0;
+}
+
+// Find sign extension sequence such as the following:
+// reg1 = IL_ASINT_i8 reg
+// reg2 = SHL_i32 reg1, 24
+// reg3 = SHR_i32 reg2, 24
+// reg4 = IL_ASCHAR_i32 reg3
+// or zero extension sequence such as the following:
+// reg1 = IL_ASINT_i8 reg
+// reg2 = BINARY_AND_i32 reg1, 0xff
+// reg3 = IL_ASCHAR_i32 reg2
+// The above sequence does sign/zero-extension to reg if reg is of type i8
+// Return the last instruction in the sequence
+// Return NULL if no such sequence found
+MachineInstr* AMDILMachinePeephole::findExtendInstruction(MachineOperand& op,
+    bool isSigned)
+{
+  unsigned opReg = op.getReg();
+  uint32_t regClassID = MFP->getRegInfo().getRegClass(opReg)->getID();
+  // Opcodes and the shift/mask constant an extension sequence for this
+  // register class would use; filled in by getExtendOpcodes.
+  int64_t constVal;
+  int promoteOp;
+  int demoteOp;
+  int binaryAndOp;
+  int shlOp;
+  int shrOp;
+  uint32_t intRegClassID;
+  getExtendOpcodes(regClassID, isSigned, constVal, promoteOp, demoteOp,
+                   binaryAndOp, shlOp, shrOp, intRegClassID);
+  const MachineRegisterInfo& MRI = MFP->getRegInfo();
+  // Step 1: a promote (bitcast-to-int) of the operand register.
+  MachineInstr* promoteInst = findRegUse(opReg, promoteOp, MRI);
+  if (promoteInst == NULL) return NULL;
+  if (!hasSingleRegDef(*promoteInst)) return NULL;
+  uint32_t reg1 = firstDefReg(*promoteInst);
+  uint32_t reg3;
+  if (isSigned) {
+    // Step 2 (signed): shl followed by shr with the same constant.
+    MachineInstr* shlInst = findRegUse(reg1, shlOp, constVal, MRI);
+    if (shlInst == NULL) return NULL;
+    if (!hasSingleRegDef(*shlInst)) return NULL;
+    uint32_t reg2 = firstDefReg(*shlInst);
+    MachineInstr* shrInst = findRegUse(reg2, shrOp, constVal, MRI);
+    if (shrInst == NULL) return NULL;
+    if (!hasSingleRegDef(*shrInst)) return NULL;
+    reg3 = firstDefReg(*shrInst);
+  } else {
+    // Step 2 (unsigned): mask with the all-ones constant for the width.
+    MachineInstr* andInst = findRegUse(reg1, binaryAndOp, constVal, MRI);
+    if (andInst == NULL) return NULL;
+    if (!hasSingleRegDef(*andInst)) return NULL;
+    reg3 = firstDefReg(*andInst);
+  }
+  // Step 3: the demote (bitcast back to the narrow type) -- this is the
+  // instruction whose def holds the fully extended value.
+  MachineInstr* demoteInst = findRegUse(reg3, demoteOp, MRI);
+  if (demoteInst == NULL) return NULL;
+  if (!hasSingleRegDef(*demoteInst)) return NULL;
+  return demoteInst;
+}
+
+// returns opcodes to be used to sign/zero extend the given register class
+// For each i8/i16 (scalar or vector) class this yields:
+//   promoteOp   - bitcast to the matching i32 class
+//   demoteOp    - bitcast back to the narrow class
+//   constVal    - shift amount (signed) or mask (unsigned)
+//   binaryAndOp - mask op used for zero extension
+//   shlOp/shrOp - shift pair used for sign extension
+//   intRegClassID - the matching i32 register class
+void
+AMDILMachinePeephole::getExtendOpcodes(uint32_t regClassID,
+                                       bool isSigned,
+                                       int64_t& constVal,
+                                       int& promoteOp,
+                                       int& demoteOp,
+                                       int& binaryAndOp,
+                                       int& shlOp,
+                                       int& shrOp,
+                                       uint32_t& intRegClassID)
+{
+  switch(regClassID) {
+  default:
+    assert(0 && "unexpected reg class");
+    // NOTE: deliberate fallthrough -- in release (NDEBUG) builds an
+    // unexpected class falls through and gets the i8 opcodes.
+  case AMDIL::GPRI8RegClassID:
+    constVal = isSigned ? 24 : 0xFF;
+    promoteOp = AMDIL::IL_ASINT_i8;
+    demoteOp = AMDIL::IL_ASCHAR_i32;
+    binaryAndOp = AMDIL::BINARY_AND_i32;
+    intRegClassID = AMDIL::GPRI32RegClassID;
+    shlOp = AMDIL::SHL_i32;
+    shrOp = AMDIL::SHR_i32;
+    break;
+  case AMDIL::GPRV2I8RegClassID:
+    constVal = isSigned ? 24 : 0xFF;
+    promoteOp = AMDIL::IL_ASV2INT_v2i8;
+    demoteOp = AMDIL::IL_ASV2CHAR_v2i32;
+    binaryAndOp = AMDIL::BINARY_AND_v2i32;
+    intRegClassID = AMDIL::GPRV2I32RegClassID;
+    shlOp = AMDIL::SHLVEC_v2i32;
+    shrOp = AMDIL::SHRVEC_v2i32;
+    break;
+  case AMDIL::GPRV4I8RegClassID:
+    constVal = isSigned ? 24 : 0xFF;
+    promoteOp = AMDIL::IL_ASV4INT_v4i8;
+    demoteOp = AMDIL::IL_ASV4CHAR_v4i32;
+    binaryAndOp = AMDIL::BINARY_AND_v4i32;
+    intRegClassID = AMDIL::GPRV4I32RegClassID;
+    shlOp = AMDIL::SHLVEC_v4i32;
+    shrOp = AMDIL::SHRVEC_v4i32;
+    break;
+  case AMDIL::GPRI16RegClassID:
+    constVal = isSigned ? 16 : 0xFFFF;
+    promoteOp = AMDIL::IL_ASINT_i16;
+    demoteOp = AMDIL::IL_ASSHORT_i32;
+    binaryAndOp = AMDIL::BINARY_AND_i32;
+    intRegClassID = AMDIL::GPRI32RegClassID;
+    shlOp = AMDIL::SHL_i32;
+    shrOp = AMDIL::SHR_i32;
+    break;
+  case AMDIL::GPRV2I16RegClassID:
+    constVal = isSigned ? 16 : 0xFFFF;
+    promoteOp = AMDIL::IL_ASV2INT_v2i16;
+    demoteOp = AMDIL::IL_ASV2SHORT_v2i32;
+    binaryAndOp = AMDIL::BINARY_AND_v2i32;
+    intRegClassID = AMDIL::GPRV2I32RegClassID;
+    shlOp = AMDIL::SHLVEC_v2i32;
+    shrOp = AMDIL::SHRVEC_v2i32;
+    break;
+  case AMDIL::GPRV4I16RegClassID:
+    constVal = isSigned ? 16 : 0xFFFF;
+    promoteOp = AMDIL::IL_ASV4INT_v4i16;
+    demoteOp = AMDIL::IL_ASV4SHORT_v4i32;
+    binaryAndOp = AMDIL::BINARY_AND_v4i32;
+    intRegClassID = AMDIL::GPRV4I32RegClassID;
+    shlOp = AMDIL::SHLVEC_v4i32;
+    shrOp = AMDIL::SHRVEC_v4i32;
+    break;
+  }
+}
+
+// create sequence of instructions to sign/zero extend the given register
+// Code emitted before MIB:
+//   intReg   = promoteOp reg                 ; widen to i32 lanes
+//   constReg = LOADCONST_i32 constVal        ; shift amount or mask
+//   signed:   intReg2 = shr(shl(intReg, constReg), constReg)
+//   unsigned: intReg2 = and(intReg, constReg)
+//   dstReg   = demoteOp intReg2              ; narrow back
+// Returns dstReg, the extended copy of `reg`.
+uint32_t
+AMDILMachinePeephole::addExtendInstruction(MachineBasicBlock::iterator &MIB,
+    uint32_t reg,
+    bool isSigned)
+{
+  // Opcodes and constant for this register class (see getExtendOpcodes).
+  int64_t constVal;
+  int promoteOp;
+  int demoteOp;
+  int binaryAndOp;
+  int shlOp;
+  int shrOp;
+  uint32_t intRegClassID;
+  uint32_t regClassID = MFP->getRegInfo().getRegClass(reg)->getID();
+  getExtendOpcodes(regClassID, isSigned, constVal, promoteOp, demoteOp,
+                   binaryAndOp, shlOp, shrOp, intRegClassID);
+  uint32_t constReg = genVReg(AMDIL::GPRI32RegClassID);
+  uint32_t intReg = genVReg(intRegClassID);
+  uint32_t intReg2 = genVReg(intRegClassID);
+  uint32_t dstReg = genVReg(regClassID);
+  generateMachineInst(promoteOp, intReg, reg, MIB);
+  generateMachineInst(AMDIL::LOADCONST_i32, constReg, MIB).addImm(constVal);
+  if (isSigned) {
+    // shl then arithmetic shr by the same amount replicates the sign bit.
+    uint32_t intReg3 = genVReg(intRegClassID);
+    generateMachineInst(shlOp, intReg3, intReg, constReg, MIB);
+    generateMachineInst(shrOp, intReg2, intReg3, constReg, MIB);
+  } else {
+    // Masking clears the upper bits for zero extension.
+    generateMachineInst(binaryAndOp, intReg2, intReg, constReg, MIB);
+  }
+  generateMachineInst(demoteOp, dstReg, intReg2, MIB);
+  return dstReg;
+}
+
+// sign/zero extend an operand of a MachineInstr by either reuse an existing
+// sequence of sign/zero extension of the operand or by creating a new sequence.
+// Lookup order: (1) the per-function sext/zext cache, (2) an existing
+// extension sequence found in the def-use graph, (3) a newly emitted
+// sequence.  The result is cached, then the operand is rewritten.
+void
+AMDILMachinePeephole::extendOperand(MachineBasicBlock::iterator &MIB,
+                                    uint32_t opIdx,
+                                    bool isSigned)
+{
+  MachineInstr* mi = MIB;
+  DEBUG(dbgs() << (isSigned ? "sign" : "zero") << " extending operand "
+        << opIdx << " for " << *mi);
+  MachineOperand& op = mi->getOperand(opIdx);
+  assert(op.isReg() && op.isUse() && "extending non-register or def operand");
+  uint32_t opReg = op.getReg();
+  uint32_t newOpReg;
+
+  assert((unsigned)MFP->getRegInfo().getRegClass(opReg)->getID()
+         == (unsigned)mi->getDesc().OpInfo[opIdx].RegClass
+         && "inconsistent op reg class");
+
+  // first check the sext/zext map to see if it already has a sign/zero
+  // extension, if so, reuse it
+  Reg2RegMap& map = isSigned ? sextMap : zextMap;
+  Reg2RegMap::iterator it = map.find(opReg);
+  if (it != map.end()) {
+    DEBUG(dbgs() << "Found in map ");
+    newOpReg = it->second;
+  } else {
+    // not in the map. See if we can find in the DFG
+    MachineInstr* extendInst = findExtendInstruction(op, isSigned);
+    if (extendInst && hasSingleRegDef(*extendInst)) {
+      newOpReg = firstDefReg(*extendInst);
+      DEBUG(dbgs() << "Found in DFG ");
+    } else {
+      // not in the DFG either. Create sign/zero extension.
+      newOpReg = addExtendInstruction(MIB, opReg, isSigned);
+      DEBUG(dbgs() << "Created ");
+    }
+    // Cache the extended register so later uses of opReg reuse it.
+    map[opReg] = newOpReg;
+  }
+  DEBUG(dbgs() << (isSigned ? "sign" : "zero") << " extension vreg"
+        << TargetRegisterInfo::virtReg2Index(newOpReg) << " for vreg"
+        << TargetRegisterInfo::virtReg2Index(opReg) << "\n");
+  // Redirect the operand to the extended value.
+  op.setReg(newOpReg);
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem32.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem32.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem32.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem32.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,836 @@
+//===-- AMDILMem32.td -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the 32-bit pointer-mode memory instructions for the
+// AMDIL target: address-space loads/stores, image operations, and atomics.
+//
+//===----------------------------------------------------------------------===//
+
+def ArenaSupport : Predicate<"Subtarget->device()->isSupported(AMDILDeviceInfo::ArenaUAV)">;
+
+let Predicates = [Has32BitPtr] in {
+
+  let isCodeGenOnly=1 in {
+    //===----------------------------------------------------------------------===//
+    // Store Memory Operations
+    //===----------------------------------------------------------------------===//
+    // Code-gen-only pseudo stores, one family per address space, selected via
+    // the listed DAG fragments (global/local/private/region).  The *TRUNCSTORE
+    // families cover truncating stores.
+    defm GLOBALTRUNCSTORE   : GTRUNCSTORE<"!global trunc store">;
+    defm GLOBALSTORE        : STORE<"!global store"         , global_store>;
+    defm LOCALTRUNCSTORE    : LTRUNCSTORE<"!local trunc store">;
+    defm LOCALSTORE         : STORE<"!local store"          , local_store>;
+    defm PRIVATETRUNCSTORE  : PTRUNCSTORE<"!private trunc store">;
+    defm PRIVATESTORE       : STORE<"!private store"        , private_store>;
+    defm REGIONTRUNCSTORE   : RTRUNCSTORE<"!region trunc store">;
+    defm REGIONSTORE        : STORE<"!region hw store"      , region_store>;
+
+
+    //===---------------------------------------------------------------------===//
+    // Load Memory Operations
+    //===---------------------------------------------------------------------===//
+    // Code-gen-only pseudo loads: plain, zero-extending (zext), sign-extending
+    // (sext) and any-extending (aext) variants for each address space plus the
+    // constant pool.
+    defm GLOBALLOAD         : LOAD<"!global load"            , global_load>;
+    defm GLOBALZEXTLOAD     : LOAD<"!global zext load"       , global_zext_load>;
+    defm GLOBALSEXTLOAD     : LOAD<"!global sext load"       , global_sext_load>;
+    defm GLOBALAEXTLOAD     : LOAD<"!global aext load"       , global_aext_load>;
+    defm PRIVATELOAD        : LOAD<"!private load"           , private_load>;
+    defm PRIVATEZEXTLOAD    : LOAD<"!private zext load"      , private_zext_load>;
+    defm PRIVATESEXTLOAD    : LOAD<"!private sext load"      , private_sext_load>;
+    defm PRIVATEAEXTLOAD    : LOAD<"!private aext load"      , private_aext_load>;
+    defm CPOOLLOAD          : LOAD<"!constant pool load"     , cp_load>;
+    defm CPOOLZEXTLOAD      : LOAD<"!constant pool zext load", cp_zext_load>;
+    defm CPOOLSEXTLOAD      : LOAD<"!constant pool sext load", cp_sext_load>;
+    defm CPOOLAEXTLOAD      : LOAD<"!constant aext pool load", cp_aext_load>;
+    defm CONSTANTLOAD       : LOAD<"!constant load"          , constant_load>;
+    defm CONSTANTZEXTLOAD   : LOAD<"!constant zext load"     , constant_zext_load>;
+    defm CONSTANTSEXTLOAD   : LOAD<"!constant sext load"     , constant_sext_load>;
+    defm CONSTANTAEXTLOAD   : LOAD<"!constant aext load"     , constant_aext_load>;
+    defm LOCALLOAD          : LOAD<"!local load"             , local_load>;
+    defm LOCALZEXTLOAD      : LOAD<"!local zext load"        , local_zext_load>;
+    defm LOCALSEXTLOAD      : LOAD<"!local sext load"        , local_sext_load>;
+    defm LOCALAEXTLOAD      : LOAD<"!local aext load"        , local_aext_load>;
+    defm REGIONLOAD         : LOAD<"!region load"            , region_load>;
+    defm REGIONZEXTLOAD     : LOAD<"!region zext load"       , region_zext_load>;
+    defm REGIONSEXTLOAD     : LOAD<"!region sext load"       , region_sext_load>;
+    defm REGIONAEXTLOAD     : LOAD<"!region aext load"       , region_aext_load>;
+  }
+
+
+  //===---------------------------------------------------------------------===//
+  // IO Expansion Load/Store Instructions
+  //===---------------------------------------------------------------------===//
+  // These opcode has custom swizzle patterns for some of the arguments.
+  // All of these carry empty ISel pattern lists ([]), so they are presumably
+  // emitted by custom IO-expansion lowering rather than pattern matching —
+  // see the "custom swizzle patterns" note above; TODO confirm the emitter.
+  let mayLoad = 1 in {
+    // Raw UAV Operations
+    // Dword-granularity raw UAV loads; _cached/_cached_aligned variants only
+    // differ in the modifier suffix appended to the emitted IL opcode.
+    def UAVRAWLOAD_i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id) $dst, $addy"), []>;
+    def UAVRAWLOAD_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV2I32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id) $dst, $addy"), []>;
+    def UAVRAWLOAD_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id) $dst, $addy"), []>;
+    def UAVRAWLOADCACHED_i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+    def UAVRAWLOADCACHED_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV2I32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+    def UAVRAWLOADCACHED_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+    def UAVRAWLOADCACHEDALIGNED_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV2I32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached_aligned $dst, $addy"), []>;
+    def UAVRAWLOADCACHEDALIGNED_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_id($id)_cached_aligned $dst, $addy"), []>;
+    // Byte/short granularity raw UAV loads need byte-addressable UAV support.
+    let Predicates = [HasByteShortUAV] in {
+      def UAVRAWLOAD_i8 : TwoInOneOut<IL_OP_UAV_BYTE_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i32imm:$id),
+          !strconcat(IL_OP_UAV_BYTE_LOAD.Text, "_id($id) $dst, $addy"), []>;
+      def UAVRAWLOAD_u8 : TwoInOneOut<IL_OP_UAV_UBYTE_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i32imm:$id),
+          !strconcat(IL_OP_UAV_UBYTE_LOAD.Text, "_id($id) $dst, $addy"), []>;
+      def UAVRAWLOAD_i16 : TwoInOneOut<IL_OP_UAV_SHORT_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i32imm:$id),
+          !strconcat(IL_OP_UAV_SHORT_LOAD.Text, "_id($id) $dst, $addy"), []>;
+      def UAVRAWLOAD_u16 : TwoInOneOut<IL_OP_UAV_USHORT_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i32imm:$id),
+          !strconcat(IL_OP_UAV_USHORT_LOAD.Text, "_id($id) $dst, $addy"), []>;
+      def UAVRAWLOADCACHED_i8 : TwoInOneOut<IL_OP_UAV_BYTE_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i32imm:$id),
+          !strconcat(IL_OP_UAV_BYTE_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+      def UAVRAWLOADCACHED_u8 : TwoInOneOut<IL_OP_UAV_UBYTE_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i32imm:$id),
+          !strconcat(IL_OP_UAV_UBYTE_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+      def UAVRAWLOADCACHED_i16 : TwoInOneOut<IL_OP_UAV_SHORT_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i32imm:$id),
+          !strconcat(IL_OP_UAV_SHORT_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+      def UAVRAWLOADCACHED_u16 : TwoInOneOut<IL_OP_UAV_USHORT_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i32imm:$id),
+          !strconcat(IL_OP_UAV_USHORT_LOAD.Text, "_id($id)_cached $dst, $addy"), []>;
+    }
+    // Arena UAV Operations
+    // Arena loads encode the access width in a _size(...) modifier instead of
+    // a distinct opcode.
+    def UAVARENALOAD_i8 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(byte) $dst, $addy"), []>;
+    def UAVARENALOAD_i16 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(short) $dst, $addy"), []>;
+    def UAVARENALOAD_i32 : TwoInOneOut<IL_OP_ARENA_UAV_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_ARENA_UAV_LOAD.Text, "_id($id)_size(dword) $dst, $addy"), []>;
+
+    // Scratch Buffer Operations
+    // Scratch reads are a plain mov from the indexed scratch array x$id.
+    def SCRATCHLOAD : TwoInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_MOV.Text, " $dst, x$id[$addy]"), []>;
+
+    // Constant Buffer Operations
+    // Constant-buffer reads index cb$id.
+    def CBLOAD : TwoInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_MOV.Text, " $dst, cb$id[$addy]"), []>;
+
+    // GDS Operations
+    def GDSLOAD : TwoInOneOut<IL_OP_GDS_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_GDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+    def GDSLOAD_i8 : TwoInOneOut<IL_OP_GDS_LOAD_BYTE, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_GDS_LOAD_BYTE.Text, "_id($id) $dst, $addy"), []>;
+    def GDSLOAD_u8 : TwoInOneOut<IL_OP_GDS_LOAD_UBYTE, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_GDS_LOAD_UBYTE.Text, "_id($id) $dst, $addy"), []>;
+    def GDSLOAD_i16 : TwoInOneOut<IL_OP_GDS_LOAD_SHORT, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_GDS_LOAD_SHORT.Text, "_id($id) $dst, $addy"), []>;
+    def GDSLOAD_u16 : TwoInOneOut<IL_OP_GDS_LOAD_USHORT, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_GDS_LOAD_USHORT.Text, "_id($id) $dst, $addy"), []>;
+
+    // LDS Operations
+    // The VEC forms repeat $addy per the IL lds_load_vec operand layout.
+    def LDSLOADVEC : TwoInOneOut<IL_OP_LDS_LOAD_VEC, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+    def LDSLOADVEC_v2i32 : TwoInOneOut<IL_OP_LDS_LOAD_VEC, (outs GPRV2I32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+    def LDSLOADVEC_v4i32 : TwoInOneOut<IL_OP_LDS_LOAD_VEC, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+    def LDSLOAD : TwoInOneOut<IL_OP_LDS_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_LDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+    def LDSLOAD_i8 : TwoInOneOut<IL_OP_LDS_LOAD_BYTE, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_BYTE.Text, "_id($id) $dst, $addy"), []>;
+    def LDSLOAD_u8 : TwoInOneOut<IL_OP_LDS_LOAD_UBYTE, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_UBYTE.Text, "_id($id) $dst, $addy"), []>;
+    def LDSLOAD_i16 : TwoInOneOut<IL_OP_LDS_LOAD_SHORT, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_SHORT.Text, "_id($id) $dst, $addy"), []>;
+    def LDSLOAD_u16 : TwoInOneOut<IL_OP_LDS_LOAD_USHORT, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i32imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_USHORT.Text, "_id($id) $dst, $addy"), []>;
+
+  }
+  // Store Operations
+  let mayStore = 1 in {
+    // Raw UAV Operations
+    def UAVRAWSTORE_i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRI32:$mem),
+        (ins GPRXI32:$addy, GPRI32:$src, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+    def UAVRAWSTORE_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRV2I32:$mem),
+        (ins GPRXI32:$addy, GPRV2I32:$src, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+    def UAVRAWSTORE_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRV4I32:$mem),
+        (ins GPRXI32:$addy, GPRV4I32:$src, i32imm:$id),
+        !strconcat(IL_OP_RAW_UAV_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+    let Predicates = [HasByteShortUAV] in {
+      def UAVRAWSTORE_i8 : TwoInOneOut<IL_OP_UAV_BYTE_STORE, (outs GPRI32:$mem),
+          (ins GPRXI32:$addy, GPRI32:$src, i32imm:$id),
+          !strconcat(IL_OP_UAV_BYTE_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+      def UAVRAWSTORE_i16 : TwoInOneOut<IL_OP_UAV_SHORT_STORE, (outs GPRI32:$mem),
+          (ins GPRXI32:$addy, GPRI32:$src, i32imm:$id),
+          !strconcat(IL_OP_UAV_SHORT_STORE.Text, "_id($id) $mem, $addy, $src"), []>;
+
+    }
+    // Arena UAV Operations
+    def UAVARENASTORE_i8 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRXI32:$addy),
+        (ins GPRI8:$src, i32imm:$id),
+        !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+            "_id($id)_size(byte) $addy, $src"), []>;
+    def UAVARENASTORE_i16 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRXI32:$addy),
+        (ins GPRI16:$src, i32imm:$id),
+        !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+            "_id($id)_size(short) $addy, $src"), []>;
+    def UAVARENASTORE_i32 : TwoInOneOut<IL_OP_ARENA_UAV_STORE, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i32imm:$id),
+        !strconcat(IL_OP_ARENA_UAV_STORE.Text,
+            "_id($id)_size(dword) $addy, $src"), []>;
+
+    // Scratch Buffer Operations
+    def SCRATCHSTORE : TwoInOneOut<IL_OP_MOV, (outs GPRXI32:$addy),
+        (ins GPRV4I32:$data, i32imm:$id),
+        !strconcat(IL_OP_MOV.Text, " x$id[$addy, $data"), []>;
+
+    // GDS Operations
+    def GDSSTORE : TwoInOneOut<IL_OP_GDS_STORE, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i32imm:$id),
+        !strconcat(IL_OP_GDS_STORE.Text, "_id($id) $addy, $src"), []>;
+
+    def GDSSTORE_i8 : TwoInOneOut<IL_OP_GDS_STORE_BYTE, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i32imm:$id),
+        !strconcat(IL_OP_GDS_STORE_BYTE.Text, "_id($id) $addy, $src"), []>;
+
+    def GDSSTORE_i16 : TwoInOneOut<IL_OP_GDS_STORE_SHORT, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i32imm:$id),
+        !strconcat(IL_OP_GDS_STORE_SHORT.Text, "_id($id) $addy, $src"), []>;
+  
+    // LDS Operations
+    def LDSSTOREVEC : ThreeInOneOut<IL_OP_LDS_STORE_VEC, (outs GPRI32:$mem),
+        (ins GPRXI32:$addy, GPRI32:$src, i32imm:$id),
+        !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+    def LDSSTOREVEC_v2i32 : ThreeInOneOut<IL_OP_LDS_STORE_VEC, (outs GPRV2I32:$mem),
+        (ins GPRXI32:$addy, GPRV2I32:$src, i32imm:$id),
+        !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+
+    def LDSSTOREVEC_v4i32 : ThreeInOneOut<IL_OP_LDS_STORE_VEC, (outs GPRV4I32:$mem),
+        (ins GPRXI32:$addy, GPRV4I32:$src, i32imm:$id),
+        !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+
+    def LDSSTORE : TwoInOneOut<IL_OP_LDS_STORE, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i32imm:$id),
+        !strconcat(IL_OP_LDS_STORE.Text, "_id($id) $addy, $src"), []>;
+
+    def LDSSTORE_i8 : TwoInOneOut<IL_OP_LDS_STORE_BYTE, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i32imm:$id),
+        !strconcat(IL_OP_LDS_STORE_BYTE.Text, "_id($id) $addy, $src"), []>;
+
+    def LDSSTORE_i16 : TwoInOneOut<IL_OP_LDS_STORE_SHORT, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i32imm:$id),
+        !strconcat(IL_OP_LDS_STORE_SHORT.Text, "_id($id) $addy, $src"), []>;
+  }
+  // Image related operations.
+  // Four groups: normalized-coordinate samples (*_READ), unnormalized samples
+  // (*_READ_UNNORM), resource-info movs (*_INFO0/INFO1) and UAV image writes
+  // (*_WRITE).  Unlike the IO-expansion defs above, each has a real ISel
+  // pattern tied to its int_AMDIL_image* intrinsic.
+  def IMAGE1D_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1DA_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_array_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1DB_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text, 
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_buffer_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  // Buffer images also support a direct (sampler-less) texel load.
+  def IMAGE1DB_TXLD : ILFormat<IL_OP_LOAD, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$addy),
+      !strconcat(IL_OP_LOAD.Text, 
+          "_id($ptr) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_buffer_load ADDR:$ptr, GPRI32:$addy))]>;
+  def IMAGE2D_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image2d_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE2DA_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image2d_array_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE3D_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image3d_read_norm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1D_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1DA_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_array_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1DB_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text, 
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_buffer_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE2D_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image2d_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE2DA_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image2d_array_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE3D_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image3d_read_unnorm ADDR:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  // Resource-info queries lower to a mov from the image resource pointer.
+  def IMAGE1D_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_info0 ADDR:$ptr))]>;
+  def IMAGE1D_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_info1 ADDR:$ptr))]>;
+  def IMAGE1DA_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_array_info0 ADDR:$ptr))]>;
+  def IMAGE1DA_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_array_info1 ADDR:$ptr))]>;
+  def IMAGE1DB_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_buffer_info0 ADDR:$ptr))]>;
+  def IMAGE1DB_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_buffer_info1 ADDR:$ptr))]>;
+  def IMAGE2D_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image2d_info0 ADDR:$ptr))]>;
+  def IMAGE2D_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image2d_info1 ADDR:$ptr))]>;
+  def IMAGE2DA_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image2d_array_info0 ADDR:$ptr))]>;
+  def IMAGE2DA_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image2d_array_info1 ADDR:$ptr))]>;
+  def IMAGE3D_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image3d_info0 ADDR:$ptr))]>;
+  def IMAGE3D_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM3232:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image3d_info1 ADDR:$ptr))]>;
+  // Image writes: 1D/2D forms take v2i32 coordinates, arrayed/3D forms v4i32.
+  def IMAGE1D_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM3232:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image1d_write ADDR:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+  def IMAGE1DA_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM3232:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image1d_array_write ADDR:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+  def IMAGE1DB_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM3232:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image1d_buffer_write ADDR:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+  def IMAGE2D_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM3232:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image2d_write ADDR:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+  def IMAGE2DA_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM3232:$ptr, GPRV4I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image2d_array_write ADDR:$ptr, GPRV4I32:$addy, GPRV4I32:$data)]>;
+  def IMAGE3D_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM3232:$ptr, GPRV4I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image3d_write ADDR:$ptr, GPRV4I32:$addy, GPRV4I32:$data)]>;
+
+  // Printf related operations.
+  // The printf buffer offset and size are read from fixed constant-buffer
+  // slots (cb0[8].y / cb0[8].z).
+  let isCall=1, isAsCheapAsAMove = 1 in {
+    def GET_PRINTF_OFFSET_i32 : ILFormat<IL_OP_MOV, (outs GPRXI32:$dst),
+        (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[8].y0"),
+        [(set GPRXI32:$dst, (int_AMDIL_get_printf_offset))]>;
+    def GET_PRINTF_SIZE_i32 : ILFormat<IL_OP_MOV, (outs GPRXI32:$dst),
+        (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[8].z0"),
+        [(set GPRXI32:$dst, (int_AMDIL_get_printf_size))]>;
+  }
+  // Atomic operations
+  // Global (raw UAV) atomics that discard the old value (*_NORET).
+  def ATOM_G_ADD_NORET     : BinAtomNoRet<IL_OP_UAV_ADD,
+      "_id($id)", atom_g_add_noret>;
+  def ATOM_G_AND_NORET     : BinAtomNoRet<IL_OP_UAV_AND,
+      "_id($id)", atom_g_and_noret>;
+  def ATOM_G_MAX_NORET     : BinAtomNoRet<IL_OP_UAV_MAX,
+      "_id($id)", atom_g_max_noret>;
+  def ATOM_G_MIN_NORET     : BinAtomNoRet<IL_OP_UAV_MIN,
+      "_id($id)", atom_g_min_noret>;
+  def ATOM_G_UMAX_NORET    : BinAtomNoRet<IL_OP_UAV_UMAX,
+      "_id($id)", atom_g_umax_noret>;
+  def ATOM_G_UMIN_NORET    : BinAtomNoRet<IL_OP_UAV_UMIN,
+      "_id($id)", atom_g_umin_noret>;
+  def ATOM_G_OR_NORET      : BinAtomNoRet<IL_OP_UAV_OR,
+      "_id($id)", atom_g_or_noret>;
+  def ATOM_G_RSUB_NORET    : BinAtomNoRet<IL_OP_UAV_RSUB,
+      "_id($id)", atom_g_rsub_noret>;
+  def ATOM_G_SUB_NORET     : BinAtomNoRet<IL_OP_UAV_SUB,
+      "_id($id)", atom_g_sub_noret>;
+  def ATOM_G_XOR_NORET     : BinAtomNoRet<IL_OP_UAV_XOR,
+      "_id($id)", atom_g_xor_noret>;
+  def ATOM_G_INC_NORET     : BinAtomNoRet<IL_OP_UAV_INC,
+      "_id($id)", atom_g_inc_noret>;
+  def ATOM_G_DEC_NORET     : BinAtomNoRet<IL_OP_UAV_DEC,
+      "_id($id)", atom_g_dec_noret>;
+  def ATOM_G_CMPXCHG_NORET    : CmpXChgNoRet<IL_OP_UAV_CMP,
+      "_id($id)", atom_g_cmpxchg_noret>;
+  // Arena UAV no-return atomics; only legal with ArenaSupport.  Note these
+  // intentionally reuse the atom_g_*_noret selection patterns, differing only
+  // by the "_arena" suffix in the emitted id modifier.
+  let Predicates = [ArenaSupport] in {
+  def ATOM_A_ADD_NORET     : BinAtomNoRet<IL_OP_UAV_ADD,
+      "_id($id)_arena", atom_g_add_noret>;
+  def ATOM_A_AND_NORET     : BinAtomNoRet<IL_OP_UAV_AND,
+      "_id($id)_arena", atom_g_and_noret>;
+  def ATOM_A_MAX_NORET     : BinAtomNoRet<IL_OP_UAV_MAX,
+      "_id($id)_arena", atom_g_max_noret>;
+  def ATOM_A_MIN_NORET     : BinAtomNoRet<IL_OP_UAV_MIN,
+      "_id($id)_arena", atom_g_min_noret>;
+  def ATOM_A_UMAX_NORET    : BinAtomNoRet<IL_OP_UAV_UMAX,
+      "_id($id)_arena", atom_g_umax_noret>;
+  def ATOM_A_UMIN_NORET    : BinAtomNoRet<IL_OP_UAV_UMIN,
+      "_id($id)_arena", atom_g_umin_noret>;
+  def ATOM_A_OR_NORET      : BinAtomNoRet<IL_OP_UAV_OR,
+      "_id($id)_arena", atom_g_or_noret>;
+  def ATOM_A_RSUB_NORET    : BinAtomNoRet<IL_OP_UAV_RSUB,
+      "_id($id)_arena", atom_g_rsub_noret>;
+  def ATOM_A_SUB_NORET     : BinAtomNoRet<IL_OP_UAV_SUB,
+      "_id($id)_arena", atom_g_sub_noret>;
+  def ATOM_A_XOR_NORET     : BinAtomNoRet<IL_OP_UAV_XOR,
+      "_id($id)_arena", atom_g_xor_noret>;
+  def ATOM_A_INC_NORET     : BinAtomNoRet<IL_OP_UAV_INC,
+      "_id($id)_arena", atom_g_inc_noret>;
+  def ATOM_A_DEC_NORET     : BinAtomNoRet<IL_OP_UAV_DEC,
+      "_id($id)_arena", atom_g_dec_noret>;
+  def ATOM_A_CMPXCHG_NORET    : CmpXChgNoRet<IL_OP_UAV_CMP,
+      "_id($id)_arena", atom_g_cmpxchg_noret>;
+  }
+  // Local (LDS, ATOM_L_*) and region (GDS, ATOM_R_*) no-return atomics.
+  def ATOM_L_ADD_NORET     : BinAtomNoRet<IL_OP_LDS_ADD,
+      "_id($id)", atom_l_add_noret>;
+  def ATOM_L_AND_NORET     : BinAtomNoRet<IL_OP_LDS_AND,
+      "_id($id)", atom_l_and_noret>;
+  def ATOM_L_MAX_NORET     : BinAtomNoRet<IL_OP_LDS_MAX,
+      "_id($id)", atom_l_max_noret>;
+  def ATOM_L_MIN_NORET     : BinAtomNoRet<IL_OP_LDS_MIN,
+      "_id($id)", atom_l_min_noret>;
+  def ATOM_L_UMAX_NORET    : BinAtomNoRet<IL_OP_LDS_UMAX,
+      "_id($id)", atom_l_umax_noret>;
+  def ATOM_L_UMIN_NORET    : BinAtomNoRet<IL_OP_LDS_UMIN,
+      "_id($id)", atom_l_umin_noret>;
+  def ATOM_L_MSKOR_NORET   : TriAtomNoRet<IL_OP_LDS_MSKOR,
+      "_id($id)", atom_l_mskor_noret>;
+  def ATOM_L_OR_NORET      : BinAtomNoRet<IL_OP_LDS_OR,
+      "_id($id)", atom_l_or_noret>;
+  def ATOM_L_RSUB_NORET    : BinAtomNoRet<IL_OP_LDS_RSUB,
+      "_id($id)", atom_l_rsub_noret>;
+  def ATOM_L_SUB_NORET     : BinAtomNoRet<IL_OP_LDS_SUB,
+      "_id($id)", atom_l_sub_noret>;
+  def ATOM_L_XOR_NORET     : BinAtomNoRet<IL_OP_LDS_XOR,
+      "_id($id)", atom_l_xor_noret>;
+  def ATOM_L_INC_NORET     : BinAtomNoRet<IL_OP_LDS_INC,
+      "_id($id)", atom_l_inc_noret>;
+  def ATOM_L_DEC_NORET     : BinAtomNoRet<IL_OP_LDS_DEC,
+      "_id($id)", atom_l_dec_noret>;
+  def ATOM_L_CMPXCHG_NORET    : TriAtomNoRet<IL_OP_LDS_CMP,
+      "_id($id)", atom_l_cmpxchg_noret>;
+  def ATOM_R_ADD_NORET     : BinAtomNoRet<IL_OP_GDS_ADD,
+      "_id($id)", atom_r_add_noret>;
+  def ATOM_R_AND_NORET     : BinAtomNoRet<IL_OP_GDS_AND,
+      "_id($id)", atom_r_and_noret>;
+  def ATOM_R_MAX_NORET     : BinAtomNoRet<IL_OP_GDS_MAX,
+      "_id($id)", atom_r_max_noret>;
+  def ATOM_R_MIN_NORET     : BinAtomNoRet<IL_OP_GDS_MIN,
+      "_id($id)", atom_r_min_noret>;
+  def ATOM_R_UMAX_NORET    : BinAtomNoRet<IL_OP_GDS_UMAX,
+      "_id($id)", atom_r_umax_noret>;
+  def ATOM_R_UMIN_NORET    : BinAtomNoRet<IL_OP_GDS_UMIN,
+      "_id($id)", atom_r_umin_noret>;
+  def ATOM_R_MSKOR_NORET   : TriAtomNoRet<IL_OP_GDS_MSKOR,
+      "_id($id)", atom_r_mskor_noret>;
+  def ATOM_R_OR_NORET      : BinAtomNoRet<IL_OP_GDS_OR,
+      "_id($id)", atom_r_or_noret>;
+  def ATOM_R_RSUB_NORET    : BinAtomNoRet<IL_OP_GDS_RSUB,
+      "_id($id)", atom_r_rsub_noret>;
+  def ATOM_R_SUB_NORET     : BinAtomNoRet<IL_OP_GDS_SUB,
+      "_id($id)", atom_r_sub_noret>;
+  def ATOM_R_XOR_NORET     : BinAtomNoRet<IL_OP_GDS_XOR,
+      "_id($id)", atom_r_xor_noret>;
+  def ATOM_R_INC_NORET     : BinAtomNoRet<IL_OP_GDS_INC,
+      "_id($id)", atom_r_inc_noret>;
+  def ATOM_R_DEC_NORET     : BinAtomNoRet<IL_OP_GDS_DEC,
+      "_id($id)", atom_r_dec_noret>;
+  def ATOM_R_CMPXCHG_NORET    : CmpXChgNoRet<IL_OP_GDS_CMP,
+      "_id($id)", atom_r_cmpxchg_noret>;
+  // All of the atomic functions that return
+  // Global (raw UAV) atomics that return the previous value, using the
+  // IL_OP_UAV_READ_* opcode variants.
+  def ATOM_G_ADD     : BinAtom<IL_OP_UAV_READ_ADD,
+      "_id($id)", atom_g_add>;
+  def ATOM_G_AND     : BinAtom<IL_OP_UAV_READ_AND,
+      "_id($id)", atom_g_and>;
+  def ATOM_G_MAX     : BinAtom<IL_OP_UAV_READ_MAX,
+      "_id($id)", atom_g_max>;
+  def ATOM_G_MIN     : BinAtom<IL_OP_UAV_READ_MIN,
+      "_id($id)", atom_g_min>;
+  def ATOM_G_UMAX    : BinAtom<IL_OP_UAV_READ_UMAX,
+      "_id($id)", atom_g_umax>;
+  def ATOM_G_UMIN    : BinAtom<IL_OP_UAV_READ_UMIN,
+      "_id($id)", atom_g_umin>;
+  def ATOM_G_OR      : BinAtom<IL_OP_UAV_READ_OR,
+      "_id($id)", atom_g_or>;
+  def ATOM_G_RSUB    : BinAtom<IL_OP_UAV_READ_RSUB,
+      "_id($id)", atom_g_rsub>;
+  def ATOM_G_SUB     : BinAtom<IL_OP_UAV_READ_SUB,
+      "_id($id)", atom_g_sub>;
+  def ATOM_G_XOR     : BinAtom<IL_OP_UAV_READ_XOR,
+      "_id($id)", atom_g_xor>;
+  def ATOM_G_INC     : BinAtom<IL_OP_UAV_READ_INC,
+      "_id($id)", atom_g_inc>;
+  def ATOM_G_DEC     : BinAtom<IL_OP_UAV_READ_DEC,
+      "_id($id)", atom_g_dec>;
+  def ATOM_G_XCHG    : BinAtom<IL_OP_UAV_READ_XCHG,
+      "_id($id)", atom_g_xchg>;
+  def ATOM_G_CMPXCHG : CmpXChg<IL_OP_UAV_READ_CMPXCHG,
+      "_id($id)", atom_g_cmpxchg>;
+  // Arena atomic accesses
+  // Arena UAV atomics returning the previous value; only legal with
+  // ArenaSupport.  Like the *_NORET arena block above, these reuse the
+  // atom_g_* selection patterns with an "_arena" suffix on the emitted id.
+  let Predicates = [ArenaSupport] in {
+  def ATOM_A_ADD     : BinAtom<IL_OP_UAV_READ_ADD,
+      "_id($id)_arena", atom_g_add>;
+  def ATOM_A_AND     : BinAtom<IL_OP_UAV_READ_AND,
+      "_id($id)_arena", atom_g_and>;
+  def ATOM_A_MAX     : BinAtom<IL_OP_UAV_READ_MAX,
+      "_id($id)_arena", atom_g_max>;
+  def ATOM_A_MIN     : BinAtom<IL_OP_UAV_READ_MIN,
+      "_id($id)_arena", atom_g_min>;
+  def ATOM_A_UMAX    : BinAtom<IL_OP_UAV_READ_UMAX,
+      "_id($id)_arena", atom_g_umax>;
+  def ATOM_A_UMIN    : BinAtom<IL_OP_UAV_READ_UMIN,
+      "_id($id)_arena", atom_g_umin>;
+  def ATOM_A_OR      : BinAtom<IL_OP_UAV_READ_OR,
+      "_id($id)_arena", atom_g_or>;
+  def ATOM_A_RSUB    : BinAtom<IL_OP_UAV_READ_RSUB,
+      "_id($id)_arena", atom_g_rsub>;
+  def ATOM_A_SUB     : BinAtom<IL_OP_UAV_READ_SUB,
+      "_id($id)_arena", atom_g_sub>;
+  def ATOM_A_XOR     : BinAtom<IL_OP_UAV_READ_XOR,
+      "_id($id)_arena", atom_g_xor>;
+  def ATOM_A_INC     : BinAtom<IL_OP_UAV_READ_INC,
+      "_id($id)_arena", atom_g_inc>;
+  def ATOM_A_DEC     : BinAtom<IL_OP_UAV_READ_DEC,
+      "_id($id)_arena", atom_g_dec>;
+  def ATOM_A_XCHG    : BinAtom<IL_OP_UAV_READ_XCHG,
+      "_id($id)_arena", atom_g_xchg>;
+  def ATOM_A_CMPXCHG : CmpXChg<IL_OP_UAV_READ_CMPXCHG,
+      "_id($id)_arena", atom_g_cmpxchg>;
+  }
+  def ATOM_L_ADD     : BinAtom<IL_OP_LDS_READ_ADD,
+      "_id($id)", atom_l_add>;
+  def ATOM_L_AND     : BinAtom<IL_OP_LDS_READ_AND,
+      "_id($id)", atom_l_and>;
+  def ATOM_L_MAX     : BinAtom<IL_OP_LDS_READ_MAX,
+      "_id($id)", atom_l_max>;
+  def ATOM_L_MIN     : BinAtom<IL_OP_LDS_READ_MIN,
+      "_id($id)", atom_l_min>;
+  def ATOM_L_UMAX    : BinAtom<IL_OP_LDS_READ_UMAX,
+      "_id($id)", atom_l_umax>;
+  def ATOM_L_UMIN    : BinAtom<IL_OP_LDS_READ_UMIN,
+      "_id($id)", atom_l_umin>;
+  def ATOM_L_OR      : BinAtom<IL_OP_LDS_READ_OR,
+      "_id($id)", atom_l_or>;
+  def ATOM_L_MSKOR   : TriAtom<IL_OP_LDS_READ_MSKOR,
+      "_id($id)", atom_l_mskor>;
+  def ATOM_L_RSUB    : BinAtom<IL_OP_LDS_READ_RSUB,
+      "_id($id)", atom_l_rsub>;
+  def ATOM_L_SUB     : BinAtom<IL_OP_LDS_READ_SUB,
+      "_id($id)", atom_l_sub>;
+  def ATOM_L_XOR     : BinAtom<IL_OP_LDS_READ_XOR,
+      "_id($id)", atom_l_xor>;
+  def ATOM_L_INC     : BinAtom<IL_OP_LDS_READ_INC,
+      "_id($id)", atom_l_inc>;
+  def ATOM_L_DEC     : BinAtom<IL_OP_LDS_READ_DEC,
+      "_id($id)", atom_l_dec>;
+  def ATOM_L_XCHG    : BinAtom<IL_OP_LDS_READ_XCHG,
+      "_id($id)", atom_l_xchg>;
+  def ATOM_L_CMPXCHG : TriAtom<IL_OP_LDS_READ_CMPXCHG,
+      "_id($id)", atom_l_cmpxchg>;
+  def ATOM_R_ADD     : BinAtom<IL_OP_GDS_READ_ADD,
+      "_id($id)", atom_r_add>;
+  def ATOM_R_AND     : BinAtom<IL_OP_GDS_READ_AND,
+      "_id($id)", atom_r_and>;
+  def ATOM_R_MAX     : BinAtom<IL_OP_GDS_READ_MAX,
+      "_id($id)", atom_r_max>;
+  def ATOM_R_MIN     : BinAtom<IL_OP_GDS_READ_MIN,
+      "_id($id)", atom_r_min>;
+  def ATOM_R_UMAX    : BinAtom<IL_OP_GDS_READ_UMAX,
+      "_id($id)", atom_r_umax>;
+  def ATOM_R_UMIN    : BinAtom<IL_OP_GDS_READ_UMIN,
+      "_id($id)", atom_r_umin>;
+  def ATOM_R_OR      : BinAtom<IL_OP_GDS_READ_OR,
+      "_id($id)", atom_r_or>;
+  def ATOM_R_MSKOR   : TriAtom<IL_OP_GDS_READ_MSKOR,
+      "_id($id)", atom_r_mskor>;
+  def ATOM_R_RSUB    : BinAtom<IL_OP_GDS_READ_RSUB,
+      "_id($id)", atom_r_rsub>;
+  def ATOM_R_SUB     : BinAtom<IL_OP_GDS_READ_SUB,
+      "_id($id)", atom_r_sub>;
+  def ATOM_R_XOR     : BinAtom<IL_OP_GDS_READ_XOR,
+      "_id($id)", atom_r_xor>;
+  def ATOM_R_INC     : BinAtom<IL_OP_GDS_READ_INC,
+      "_id($id)", atom_r_inc>;
+  def ATOM_R_DEC     : BinAtom<IL_OP_GDS_READ_DEC,
+      "_id($id)", atom_r_dec>;
+  def ATOM_R_XCHG    : BinAtom<IL_OP_GDS_READ_XCHG,
+      "_id($id)", atom_r_xchg>;
+  def ATOM_R_CMPXCHG : CmpXChg<IL_OP_GDS_READ_CMPXCHG,
+      "_id($id)", atom_r_cmpxchg>;
+  // 64bit atomic operations.
+  def ATOM_G_ADD_NORET_B64  : BinAtomNoRetI64<IL_OP_UAV_ADD,
+      "_b64_id($id)", atom_g_add_noret>;
+  def ATOM_G_AND_NORET_B64  : BinAtomNoRetI64<IL_OP_UAV_AND,
+      "_b64_id($id)", atom_g_and_noret>;
+  def ATOM_G_MAX_NORET_B64  : BinAtomNoRetI64<IL_OP_UAV_MAX,
+      "_b64_id($id)", atom_g_max_noret>;
+  def ATOM_G_MIN_NORET_B64  : BinAtomNoRetI64<IL_OP_UAV_MIN,
+      "_b64_id($id)", atom_g_min_noret>;
+  def ATOM_G_UMAX_NORET_B64 : BinAtomNoRetI64<IL_OP_UAV_UMAX,
+      "_b64_id($id)", atom_g_umax_noret>;
+  def ATOM_G_UMIN_NORET_B64 : BinAtomNoRetI64<IL_OP_UAV_UMIN,
+      "_b64_id($id)", atom_g_umin_noret>;
+  def ATOM_G_OR_NORET_B64   : BinAtomNoRetI64<IL_OP_UAV_OR,
+      "_b64_id($id)", atom_g_or_noret>;
+  def ATOM_G_RSUB_NORET_B64 : BinAtomNoRetI64<IL_OP_UAV_RSUB,
+      "_b64_id($id)", atom_g_rsub_noret>;
+  def ATOM_G_SUB_NORET_B64  : BinAtomNoRetI64<IL_OP_UAV_SUB,
+      "_b64_id($id)", atom_g_sub_noret>;
+  def ATOM_G_XOR_NORET_B64  : BinAtomNoRetI64<IL_OP_UAV_XOR,
+      "_b64_id($id)", atom_g_xor_noret>;
+  def ATOM_G_INC_NORET_B64  : BinAtomNoRetI64<IL_OP_UAV_INC,
+      "_b64_id($id)", atom_g_inc_noret>;
+  def ATOM_G_DEC_NORET_B64  : BinAtomNoRetI64<IL_OP_UAV_DEC,
+      "_b64_id($id)", atom_g_dec_noret>;
+  def ATOM_G_CMPXCHG_NORET_B64 : CmpXChgNoRetI64<IL_OP_UAV_CMP,
+      "_b64_id($id)", atom_g_cmpxchg_noret>;
+  def ATOM_L_ADD_NORET_B64  : BinAtomNoRetI64<IL_OP_LDS_ADD,
+      "64_id($id)", atom_l_add_noret>;
+  def ATOM_L_AND_NORET_B64  : BinAtomNoRetI64<IL_OP_LDS_AND,
+      "64_id($id)", atom_l_and_noret>;
+  def ATOM_L_MAX_NORET_B64  : BinAtomNoRetI64<IL_OP_LDS_MAX,
+      "64_id($id)", atom_l_max_noret>;
+  def ATOM_L_MIN_NORET_B64  : BinAtomNoRetI64<IL_OP_LDS_MIN,
+      "64_id($id)", atom_l_min_noret>;
+  def ATOM_L_UMAX_NORET_B64 : BinAtomNoRetI64<IL_OP_LDS_UMAX,
+      "64_id($id)", atom_l_umax_noret>;
+  def ATOM_L_UMIN_NORET_B64 : BinAtomNoRetI64<IL_OP_LDS_UMIN,
+      "64_id($id)", atom_l_umin_noret>;
+  def ATOM_L_MSKOR_NORET_B64: TriAtomNoRetI64<IL_OP_LDS_MSKOR,
+      "64_id($id)", atom_l_mskor_noret>;
+  def ATOM_L_OR_NORET_B64   : BinAtomNoRetI64<IL_OP_LDS_OR,
+      "64_id($id)", atom_l_or_noret>;
+  def ATOM_L_RSUB_NORET_B64 : BinAtomNoRetI64<IL_OP_LDS_RSUB,
+      "64_id($id)", atom_l_rsub_noret>;
+  def ATOM_L_SUB_NORET_B64  : BinAtomNoRetI64<IL_OP_LDS_SUB,
+      "64_id($id)", atom_l_sub_noret>;
+  def ATOM_L_XOR_NORET_B64  : BinAtomNoRetI64<IL_OP_LDS_XOR,
+      "64_id($id)", atom_l_xor_noret>;
+  def ATOM_L_INC_NORET_B64  : BinAtomNoRetI64<IL_OP_LDS_INC,
+      "64_id($id)", atom_l_inc_noret>;
+  def ATOM_L_DEC_NORET_B64  : BinAtomNoRetI64<IL_OP_LDS_DEC,
+      "64_id($id)", atom_l_dec_noret>;
+  def ATOM_L_CMPXCHG_NORET_B64 : TriAtomNoRetI64<IL_OP_LDS_CMP,
+      "64_id($id)", atom_l_cmpxchg_noret>;
+  def ATOM_R_ADD_NORET_B64  : BinAtomNoRetI64<IL_OP_GDS_ADD,
+      "64_id($id)", atom_r_add_noret>;
+  def ATOM_R_AND_NORET_B64  : BinAtomNoRetI64<IL_OP_GDS_AND,
+      "64_id($id)", atom_r_and_noret>;
+  def ATOM_R_MAX_NORET_B64  : BinAtomNoRetI64<IL_OP_GDS_MAX,
+      "64_id($id)", atom_r_max_noret>;
+  def ATOM_R_MIN_NORET_B64  : BinAtomNoRetI64<IL_OP_GDS_MIN,
+      "64_id($id)", atom_r_min_noret>;
+  def ATOM_R_UMAX_NORET_B64 : BinAtomNoRetI64<IL_OP_GDS_UMAX,
+      "64_id($id)", atom_r_umax_noret>;
+  def ATOM_R_UMIN_NORET_B64 : BinAtomNoRetI64<IL_OP_GDS_UMIN,
+      "64_id($id)", atom_r_umin_noret>;
+  def ATOM_R_MSKOR_NORET_B64: TriAtomNoRetI64<IL_OP_GDS_MSKOR,
+      "64_id($id)", atom_r_mskor_noret>;
+  def ATOM_R_OR_NORET_B64   : BinAtomNoRetI64<IL_OP_GDS_OR,
+      "64_id($id)", atom_r_or_noret>;
+  def ATOM_R_RSUB_NORET_B64 : BinAtomNoRetI64<IL_OP_GDS_RSUB,
+      "64_id($id)", atom_r_rsub_noret>;
+  def ATOM_R_SUB_NORET_B64  : BinAtomNoRetI64<IL_OP_GDS_SUB,
+      "64_id($id)", atom_r_sub_noret>;
+  def ATOM_R_XOR_NORET_B64  : BinAtomNoRetI64<IL_OP_GDS_XOR,
+      "64_id($id)", atom_r_xor_noret>;
+  def ATOM_R_INC_NORET_B64  : BinAtomNoRetI64<IL_OP_GDS_INC,
+      "64_id($id)", atom_r_inc_noret>;
+  def ATOM_R_DEC_NORET_B64  : BinAtomNoRetI64<IL_OP_GDS_DEC,
+      "64_id($id)", atom_r_dec_noret>;
+  def ATOM_R_CMPXCHG_NORET_B64 : CmpXChgNoRetI64<IL_OP_GDS_CMP,
+      "64_id($id)", atom_r_cmpxchg_noret>;
+  // All of the atomic functions that return
+  def ATOM_G_ADD_B64     : BinAtomI64<IL_OP_UAV_READ_ADD,
+      "_b64_id($id)", atom_g_add>;
+  def ATOM_G_AND_B64     : BinAtomI64<IL_OP_UAV_READ_AND,
+      "_b64_id($id)", atom_g_and>;
+  def ATOM_G_MAX_B64     : BinAtomI64<IL_OP_UAV_READ_MAX,
+      "_b64_id($id)", atom_g_max>;
+  def ATOM_G_MIN_B64     : BinAtomI64<IL_OP_UAV_READ_MIN,
+      "_b64_id($id)", atom_g_min>;
+  def ATOM_G_UMAX_B64    : BinAtomI64<IL_OP_UAV_READ_UMAX,
+      "_b64_id($id)", atom_g_umax>;
+  def ATOM_G_UMIN_B64    : BinAtomI64<IL_OP_UAV_READ_UMIN,
+      "_b64_id($id)", atom_g_umin>;
+  def ATOM_G_OR_B64      : BinAtomI64<IL_OP_UAV_READ_OR,
+      "_b64_id($id)", atom_g_or>;
+  def ATOM_G_RSUB_B64    : BinAtomI64<IL_OP_UAV_READ_RSUB,
+      "_b64_id($id)", atom_g_rsub>;
+  def ATOM_G_SUB_B64     : BinAtomI64<IL_OP_UAV_READ_SUB,
+      "_b64_id($id)", atom_g_sub>;
+  def ATOM_G_XOR_B64     : BinAtomI64<IL_OP_UAV_READ_XOR,
+      "_b64_id($id)", atom_g_xor>;
+  def ATOM_G_INC_B64     : BinAtomI64<IL_OP_UAV_READ_INC,
+      "_b64_id($id)", atom_g_inc>;
+  def ATOM_G_DEC_B64     : BinAtomI64<IL_OP_UAV_READ_DEC,
+      "_b64_id($id)", atom_g_dec>;
+  def ATOM_G_XCHG_B64    : BinAtomI64<IL_OP_UAV_READ_XCHG,
+      "_b64_id($id)", atom_g_xchg>;
+  def ATOM_G_CMPXCHG_B64 : CmpXChgI64<IL_OP_UAV_READ_CMPXCHG,
+      "_b64_id($id)", atom_g_cmpxchg>;
+  def ATOM_L_ADD_B64     : BinAtomI64<IL_OP_LDS_READ_ADD,
+      "64_id($id)", atom_l_add>;
+  def ATOM_L_AND_B64     : BinAtomI64<IL_OP_LDS_READ_AND,
+      "64_id($id)", atom_l_and>;
+  def ATOM_L_MAX_B64     : BinAtomI64<IL_OP_LDS_READ_MAX,
+      "64_id($id)", atom_l_max>;
+  def ATOM_L_MIN_B64    : BinAtomI64<IL_OP_LDS_READ_MIN,
+      "64_id($id)", atom_l_min>;
+  def ATOM_L_UMAX_B64    : BinAtomI64<IL_OP_LDS_READ_UMAX,
+      "64_id($id)", atom_l_umax>;
+  def ATOM_L_UMIN_B64    : BinAtomI64<IL_OP_LDS_READ_UMIN,
+      "64_id($id)", atom_l_umin>;
+  def ATOM_L_OR_B64      : BinAtomI64<IL_OP_LDS_READ_OR,
+      "64_id($id)", atom_l_or>;
+  def ATOM_L_MSKOR_B64   : TriAtomI64<IL_OP_LDS_READ_MSKOR,
+      "64_id($id)", atom_l_mskor>;
+  def ATOM_L_RSUB_B64    : BinAtomI64<IL_OP_LDS_READ_RSUB,
+      "64_id($id)", atom_l_rsub>;
+  def ATOM_L_SUB_B64     : BinAtomI64<IL_OP_LDS_READ_SUB,
+      "64_id($id)", atom_l_sub>;
+  def ATOM_L_XOR_B64     : BinAtomI64<IL_OP_LDS_READ_XOR,
+      "64_id($id)", atom_l_xor>;
+  def ATOM_L_INC_B64     : BinAtomI64<IL_OP_LDS_READ_INC,
+      "64_id($id)", atom_l_inc>;
+  def ATOM_L_DEC_B64     : BinAtomI64<IL_OP_LDS_READ_DEC,
+      "64_id($id)", atom_l_dec>;
+  def ATOM_L_XCHG_B64    : BinAtomI64<IL_OP_LDS_READ_XCHG,
+      "64_id($id)", atom_l_xchg>;
+  def ATOM_L_CMPXCHG_B64 : TriAtomI64<IL_OP_LDS_READ_CMPXCHG,
+      "64_id($id)", atom_l_cmpxchg>;
+  def ATOM_R_ADD_B64     : BinAtomI64<IL_OP_GDS_READ_ADD,
+      "64_id($id)", atom_r_add>;
+  def ATOM_R_AND_B64     : BinAtomI64<IL_OP_GDS_READ_AND,
+      "64_id($id)", atom_r_and>;
+  def ATOM_R_MAX_B64     : BinAtomI64<IL_OP_GDS_READ_MAX,
+      "64_id($id)", atom_r_max>;
+  def ATOM_R_MIN_B64     : BinAtomI64<IL_OP_GDS_READ_MIN,
+      "64_id($id)", atom_r_min>;
+  def ATOM_R_UMAX_B64    : BinAtomI64<IL_OP_GDS_READ_UMAX,
+      "64_id($id)", atom_r_umax>;
+  def ATOM_R_UMIN_B64    : BinAtomI64<IL_OP_GDS_READ_UMIN,
+      "64_id($id)", atom_r_umin>;
+  def ATOM_R_OR_B64      : BinAtomI64<IL_OP_GDS_READ_OR,
+      "64_id($id)", atom_r_or>;
+  def ATOM_R_MSKOR_B64   : TriAtomI64<IL_OP_GDS_READ_MSKOR,
+      "64_id($id)", atom_r_mskor>;
+  def ATOM_R_RSUB_B64    : BinAtomI64<IL_OP_GDS_READ_RSUB,
+      "64_id($id)", atom_r_rsub>;
+  def ATOM_R_SUB_B64     : BinAtomI64<IL_OP_GDS_READ_SUB,
+      "64_id($id)", atom_r_sub>;
+  def ATOM_R_XOR_B64     : BinAtomI64<IL_OP_GDS_READ_XOR,
+      "64_id($id)", atom_r_xor>;
+  def ATOM_R_INC_B64     : BinAtomI64<IL_OP_GDS_READ_INC,
+      "64_id($id)", atom_r_inc>;
+  def ATOM_R_DEC_B64     : BinAtomI64<IL_OP_GDS_READ_DEC,
+      "64_id($id)", atom_r_dec>;
+  def ATOM_R_XCHG_B64    : BinAtomI64<IL_OP_GDS_READ_XCHG,
+      "64_id($id)", atom_r_xchg>;
+  def ATOM_R_CMPXCHG_B64 : CmpXChgI64<IL_OP_GDS_READ_CMPXCHG,
+      "64_id($id)", atom_r_cmpxchg>;
+
+  // atomic counter operations.
+  def APPEND_ALLOC : Append<IL_OP_APPEND_BUF_ALLOC,
+      "_id($id)", append_alloc>;
+  def APPEND_CONSUME : Append<IL_OP_APPEND_BUF_CONSUME,
+      "_id($id)", append_consume>;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem64.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem64.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem64.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,719 @@
+//===-- AMDILMem64.td -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Has64BitPtr] in {
+  let isCodeGenOnly=1 in {
+    //===----------------------------------------------------------------------===//
+    // Store Memory Operations
+    //===----------------------------------------------------------------------===//
+    defm GLOBALTRUNCSTORE64 : GTRUNCSTORE64<"!global trunc store">;
+    defm GLOBAL              : STORE64<"!global store"         , global_store>;
+    defm LOCALTRUNCSTORE64  : LTRUNCSTORE64<"!local trunc store">;
+    defm LOCAL               : STORE64<"!local store"          , local_store>;
+    defm PRIVATETRUNCSTORE64 : PTRUNCSTORE64<"!private trunc store">;
+    defm PRIVATE             : STORE64<"!private store"        , private_store>;
+    defm REGIONTRUNCSTORE64 : RTRUNCSTORE64<"!region trunc store">;
+    defm REGION              : STORE64<"!region hw store"      , region_store>;
+
+
+    //===---------------------------------------------------------------------===//
+    // Load Memory Operations
+    //===---------------------------------------------------------------------===//
+    defm GLOBAL       : LOAD64<"!global load"            , global_load>;
+    defm GLOBALZEXT   : LOAD64<"!global zext load"       , global_zext_load>;
+    defm GLOBALSEXT   : LOAD64<"!global sext load"       , global_sext_load>;
+    defm GLOBALAEXT   : LOAD64<"!global aext load"       , global_aext_load>;
+    defm PRIVATE      : LOAD64<"!private load"           , private_load>;
+    defm PRIVATEZEXT  : LOAD64<"!private zext load"      , private_zext_load>;
+    defm PRIVATESEXT  : LOAD64<"!private sext load"      , private_sext_load>;
+    defm PRIVATEAEXT  : LOAD64<"!private aext load"      , private_aext_load>;
+    defm CPOOL        : LOAD64<"!constant pool load"     , cp_load>;
+    defm CPOOLZEXT    : LOAD64<"!constant pool zext load", cp_zext_load>;
+    defm CPOOLSEXT    : LOAD64<"!constant pool sext load", cp_sext_load>;
+    defm CPOOLAEXT    : LOAD64<"!constant aext pool load", cp_aext_load>;
+    defm CONSTANT     : LOAD64<"!constant load"          , constant_load>;
+    defm CONSTANTZEXT : LOAD64<"!constant zext load"     , constant_zext_load>;
+    defm CONSTANTSEXT : LOAD64<"!constant sext load"     , constant_sext_load>;
+    defm CONSTANTAEXT : LOAD64<"!constant aext load"     , constant_aext_load>;
+    defm LOCAL        : LOAD64<"!local load"             , local_load>;
+    defm LOCALZEXT    : LOAD64<"!local zext load"        , local_zext_load>;
+    defm LOCALSEXT    : LOAD64<"!local sext load"        , local_sext_load>;
+    defm LOCALAEXT    : LOAD64<"!local aext load"        , local_aext_load>;
+    defm REGION       : LOAD64<"!region load"            , region_load>;
+    defm REGIONZEXT   : LOAD64<"!region zext load"       , region_zext_load>;
+    defm REGIONSEXT   : LOAD64<"!region sext load"       , region_sext_load>;
+    defm REGIONAEXT   : LOAD64<"!region aext load"       , region_aext_load>;
+  }
+  //===---------------------------------------------------------------------===//
+  // IO Expansion Load/Store Instructions
+  //===---------------------------------------------------------------------===//
+  // These opcode has custom swizzle patterns for some of the arguments.
+  let mayLoad = 1 in {
+    def UAVRAWLOAD64_i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_addr(64)_id($id) $dst, $addy"), []>;
+    def UAVRAWLOAD64_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV2I32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_addr(64)_id($id) $dst, $addy"), []>;
+    def UAVRAWLOAD64_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_addr(64)_id($id) $dst, $addy"), []>;
+    def UAVRAWLOADCACHED64_i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_addr(64)_id($id)_cached $dst, $addy"), []>;
+    def UAVRAWLOADCACHED64_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV2I32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_addr(64)_id($id)_cached $dst, $addy"), []>;
+    def UAVRAWLOADCACHED64_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_addr(64)_id($id)_cached $dst, $addy"), []>;
+    def UAVRAWLOADCACHEDALIGNED64_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV2I32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_addr(64)_id($id)_cached_aligned $dst, $addy"), []>;
+    def UAVRAWLOADCACHEDALIGNED64_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_LOAD, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_LOAD.Text, "_addr(64)_id($id)_cached_aligned $dst, $addy"), []>;
+    let Predicates = [HasByteShortUAV] in {
+      def UAVRAWLOAD64_i8 : TwoInOneOut<IL_OP_UAV_BYTE_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i64imm:$id),
+          !strconcat(IL_OP_UAV_BYTE_LOAD.Text, "_addr(64)_id($id) $dst, $addy"), []>;
+      def UAVRAWLOAD64_u8 : TwoInOneOut<IL_OP_UAV_UBYTE_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i64imm:$id),
+          !strconcat(IL_OP_UAV_UBYTE_LOAD.Text, "_addr(64)_id($id) $dst, $addy"), []>;
+      def UAVRAWLOAD64_i16 : TwoInOneOut<IL_OP_UAV_SHORT_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i64imm:$id),
+          !strconcat(IL_OP_UAV_SHORT_LOAD.Text, "_addr(64)_id($id) $dst, $addy"), []>;
+      def UAVRAWLOAD64_u16 : TwoInOneOut<IL_OP_UAV_USHORT_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i64imm:$id),
+          !strconcat(IL_OP_UAV_USHORT_LOAD.Text, "_addr(64)_id($id) $dst, $addy"), []>;
+      def UAVRAWSTORE64_i8 : TwoInOneOut<IL_OP_UAV_BYTE_STORE, (outs GPRI32:$mem),
+          (ins GPRXI32:$addy, GPRI32:$src, i64imm:$id),
+          !strconcat(IL_OP_UAV_BYTE_STORE.Text, "_addr(64)_id($id) $mem, $addy, $src"), []>; // NOTE(review): byte STORE declared inside the 'let mayLoad = 1' region; presumably belongs under 'let mayStore = 1' below -- confirm
+      def UAVRAWSTORE64_i16 : TwoInOneOut<IL_OP_UAV_SHORT_STORE, (outs GPRI32:$mem),
+          (ins GPRXI32:$addy, GPRI32:$src, i64imm:$id),
+          !strconcat(IL_OP_UAV_SHORT_STORE.Text, "_addr(64)_id($id) $mem, $addy, $src"), []>; // NOTE(review): short STORE also under 'mayLoad = 1' -- same concern as above
+      def UAVRAWLOADCACHED64_i8 : TwoInOneOut<IL_OP_UAV_BYTE_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i64imm:$id),
+          !strconcat(IL_OP_UAV_BYTE_LOAD.Text, "_addr(64)_id($id)_cached $dst, $addy"), []>;
+      def UAVRAWLOADCACHED64_u8 : TwoInOneOut<IL_OP_UAV_UBYTE_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i64imm:$id),
+          !strconcat(IL_OP_UAV_UBYTE_LOAD.Text, "_addr(64)_id($id)_cached $dst, $addy"), []>;
+      def UAVRAWLOADCACHED64_i16 : TwoInOneOut<IL_OP_UAV_SHORT_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i64imm:$id),
+          !strconcat(IL_OP_UAV_SHORT_LOAD.Text, "_addr(64)_id($id)_cached $dst, $addy"), []>;
+      def UAVRAWLOADCACHED64_u16 : TwoInOneOut<IL_OP_UAV_USHORT_LOAD, (outs GPRI32:$dst),
+          (ins GPRXI32:$addy, i64imm:$id),
+          !strconcat(IL_OP_UAV_USHORT_LOAD.Text, "_addr(64)_id($id)_cached $dst, $addy"), []>;
+    }
+    def SCRATCHLOAD64 : TwoInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_MOV.Text, " $dst, x$id[$addy]"), []>;
+    def CBLOAD64 : TwoInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_MOV.Text, " $dst, cb$id[$addy]"), []>;
+    def GDSLOAD64 : TwoInOneOut<IL_OP_GDS_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_GDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+    def GDSLOAD64_i8 : TwoInOneOut<IL_OP_GDS_LOAD_BYTE, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_GDS_LOAD_BYTE.Text, "_id($id) $dst, $addy"), []>;
+    def GDSLOAD64_u8 : TwoInOneOut<IL_OP_GDS_LOAD_UBYTE, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_GDS_LOAD_UBYTE.Text, "_id($id) $dst, $addy"), []>;
+    def GDSLOAD64_i16 : TwoInOneOut<IL_OP_GDS_LOAD_SHORT, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_GDS_LOAD_SHORT.Text, "_id($id) $dst, $addy"), []>;
+    def GDSLOAD64_u16 : TwoInOneOut<IL_OP_GDS_LOAD_USHORT, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_GDS_LOAD_USHORT.Text, "_id($id) $dst, $addy"), []>;
+
+    def LDSLOADVEC64 : TwoInOneOut<IL_OP_LDS_LOAD_VEC, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+    def LDSLOADVEC64_v2i32 : TwoInOneOut<IL_OP_LDS_LOAD_VEC, (outs GPRV2I32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+    def LDSLOADVEC64_v4i32 : TwoInOneOut<IL_OP_LDS_LOAD_VEC, (outs GPRV4I32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_VEC.Text, "_id($id) $dst, $addy, $addy"), []>;
+    def LDSLOAD64 : TwoInOneOut<IL_OP_LDS_LOAD, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_LDS_LOAD.Text, "_id($id) $dst, $addy"), []>;
+    def LDSLOAD64_i8 : TwoInOneOut<IL_OP_LDS_LOAD_BYTE, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_BYTE.Text, "_id($id) $dst, $addy"), []>;
+    def LDSLOAD64_u8 : TwoInOneOut<IL_OP_LDS_LOAD_UBYTE, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_UBYTE.Text, "_id($id) $dst, $addy"), []>;
+    def LDSLOAD64_i16 : TwoInOneOut<IL_OP_LDS_LOAD_SHORT, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_SHORT.Text, "_id($id) $dst, $addy"), []>;
+    def LDSLOAD64_u16 : TwoInOneOut<IL_OP_LDS_LOAD_USHORT, (outs GPRI32:$dst),
+        (ins GPRXI32:$addy, i64imm:$id),
+        !strconcat(IL_OP_LDS_LOAD_USHORT.Text, "_id($id) $dst, $addy"), []>;
+
+  }
+  let mayStore = 1 in {
+    def UAVRAWSTORE64_i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRI32:$mem),
+        (ins GPRXI32:$addy, GPRI32:$src, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_STORE.Text, "_addr(64)_id($id) $mem, $addy, $src"), []>;
+    def UAVRAWSTORE64_v2i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRV2I32:$mem),
+        (ins GPRXI32:$addy, GPRV2I32:$src, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_STORE.Text, "_addr(64)_id($id) $mem, $addy, $src"), []>;
+    def UAVRAWSTORE64_v4i32 : TwoInOneOut<IL_OP_RAW_UAV_STORE, (outs GPRV4I32:$mem),
+        (ins GPRXI32:$addy, GPRV4I32:$src, i64imm:$id),
+        !strconcat(IL_OP_RAW_UAV_STORE.Text, "_addr(64)_id($id) $mem, $addy, $src"), []>;
+    // Scratch-buffer store: emits "mov x<id>[<addy>], <data>".
+    def SCRATCHSTORE64 : TwoInOneOut<IL_OP_MOV, (outs GPRXI32:$addy),
+        (ins GPRV4I32:$data, i64imm:$id),
+        !strconcat(IL_OP_MOV.Text, " x$id[$addy], $data"), []>; // fixed: ']' after $addy was missing (cf. SCRATCHLOAD64's "x$id[$addy]")
+    def GDSSTORE64 : TwoInOneOut<IL_OP_GDS_STORE, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i64imm:$id),
+        !strconcat(IL_OP_GDS_STORE.Text, "_id($id) $addy, $src"), []>;
+    def LDSSTOREVEC64 : ThreeInOneOut<IL_OP_LDS_STORE_VEC, (outs GPRI32:$mem),
+        (ins GPRXI32:$addy, GPRI32:$src, i64imm:$id),
+        !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+    def LDSSTOREVEC64_v2i32 : ThreeInOneOut<IL_OP_LDS_STORE_VEC, (outs GPRV2I32:$mem),
+        (ins GPRXI32:$addy, GPRV2I32:$src, i64imm:$id),
+        !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+    def LDSSTOREVEC64_v4i32 : ThreeInOneOut<IL_OP_LDS_STORE_VEC, (outs GPRV4I32:$mem),
+        (ins GPRXI32:$addy, GPRV4I32:$src, i64imm:$id),
+        !strconcat(IL_OP_LDS_STORE_VEC.Text, "_id($id) $mem, $addy, $src, $src"), []>;
+    def LDSSTORE64 : TwoInOneOut<IL_OP_LDS_STORE, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i64imm:$id),
+        !strconcat(IL_OP_LDS_STORE.Text, "_id($id) $addy, $src"), []>;
+    def LDSSTORE64_i8 : TwoInOneOut<IL_OP_LDS_STORE_BYTE, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i64imm:$id),
+        !strconcat(IL_OP_LDS_STORE_BYTE.Text, "_id($id) $addy, $src"), []>;
+    def LDSSTORE64_i16 : TwoInOneOut<IL_OP_LDS_STORE_SHORT, (outs GPRXI32:$addy),
+        (ins GPRI32:$src, i64imm:$id),
+        !strconcat(IL_OP_LDS_STORE_SHORT.Text, "_id($id) $addy, $src"), []>;
+  }
+  // Image related operations.
+  def IMAGE1D64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1DA64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_array_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1DB64_TXLD : ILFormat<IL_OP_LOAD, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$addy),
+      !strconcat(IL_OP_LOAD.Text, 
+          "_id($ptr) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_buffer_load ADDR64:$ptr, GPRI32:$addy))]>;
+  def IMAGE1DB64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text, 
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_buffer_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE2D64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image2d_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE2DA64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image2d_array_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE3D64_READ : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(normalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image3d_read_norm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1D64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1DA64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_array_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1DB64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text, 
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image1d_buffer_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE2D64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image2d_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE2DA64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image2d_array_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE3D64_READ_UNNORM : ILFormat<IL_OP_SAMPLE, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr, GPRI32:$sampler, GPRV4F32:$addy),
+      !strconcat(IL_OP_SAMPLE.Text,
+          "_id($ptr)_sampler($sampler)_coordtype(unnormalized) $dst, $addy"),
+      [(set GPRV4I32:$dst,
+          (int_AMDIL_image3d_read_unnorm ADDR64:$ptr, GPRI32:$sampler, GPRV4F32:$addy))]>;
+  def IMAGE1D64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_info0 ADDR64:$ptr))]>;
+  // Image info queries for 64-bit image pointers: each intrinsic is lowered
+  // to a plain mov of a v4i32 info vector addressed through $ptr.
+  def IMAGE1D64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_info1 ADDR64:$ptr))]>;
+  def IMAGE1DA64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_array_info0 ADDR64:$ptr))]>;
+  def IMAGE1DA64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_array_info1 ADDR64:$ptr))]>;
+  def IMAGE1DB64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_buffer_info0 ADDR64:$ptr))]>;
+  def IMAGE1DB64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image1d_buffer_info1 ADDR64:$ptr))]>;
+  def IMAGE2DA64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image2d_array_info0 ADDR64:$ptr))]>;
+  def IMAGE2DA64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image2d_array_info1 ADDR64:$ptr))]>;
+  def IMAGE2D64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image2d_info0 ADDR64:$ptr))]>;
+  def IMAGE2D64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image2d_info1 ADDR64:$ptr))]>;
+  def IMAGE3D64_INFO0 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image3d_info0 ADDR64:$ptr))]>;
+  def IMAGE3D64_INFO1 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+      (ins MEM6464:$ptr),
+      !strconcat(IL_OP_MOV.Text, " $dst, $ptr"),
+      [(set GPRV4I32:$dst, (int_AMDIL_image3d_info1 ADDR64:$ptr))]>;
+  // Image writes for 64-bit image pointers: a UAV store of a v4i32 texel at
+  // coordinate $addy.  1D/1D-array/1D-buffer/2D variants take a v2i32
+  // coordinate; 2D-array and 3D take a v4i32 coordinate.
+  def IMAGE1D64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM6464:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image1d_write ADDR64:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+  def IMAGE1DA64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM6464:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image1d_array_write ADDR64:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+  def IMAGE1DB64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM6464:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image1d_buffer_write ADDR64:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+  def IMAGE2D64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM6464:$ptr, GPRV2I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image2d_write ADDR64:$ptr, GPRV2I32:$addy, GPRV4I32:$data)]>;
+  def IMAGE2DA64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM6464:$ptr, GPRV4I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image2d_array_write ADDR64:$ptr, GPRV4I32:$addy, GPRV4I32:$data)]>;
+  def IMAGE3D64_WRITE : ILFormat<IL_OP_UAV_STORE, (outs),
+      (ins MEM6464:$ptr, GPRV4I32:$addy, GPRV4I32:$data),
+      !strconcat(IL_OP_UAV_STORE.Text,
+          "_id($ptr) $addy, $data"),
+      [(int_AMDIL_image3d_write ADDR64:$ptr, GPRV4I32:$addy, GPRV4I32:$data)]>;
+
+
+  // Printf related operations.  The printf buffer offset and size are read
+  // from fixed constant-buffer slots (cb0[8].zw and cb0[9], per the asm
+  // strings below).
+  let isCall=1, isAsCheapAsAMove = 1 in {
+    def GET_PRINTF_OFFSET_i64: ILFormat<IL_OP_MOV, (outs GPRXYI64:$dst),
+        (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[8].zw"),
+        [(set GPRXYI64:$dst, (int_AMDIL_get_printf_offset))]>;
+    def GET_PRINTF_SIZE_i64 : ILFormat<IL_OP_MOV, (outs GPRXYI64:$dst),
+        (ins), !strconcat(IL_OP_MOV.Text, " $dst, cb0[9].x0"),
+        [(set GPRXYI64:$dst, (int_AMDIL_get_printf_size))]>;
+  }
+  // 32-bit atomic operations with no return value, 64-bit addressing.
+  // Prefix key: G = global/UAV, L = local/LDS, R = region/GDS.
+  def ATOM64_G_ADD_NORET     : BinAtomNoRet64<IL_OP_UAV_ADD,
+      "_addr(64)_id($id)", atom_g_add_noret>;
+  def ATOM64_G_AND_NORET     : BinAtomNoRet64<IL_OP_UAV_AND,
+      "_addr(64)_id($id)", atom_g_and_noret>;
+  def ATOM64_G_MAX_NORET     : BinAtomNoRet64<IL_OP_UAV_MAX,
+      "_addr(64)_id($id)", atom_g_max_noret>;
+  def ATOM64_G_MIN_NORET     : BinAtomNoRet64<IL_OP_UAV_MIN,
+      "_addr(64)_id($id)", atom_g_min_noret>;
+  def ATOM64_G_UMAX_NORET    : BinAtomNoRet64<IL_OP_UAV_UMAX,
+      "_addr(64)_id($id)", atom_g_umax_noret>;
+  def ATOM64_G_UMIN_NORET    : BinAtomNoRet64<IL_OP_UAV_UMIN,
+      "_addr(64)_id($id)", atom_g_umin_noret>;
+  def ATOM64_G_OR_NORET      : BinAtomNoRet64<IL_OP_UAV_OR,
+      "_addr(64)_id($id)", atom_g_or_noret>;
+  def ATOM64_G_RSUB_NORET    : BinAtomNoRet64<IL_OP_UAV_RSUB,
+      "_addr(64)_id($id)", atom_g_rsub_noret>;
+  def ATOM64_G_SUB_NORET     : BinAtomNoRet64<IL_OP_UAV_SUB,
+      "_addr(64)_id($id)", atom_g_sub_noret>;
+  def ATOM64_G_XOR_NORET     : BinAtomNoRet64<IL_OP_UAV_XOR,
+      "_addr(64)_id($id)", atom_g_xor_noret>;
+  def ATOM64_G_INC_NORET     : BinAtomNoRet64<IL_OP_UAV_INC,
+      "_addr(64)_id($id)", atom_g_inc_noret>;
+  def ATOM64_G_DEC_NORET     : BinAtomNoRet64<IL_OP_UAV_DEC,
+      "_addr(64)_id($id)", atom_g_dec_noret>;
+  def ATOM64_G_CMPXCHG_NORET    : CmpXChgNoRet64<IL_OP_UAV_CMP,
+      "_addr(64)_id($id)", atom_g_cmpxchg_noret>;
+  def ATOM64_L_ADD_NORET     : BinAtomNoRet64<IL_OP_LDS_ADD,
+      "_id($id)", atom_l_add_noret>;
+  def ATOM64_L_AND_NORET     : BinAtomNoRet64<IL_OP_LDS_AND,
+      "_id($id)", atom_l_and_noret>;
+  def ATOM64_L_MAX_NORET     : BinAtomNoRet64<IL_OP_LDS_MAX,
+      "_id($id)", atom_l_max_noret>;
+  def ATOM64_L_MIN_NORET     : BinAtomNoRet64<IL_OP_LDS_MIN,
+      "_id($id)", atom_l_min_noret>;
+  def ATOM64_L_UMAX_NORET    : BinAtomNoRet64<IL_OP_LDS_UMAX,
+      "_id($id)", atom_l_umax_noret>;
+  def ATOM64_L_UMIN_NORET    : BinAtomNoRet64<IL_OP_LDS_UMIN,
+      "_id($id)", atom_l_umin_noret>;
+  def ATOM64_L_MSKOR_NORET   : TriAtomNoRet64<IL_OP_LDS_MSKOR,
+      "_id($id)", atom_l_mskor_noret>;
+  def ATOM64_L_OR_NORET      : BinAtomNoRet64<IL_OP_LDS_OR,
+      "_id($id)", atom_l_or_noret>;
+  def ATOM64_L_RSUB_NORET    : BinAtomNoRet64<IL_OP_LDS_RSUB,
+      "_id($id)", atom_l_rsub_noret>;
+  def ATOM64_L_SUB_NORET     : BinAtomNoRet64<IL_OP_LDS_SUB,
+      "_id($id)", atom_l_sub_noret>;
+  def ATOM64_L_XOR_NORET     : BinAtomNoRet64<IL_OP_LDS_XOR,
+      "_id($id)", atom_l_xor_noret>;
+  def ATOM64_L_INC_NORET     : BinAtomNoRet64<IL_OP_LDS_INC,
+      "_id($id)", atom_l_inc_noret>;
+  def ATOM64_L_DEC_NORET     : BinAtomNoRet64<IL_OP_LDS_DEC,
+      "_id($id)", atom_l_dec_noret>;
+  def ATOM64_L_CMPXCHG_NORET    : TriAtomNoRet64<IL_OP_LDS_CMP,
+      "_id($id)", atom_l_cmpxchg_noret>;
+  def ATOM64_R_ADD_NORET     : BinAtomNoRet64<IL_OP_GDS_ADD,
+      "_id($id)", atom_r_add_noret>;
+  def ATOM64_R_AND_NORET     : BinAtomNoRet64<IL_OP_GDS_AND,
+      "_id($id)", atom_r_and_noret>;
+  def ATOM64_R_MAX_NORET     : BinAtomNoRet64<IL_OP_GDS_MAX,
+      "_id($id)", atom_r_max_noret>;
+  def ATOM64_R_MIN_NORET     : BinAtomNoRet64<IL_OP_GDS_MIN,
+      "_id($id)", atom_r_min_noret>;
+  def ATOM64_R_UMAX_NORET    : BinAtomNoRet64<IL_OP_GDS_UMAX,
+      "_id($id)", atom_r_umax_noret>;
+  def ATOM64_R_UMIN_NORET    : BinAtomNoRet64<IL_OP_GDS_UMIN,
+      "_id($id)", atom_r_umin_noret>;
+  def ATOM64_R_MSKOR_NORET   : TriAtomNoRet64<IL_OP_GDS_MSKOR,
+      "_id($id)", atom_r_mskor_noret>;
+  def ATOM64_R_OR_NORET      : BinAtomNoRet64<IL_OP_GDS_OR,
+      "_id($id)", atom_r_or_noret>;
+  def ATOM64_R_RSUB_NORET    : BinAtomNoRet64<IL_OP_GDS_RSUB,
+      "_id($id)", atom_r_rsub_noret>;
+  def ATOM64_R_SUB_NORET     : BinAtomNoRet64<IL_OP_GDS_SUB,
+      "_id($id)", atom_r_sub_noret>;
+  def ATOM64_R_XOR_NORET     : BinAtomNoRet64<IL_OP_GDS_XOR,
+      "_id($id)", atom_r_xor_noret>;
+  def ATOM64_R_INC_NORET     : BinAtomNoRet64<IL_OP_GDS_INC,
+      "_id($id)", atom_r_inc_noret>;
+  def ATOM64_R_DEC_NORET     : BinAtomNoRet64<IL_OP_GDS_DEC,
+      "_id($id)", atom_r_dec_noret>;
+  def ATOM64_R_CMPXCHG_NORET    : CmpXChgNoRet64<IL_OP_GDS_CMP,
+      "_id($id)", atom_r_cmpxchg_noret>;
+  // 32-bit atomic operations that return a value (the *_READ opcode
+  // variants of the no-return group above), 64-bit addressing.
+  def ATOM64_G_ADD     : BinAtom64<IL_OP_UAV_READ_ADD,
+      "_addr(64)_id($id)", atom_g_add>;
+  def ATOM64_G_AND     : BinAtom64<IL_OP_UAV_READ_AND,
+      "_addr(64)_id($id)", atom_g_and>;
+  def ATOM64_G_MAX     : BinAtom64<IL_OP_UAV_READ_MAX,
+      "_addr(64)_id($id)", atom_g_max>;
+  def ATOM64_G_MIN     : BinAtom64<IL_OP_UAV_READ_MIN,
+      "_addr(64)_id($id)", atom_g_min>;
+  def ATOM64_G_UMAX    : BinAtom64<IL_OP_UAV_READ_UMAX,
+      "_addr(64)_id($id)", atom_g_umax>;
+  def ATOM64_G_UMIN    : BinAtom64<IL_OP_UAV_READ_UMIN,
+      "_addr(64)_id($id)", atom_g_umin>;
+  def ATOM64_G_OR      : BinAtom64<IL_OP_UAV_READ_OR,
+      "_addr(64)_id($id)", atom_g_or>;
+  def ATOM64_G_RSUB    : BinAtom64<IL_OP_UAV_READ_RSUB,
+      "_addr(64)_id($id)", atom_g_rsub>;
+  def ATOM64_G_SUB     : BinAtom64<IL_OP_UAV_READ_SUB,
+      "_addr(64)_id($id)", atom_g_sub>;
+  def ATOM64_G_XOR     : BinAtom64<IL_OP_UAV_READ_XOR,
+      "_addr(64)_id($id)", atom_g_xor>;
+  def ATOM64_G_INC     : BinAtom64<IL_OP_UAV_READ_INC,
+      "_addr(64)_id($id)", atom_g_inc>;
+  def ATOM64_G_DEC     : BinAtom64<IL_OP_UAV_READ_DEC,
+      "_addr(64)_id($id)", atom_g_dec>;
+  def ATOM64_G_XCHG    : BinAtom64<IL_OP_UAV_READ_XCHG,
+      "_addr(64)_id($id)", atom_g_xchg>;
+  def ATOM64_G_CMPXCHG : CmpXChg64<IL_OP_UAV_READ_CMPXCHG,
+      "_addr(64)_id($id)", atom_g_cmpxchg>;
+  def ATOM64_L_ADD     : BinAtom64<IL_OP_LDS_READ_ADD,
+      "_id($id)", atom_l_add>;
+  def ATOM64_L_AND     : BinAtom64<IL_OP_LDS_READ_AND,
+      "_id($id)", atom_l_and>;
+  def ATOM64_L_MAX     : BinAtom64<IL_OP_LDS_READ_MAX,
+      "_id($id)", atom_l_max>;
+  def ATOM64_L_MIN     : BinAtom64<IL_OP_LDS_READ_MIN,
+      "_id($id)", atom_l_min>;
+  def ATOM64_L_UMAX    : BinAtom64<IL_OP_LDS_READ_UMAX,
+      "_id($id)", atom_l_umax>;
+  def ATOM64_L_UMIN    : BinAtom64<IL_OP_LDS_READ_UMIN,
+      "_id($id)", atom_l_umin>;
+  def ATOM64_L_OR      : BinAtom64<IL_OP_LDS_READ_OR,
+      "_id($id)", atom_l_or>;
+  def ATOM64_L_MSKOR   : TriAtom64<IL_OP_LDS_READ_MSKOR,
+      "_id($id)", atom_l_mskor>;
+  def ATOM64_L_RSUB    : BinAtom64<IL_OP_LDS_READ_RSUB,
+      "_id($id)", atom_l_rsub>;
+  def ATOM64_L_SUB     : BinAtom64<IL_OP_LDS_READ_SUB,
+      "_id($id)", atom_l_sub>;
+  def ATOM64_L_XOR     : BinAtom64<IL_OP_LDS_READ_XOR,
+      "_id($id)", atom_l_xor>;
+  def ATOM64_L_INC     : BinAtom64<IL_OP_LDS_READ_INC,
+      "_id($id)", atom_l_inc>;
+  def ATOM64_L_DEC     : BinAtom64<IL_OP_LDS_READ_DEC,
+      "_id($id)", atom_l_dec>;
+  def ATOM64_L_XCHG    : BinAtom64<IL_OP_LDS_READ_XCHG,
+      "_id($id)", atom_l_xchg>;
+  def ATOM64_L_CMPXCHG : TriAtom64<IL_OP_LDS_READ_CMPXCHG,
+      "_id($id)", atom_l_cmpxchg>;
+  def ATOM64_R_ADD     : BinAtom64<IL_OP_GDS_READ_ADD,
+      "_id($id)", atom_r_add>;
+  def ATOM64_R_AND     : BinAtom64<IL_OP_GDS_READ_AND,
+      "_id($id)", atom_r_and>;
+  def ATOM64_R_MAX     : BinAtom64<IL_OP_GDS_READ_MAX,
+      "_id($id)", atom_r_max>;
+  def ATOM64_R_MIN     : BinAtom64<IL_OP_GDS_READ_MIN,
+      "_id($id)", atom_r_min>;
+  def ATOM64_R_UMAX    : BinAtom64<IL_OP_GDS_READ_UMAX,
+      "_id($id)", atom_r_umax>;
+  def ATOM64_R_UMIN    : BinAtom64<IL_OP_GDS_READ_UMIN,
+      "_id($id)", atom_r_umin>;
+  def ATOM64_R_OR      : BinAtom64<IL_OP_GDS_READ_OR,
+      "_id($id)", atom_r_or>;
+  def ATOM64_R_MSKOR   : TriAtom64<IL_OP_GDS_READ_MSKOR,
+      "_id($id)", atom_r_mskor>;
+  def ATOM64_R_RSUB    : BinAtom64<IL_OP_GDS_READ_RSUB,
+      "_id($id)", atom_r_rsub>;
+  def ATOM64_R_SUB     : BinAtom64<IL_OP_GDS_READ_SUB,
+      "_id($id)", atom_r_sub>;
+  def ATOM64_R_XOR     : BinAtom64<IL_OP_GDS_READ_XOR,
+      "_id($id)", atom_r_xor>;
+  def ATOM64_R_INC     : BinAtom64<IL_OP_GDS_READ_INC,
+      "_id($id)", atom_r_inc>;
+  def ATOM64_R_DEC     : BinAtom64<IL_OP_GDS_READ_DEC,
+      "_id($id)", atom_r_dec>;
+  def ATOM64_R_XCHG    : BinAtom64<IL_OP_GDS_READ_XCHG,
+      "_id($id)", atom_r_xchg>;
+  def ATOM64_R_CMPXCHG : CmpXChg64<IL_OP_GDS_READ_CMPXCHG,
+      "_id($id)", atom_r_cmpxchg>;
+  // 64-bit data (b64) atomic operations.  Global/UAV variants get a "_b64"
+  // opcode modifier plus 64-bit addressing; LDS/GDS variants below append
+  // "64" directly to the base opcode text.
+  def ATOM64_G_ADD_NORET_B64  : BinAtomNoRet64I64<IL_OP_UAV_ADD,
+      "_b64_addr(64)_id($id)", atom_g_add_noret>;
+  def ATOM64_G_AND_NORET_B64  : BinAtomNoRet64I64<IL_OP_UAV_AND,
+      "_b64_addr(64)_id($id)", atom_g_and_noret>;
+  def ATOM64_G_MAX_NORET_B64  : BinAtomNoRet64I64<IL_OP_UAV_MAX,
+      "_b64_addr(64)_id($id)", atom_g_max_noret>;
+  def ATOM64_G_MIN_NORET_B64  : BinAtomNoRet64I64<IL_OP_UAV_MIN,
+      "_b64_addr(64)_id($id)", atom_g_min_noret>;
+  def ATOM64_G_UMAX_NORET_B64 : BinAtomNoRet64I64<IL_OP_UAV_UMAX,
+      "_b64_addr(64)_id($id)", atom_g_umax_noret>;
+  def ATOM64_G_UMIN_NORET_B64 : BinAtomNoRet64I64<IL_OP_UAV_UMIN,
+      "_b64_addr(64)_id($id)", atom_g_umin_noret>;
+  def ATOM64_G_OR_NORET_B64   : BinAtomNoRet64I64<IL_OP_UAV_OR,
+      "_b64_addr(64)_id($id)", atom_g_or_noret>;
+  def ATOM64_G_RSUB_NORET_B64 : BinAtomNoRet64I64<IL_OP_UAV_RSUB,
+      "_b64_addr(64)_id($id)", atom_g_rsub_noret>;
+  def ATOM64_G_SUB_NORET_B64  : BinAtomNoRet64I64<IL_OP_UAV_SUB,
+      "_b64_addr(64)_id($id)", atom_g_sub_noret>;
+  def ATOM64_G_XOR_NORET_B64  : BinAtomNoRet64I64<IL_OP_UAV_XOR,
+      "_b64_addr(64)_id($id)", atom_g_xor_noret>;
+  def ATOM64_G_INC_NORET_B64  : BinAtomNoRet64I64<IL_OP_UAV_INC,
+      "_b64_addr(64)_id($id)", atom_g_inc_noret>;
+  def ATOM64_G_DEC_NORET_B64  : BinAtomNoRet64I64<IL_OP_UAV_DEC,
+      "_b64_addr(64)_id($id)", atom_g_dec_noret>;
+  def ATOM64_G_CMPXCHG_NORET_B64 : CmpXChgNoRet64I64<IL_OP_UAV_CMP,
+      "_b64_addr(64)_id($id)", atom_g_cmpxchg_noret>;
+  def ATOM64_G_ADD_B64     : BinAtom64I64<IL_OP_UAV_READ_ADD,
+      "_b64_addr(64)_id($id)", atom_g_add>;
+  def ATOM64_G_AND_B64     : BinAtom64I64<IL_OP_UAV_READ_AND,
+      "_b64_addr(64)_id($id)", atom_g_and>;
+  def ATOM64_G_MAX_B64     : BinAtom64I64<IL_OP_UAV_READ_MAX,
+      "_b64_addr(64)_id($id)", atom_g_max>;
+  def ATOM64_G_MIN_B64     : BinAtom64I64<IL_OP_UAV_READ_MIN,
+      "_b64_addr(64)_id($id)", atom_g_min>;
+  def ATOM64_G_UMAX_B64    : BinAtom64I64<IL_OP_UAV_READ_UMAX,
+      "_b64_addr(64)_id($id)", atom_g_umax>;
+  def ATOM64_G_UMIN_B64    : BinAtom64I64<IL_OP_UAV_READ_UMIN,
+      "_b64_addr(64)_id($id)", atom_g_umin>;
+  def ATOM64_G_OR_B64      : BinAtom64I64<IL_OP_UAV_READ_OR,
+      "_b64_addr(64)_id($id)", atom_g_or>;
+  def ATOM64_G_RSUB_B64    : BinAtom64I64<IL_OP_UAV_READ_RSUB,
+      "_b64_addr(64)_id($id)", atom_g_rsub>;
+  def ATOM64_G_SUB_B64     : BinAtom64I64<IL_OP_UAV_READ_SUB,
+      "_b64_addr(64)_id($id)", atom_g_sub>;
+  def ATOM64_G_XOR_B64     : BinAtom64I64<IL_OP_UAV_READ_XOR,
+      "_b64_addr(64)_id($id)", atom_g_xor>;
+  def ATOM64_G_INC_B64     : BinAtom64I64<IL_OP_UAV_READ_INC,
+      "_b64_addr(64)_id($id)", atom_g_inc>;
+  def ATOM64_G_DEC_B64     : BinAtom64I64<IL_OP_UAV_READ_DEC,
+      "_b64_addr(64)_id($id)", atom_g_dec>;
+  def ATOM64_G_XCHG_B64    : BinAtom64I64<IL_OP_UAV_READ_XCHG,
+      "_b64_addr(64)_id($id)", atom_g_xchg>;
+  def ATOM64_G_CMPXCHG_B64 : CmpXChg64I64<IL_OP_UAV_READ_CMPXCHG,
+      "_b64_addr(64)_id($id)", atom_g_cmpxchg>;
+  def ATOM64_L_ADD_NORET_B64  : BinAtomNoRet64I64<IL_OP_LDS_ADD,
+      "64_id($id)", atom_l_add_noret>;
+  def ATOM64_L_AND_NORET_B64  : BinAtomNoRet64I64<IL_OP_LDS_AND,
+      "64_id($id)", atom_l_and_noret>;
+  def ATOM64_L_MAX_NORET_B64  : BinAtomNoRet64I64<IL_OP_LDS_MAX,
+      "64_id($id)", atom_l_max_noret>;
+  def ATOM64_L_MIN_NORET_B64  : BinAtomNoRet64I64<IL_OP_LDS_MIN,
+      "64_id($id)", atom_l_min_noret>;
+  def ATOM64_L_UMAX_NORET_B64 : BinAtomNoRet64I64<IL_OP_LDS_UMAX,
+      "64_id($id)", atom_l_umax_noret>;
+  def ATOM64_L_UMIN_NORET_B64 : BinAtomNoRet64I64<IL_OP_LDS_UMIN,
+      "64_id($id)", atom_l_umin_noret>;
+  def ATOM64_L_MSKOR_NORET_B64: TriAtomNoRet64I64<IL_OP_LDS_MSKOR,
+      "64_id($id)", atom_l_mskor_noret>;
+  def ATOM64_L_OR_NORET_B64   : BinAtomNoRet64I64<IL_OP_LDS_OR,
+      "64_id($id)", atom_l_or_noret>;
+  def ATOM64_L_RSUB_NORET_B64 : BinAtomNoRet64I64<IL_OP_LDS_RSUB,
+      "64_id($id)", atom_l_rsub_noret>;
+  def ATOM64_L_SUB_NORET_B64  : BinAtomNoRet64I64<IL_OP_LDS_SUB,
+      "64_id($id)", atom_l_sub_noret>;
+  def ATOM64_L_XOR_NORET_B64  : BinAtomNoRet64I64<IL_OP_LDS_XOR,
+      "64_id($id)", atom_l_xor_noret>;
+  def ATOM64_L_INC_NORET_B64  : BinAtomNoRet64I64<IL_OP_LDS_INC,
+      "64_id($id)", atom_l_inc_noret>;
+  def ATOM64_L_DEC_NORET_B64  : BinAtomNoRet64I64<IL_OP_LDS_DEC,
+      "64_id($id)", atom_l_dec_noret>;
+  def ATOM64_L_CMPXCHG_NORET_B64 : TriAtomNoRet64I64<IL_OP_LDS_CMP,
+      "64_id($id)", atom_l_cmpxchg_noret>;
+  def ATOM64_R_ADD_NORET_B64  : BinAtomNoRet64I64<IL_OP_GDS_ADD,
+      "64_id($id)", atom_r_add_noret>;
+  def ATOM64_R_AND_NORET_B64  : BinAtomNoRet64I64<IL_OP_GDS_AND,
+      "64_id($id)", atom_r_and_noret>;
+  def ATOM64_R_MAX_NORET_B64  : BinAtomNoRet64I64<IL_OP_GDS_MAX,
+      "64_id($id)", atom_r_max_noret>;
+  def ATOM64_R_MIN_NORET_B64  : BinAtomNoRet64I64<IL_OP_GDS_MIN,
+      "64_id($id)", atom_r_min_noret>;
+  def ATOM64_R_UMAX_NORET_B64 : BinAtomNoRet64I64<IL_OP_GDS_UMAX,
+      "64_id($id)", atom_r_umax_noret>;
+  def ATOM64_R_UMIN_NORET_B64 : BinAtomNoRet64I64<IL_OP_GDS_UMIN,
+      "64_id($id)", atom_r_umin_noret>;
+  def ATOM64_R_MSKOR_NORET_B64: TriAtomNoRet64I64<IL_OP_GDS_MSKOR,
+      "64_id($id)", atom_r_mskor_noret>;
+  def ATOM64_R_OR_NORET_B64   : BinAtomNoRet64I64<IL_OP_GDS_OR,
+      "64_id($id)", atom_r_or_noret>;
+  def ATOM64_R_RSUB_NORET_B64 : BinAtomNoRet64I64<IL_OP_GDS_RSUB,
+      "64_id($id)", atom_r_rsub_noret>;
+  def ATOM64_R_SUB_NORET_B64  : BinAtomNoRet64I64<IL_OP_GDS_SUB,
+      "64_id($id)", atom_r_sub_noret>;
+  def ATOM64_R_XOR_NORET_B64  : BinAtomNoRet64I64<IL_OP_GDS_XOR,
+      "64_id($id)", atom_r_xor_noret>;
+  def ATOM64_R_INC_NORET_B64  : BinAtomNoRet64I64<IL_OP_GDS_INC,
+      "64_id($id)", atom_r_inc_noret>;
+  def ATOM64_R_DEC_NORET_B64  : BinAtomNoRet64I64<IL_OP_GDS_DEC,
+      "64_id($id)", atom_r_dec_noret>;
+  def ATOM64_R_CMPXCHG_NORET_B64 : CmpXChgNoRet64I64<IL_OP_GDS_CMP,
+      "64_id($id)", atom_r_cmpxchg_noret>;
+  // 64-bit data (b64) LDS/GDS atomic operations that return a value, plus
+  // the append-buffer counter operations.
+  def ATOM64_L_ADD_B64     : BinAtom64I64<IL_OP_LDS_READ_ADD,
+      "64_id($id)", atom_l_add>;
+  def ATOM64_L_AND_B64     : BinAtom64I64<IL_OP_LDS_READ_AND,
+      "64_id($id)", atom_l_and>;
+  def ATOM64_L_MAX_B64     : BinAtom64I64<IL_OP_LDS_READ_MAX,
+      "64_id($id)", atom_l_max>;
+  def ATOM64_L_MIN_B64    : BinAtom64I64<IL_OP_LDS_READ_MIN,
+      "64_id($id)", atom_l_min>;
+  def ATOM64_L_UMAX_B64    : BinAtom64I64<IL_OP_LDS_READ_UMAX,
+      "64_id($id)", atom_l_umax>;
+  def ATOM64_L_UMIN_B64    : BinAtom64I64<IL_OP_LDS_READ_UMIN,
+      "64_id($id)", atom_l_umin>;
+  def ATOM64_L_OR_B64      : BinAtom64I64<IL_OP_LDS_READ_OR,
+      "64_id($id)", atom_l_or>;
+  def ATOM64_L_MSKOR_B64   : TriAtom64I64<IL_OP_LDS_READ_MSKOR,
+      "64_id($id)", atom_l_mskor>;
+  def ATOM64_L_RSUB_B64    : BinAtom64I64<IL_OP_LDS_READ_RSUB,
+      "64_id($id)", atom_l_rsub>;
+  def ATOM64_L_SUB_B64     : BinAtom64I64<IL_OP_LDS_READ_SUB,
+      "64_id($id)", atom_l_sub>;
+  def ATOM64_L_XOR_B64     : BinAtom64I64<IL_OP_LDS_READ_XOR,
+      "64_id($id)", atom_l_xor>;
+  def ATOM64_L_INC_B64     : BinAtom64I64<IL_OP_LDS_READ_INC,
+      "64_id($id)", atom_l_inc>;
+  def ATOM64_L_DEC_B64     : BinAtom64I64<IL_OP_LDS_READ_DEC,
+      "64_id($id)", atom_l_dec>;
+  def ATOM64_L_XCHG_B64    : BinAtom64I64<IL_OP_LDS_READ_XCHG,
+      "64_id($id)", atom_l_xchg>;
+  def ATOM64_L_CMPXCHG_B64 : TriAtom64I64<IL_OP_LDS_READ_CMPXCHG,
+      "64_id($id)", atom_l_cmpxchg>;
+  def ATOM64_R_ADD_B64     : BinAtom64I64<IL_OP_GDS_READ_ADD,
+      "64_id($id)", atom_r_add>;
+  def ATOM64_R_AND_B64     : BinAtom64I64<IL_OP_GDS_READ_AND,
+      "64_id($id)", atom_r_and>;
+  def ATOM64_R_MAX_B64     : BinAtom64I64<IL_OP_GDS_READ_MAX,
+      "64_id($id)", atom_r_max>;
+  def ATOM64_R_MIN_B64     : BinAtom64I64<IL_OP_GDS_READ_MIN,
+      "64_id($id)", atom_r_min>;
+  def ATOM64_R_UMAX_B64    : BinAtom64I64<IL_OP_GDS_READ_UMAX,
+      "64_id($id)", atom_r_umax>;
+  def ATOM64_R_UMIN_B64    : BinAtom64I64<IL_OP_GDS_READ_UMIN,
+      "64_id($id)", atom_r_umin>;
+  def ATOM64_R_OR_B64      : BinAtom64I64<IL_OP_GDS_READ_OR,
+      "64_id($id)", atom_r_or>;
+  def ATOM64_R_MSKOR_B64   : TriAtom64I64<IL_OP_GDS_READ_MSKOR,
+      "64_id($id)", atom_r_mskor>;
+  def ATOM64_R_RSUB_B64    : BinAtom64I64<IL_OP_GDS_READ_RSUB,
+      "64_id($id)", atom_r_rsub>;
+  def ATOM64_R_SUB_B64     : BinAtom64I64<IL_OP_GDS_READ_SUB,
+      "64_id($id)", atom_r_sub>;
+  def ATOM64_R_XOR_B64     : BinAtom64I64<IL_OP_GDS_READ_XOR,
+      "64_id($id)", atom_r_xor>;
+  def ATOM64_R_INC_B64     : BinAtom64I64<IL_OP_GDS_READ_INC,
+      "64_id($id)", atom_r_inc>;
+  def ATOM64_R_DEC_B64     : BinAtom64I64<IL_OP_GDS_READ_DEC,
+      "64_id($id)", atom_r_dec>;
+  def ATOM64_R_XCHG_B64    : BinAtom64I64<IL_OP_GDS_READ_XCHG,
+      "64_id($id)", atom_r_xchg>;
+  def ATOM64_R_CMPXCHG_B64 : CmpXChg64I64<IL_OP_GDS_READ_CMPXCHG,
+      "64_id($id)", atom_r_cmpxchg>;
+
+  // Atomic append-buffer counter operations.
+  def APPEND64_ALLOC : Append64<IL_OP_APPEND_BUF_ALLOC,
+      "_id($id)", append_alloc>;
+  def APPEND64_CONSUME : Append64<IL_OP_APPEND_BUF_CONSUME,
+      "_id($id)", append_consume>;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1319 @@
+//===-- AMDILModuleInfo.cpp -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILModuleInfo.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILKernel.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cstdio>
+
+using namespace llvm;
+
+// Round Value down to a multiple of Alignment.  Correct only when
+// Alignment is a power of two (the mask trick assumes it).
+static inline uint32_t AlignDown_32(uint32_t Value, uint32_t Alignment)
+{
+  return Value & ~(Alignment - 1);
+}
+
+// Round Value up to a multiple of Alignment (power-of-two Alignment only,
+// same constraint as AlignDown_32 which it delegates to).
+static inline uint32_t AlignUp_32(uint32_t Value, uint32_t Alignment)
+{
+  return AlignDown_32(Value + Alignment - 1, Alignment);
+}
+
+// Everything starts NULL/zero; the real state is populated by the first
+// call to processModule().
+AMDILModuleInfo::AMDILModuleInfo(const MachineModuleInfo &MMI)
+  : mMMI(&MMI),
+    symTab(NULL),
+    mSTM(NULL),
+    TM(NULL),
+    mOffset(0),
+    mReservedBuffs(0),
+    mCurrentCPOffset(0),
+    mPrintfOffset(0),
+    mProcessed(false) { }
+
+// Free every AMDILKernel descriptor owned by the kernel map.  Delete
+// through the iterator directly instead of copying each StringMapEntry by
+// value as the original did -- the entry copy was wasteful and
+// StringMapEntry is not intended to be value-copied.
+AMDILModuleInfo::~AMDILModuleInfo()
+{
+  for (StringMap<AMDILKernel*>::iterator kb = mKernels.begin(), ke = mKernels.end();
+       kb != ke; ++kb) {
+    delete kb->getValue();
+  }
+}
+
+// Linear search of a kernel's constant-pointer list for the entry named
+// 'arg'; returns NULL when krnl is NULL or no entry matches.
+static const AMDILConstPtr *getConstPtr(const AMDILKernel *krnl, const std::string &arg)
+{
+  if (!krnl) {
+    return NULL;
+  }
+  llvm::SmallVector<AMDILConstPtr, DEFAULT_VEC_SLOTS>::const_iterator begin, end;
+  for (begin = krnl->constPtr.begin(), end = krnl->constPtr.end();
+       begin != end; ++begin) {
+    // NOTE(review): strcmp on name.data() assumes the stored name is
+    // NUL-terminated -- confirm 'name' is std::string-backed rather than a
+    // StringRef slice of a larger buffer.
+    if (!strcmp(begin->name.data(),arg.c_str())) {
+      return &(*begin);
+    }
+  }
+  return NULL;
+}
+
+// One-time scan of the module's global variables, dispatching each global
+// by its compiler-generated name prefix to build the per-kernel metadata
+// tables (kernel attributes, local/region args, image/sampler/constant
+// annotations).  Any remaining used global is treated as a constant
+// pointer.  Only the first call does the scan; see mProcessed below.
+void AMDILModuleInfo::processModule(const Module *M,
+                                    const AMDILTargetMachine *mTM)
+{
+  Module::const_global_iterator GI;
+  Module::const_global_iterator GE;
+  // Note: mSTM/TM are refreshed even when the module was already processed.
+  mSTM = mTM->getSubtargetImpl();
+  TM = mTM;
+  if (mProcessed) {
+    return;
+  }
+
+  for (GI = M->global_begin(), GE = M->global_end(); GI != GE; ++GI) {
+    const GlobalValue *GV = GI;
+    llvm::StringRef GVName = GV->getName();
+    const char *name = GVName.data();
+
+    if (!strncmp(name, "sgv", 3)) {
+      // Kernel attribute string (required work-group/region sizes).
+      mKernelArgs[GVName] = parseSGV(GV);
+    } else if (!strncmp(name, "fgv", 3)) {
+      // we can ignore this since we don't care about the filename
+      // string
+    } else if ((name[0] == 'l' || name[0] == 'r') && !strncmp(name + 1, "vgv", 3)) {
+      // "lvgv" or "rvgv": local/region variable descriptor arrays.
+      mLocalArgs[GVName] = parseXVGV(GV);
+    } else if (!strncmp(name, "llvm.image.annotations", 22)) {
+      parseImageAnnotate(GV);
+    } else if (!strncmp(name, "llvm.global.annotations", 23)) {
+      parseGlobalAnnotate(GV);
+    } else if (!strncmp(name, "llvm.constpointer.annotations", 29)) {
+      parseConstantPtrAnnotate(GV);
+    } else if (!strncmp(name, "llvm.sampler.annotations", 24)) {
+      parseSamplerAnnotate(GV);
+    } else if (!strncmp(name, "llvm.argtypename.annotations", 28)) {
+      parseIgnoredGlobal(GV);
+    } else if (!strncmp(name, "llvm.argtypeconst.annotations", 29)) {
+      parseIgnoredGlobal(GV);
+    } else if (!strncmp(name, "llvm.readonlypointer.annotations", 32)) {
+      parseIgnoredGlobal(GV);
+    } else if (!strncmp(name, "llvm.signedOrSignedpointee.annotations", 38)) {
+      parseIgnoredGlobal(GV);
+    } else if (!strncmp(name, "llvm.restrictpointer.annotations", 32)) {
+      parseIgnoredGlobal(GV);
+    } else if (!strncmp(name, "llvm.volatilepointer.annotations", 32)) {
+      parseIgnoredGlobal(GV);
+    } else if (strstr(name, "cllocal")) {
+      parseAutoArray(GV, false);
+    } else if (strstr(name, "clregion")) {
+      parseAutoArray(GV, true);
+    } else if (!GV->use_empty()
+               && mIgnoreStr.find(GVName) == mIgnoreStr.end()) {
+      parseConstantPtr(GV);
+    }
+  }
+
+  allocateGlobalCB();
+
+  // Assign per-kernel hardware constant buffers for every function.
+  safeForEach(M->begin(), M->end(),
+              std::bind1st(
+                std::mem_fun(&AMDILModuleInfo::checkConstPtrsUseHW),
+                this));
+  // Make sure we only process the module once even though this function
+  // is called every time a MachineFunctionInfo object is instantiated.
+  mProcessed = true;
+}
+
+// Lay out every module-level constant pointer.  When the device supports
+// hardware constant memory and the data fits in one constant buffer, pack
+// entries into successive CBs starting at CB_BASE_OFFSET; otherwise fall
+// back to a software-emulated linear layout (cbNum 0, swoffset-based).
+void AMDILModuleInfo::allocateGlobalCB(void)
+{
+  uint32_t maxCBSize = mSTM->device()->getMaxCBSize();
+  uint32_t offset = 0;
+  uint32_t curCB = 0;
+  uint32_t swoffset = 0;
+  // Hardware constant-memory support is loop-invariant; the original
+  // re-queried the device on every iteration.
+  const bool constHW = mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
+  for (StringMap<AMDILConstPtr>::iterator cpb = mConstMems.begin(),
+       cpe = mConstMems.end(); cpb != cpe; ++cpb) {
+    AMDILConstPtr &c = cpb->second;
+    c.usesHardware = false;
+    if (constHW) {
+      // If we have a limit on the max CB Size, then we need to make sure that
+      // the constant sizes fall within the limits.
+      if (c.size <= maxCBSize) {
+        offset = AlignUp_32(offset, c.align);
+        // Spill to the next CB when this entry would not fit in the current one.
+        if (offset + c.size > maxCBSize) {
+          offset = 0;
+          curCB++;
+        }
+        if (curCB < mSTM->device()->getMaxNumCBs()) {
+          c.cbNum = curCB + CB_BASE_OFFSET;
+          c.offset = offset;
+          offset += c.size;
+          c.usesHardware = true;
+          continue;
+        }
+      }
+    }
+    // Software path: sequential layout in a single emulated buffer.
+    swoffset = AlignUp_32(swoffset, c.align);
+    c.cbNum = 0;
+    c.offset = swoffset;
+    swoffset += c.size;
+  }
+  if (!mConstMems.empty()) {
+    // curCB is the index of the last CB touched, so reserve curCB + 1 buffers.
+    mReservedBuffs = curCB + 1;
+  }
+}
+
+// For one function (kernel), decide which of its constant pointers may live
+// in hardware constant buffers, disable the smallest ones when there are
+// more pointers than available CBs, and assign final CB numbers and sizes.
+// Always returns false so safeForEach keeps visiting every function.
+bool AMDILModuleInfo::checkConstPtrsUseHW(llvm::Module::const_iterator *FCI)
+{
+  Function::const_arg_iterator AI, AE;
+  const Function *func = *FCI;
+  std::string name = func->getName();
+  // NOTE(review): operator[] default-inserts a NULL entry for every
+  // non-kernel function visited; harmless given the guard below, but a
+  // find() would avoid polluting mKernels.
+  AMDILKernel *krnl = mKernels[name];
+  if (!krnl || !krnl->mKernel) {
+    return false;
+  }
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+    // Register each constant-address-space pointer argument that is not
+    // already tracked, assuming the worst-case (max CB) size.
+    for (AI = func->arg_begin(), AE = func->arg_end();
+         AI != AE; ++AI) {
+      const Argument *Arg = &(*AI);
+      const PointerType *P = dyn_cast<PointerType>(Arg->getType());
+      if (!P) {
+        continue;
+      }
+      if (P->getAddressSpace() != AMDILAS::CONSTANT_ADDRESS) {
+        continue;
+      }
+      const AMDILConstPtr *ptr = getConstPtr(krnl, Arg->getName());
+      if (ptr) {
+        continue;
+      }
+      AMDILConstPtr constAttr;
+      constAttr.name = Arg->getName();
+      constAttr.size = this->mSTM->device()->getMaxCBSize();
+      constAttr.base = Arg;
+      constAttr.isArgument = true;
+      constAttr.isArray = false;
+      constAttr.offset = 0;
+      constAttr.align = 16;
+      constAttr.usesHardware =
+        mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem);
+      if (constAttr.usesHardware) {
+        constAttr.cbNum = krnl->constPtr.size() + 2;
+      } else {
+        constAttr.cbNum = 0;
+      }
+      krnl->constPtr.push_back(constAttr);
+    }
+  }
+  // Now lets make sure that only the N largest buffers
+  // get allocated in hardware if we have too many buffers
+  uint32_t numPtrs = krnl->constPtr.size();
+  if (numPtrs > (this->mSTM->device()->getMaxNumCBs() - mReservedBuffs)) {
+    // TODO: Change this routine so it sorts
+    // AMDILConstPtr instead of pulling the sizes out
+    // and then grab the N largest and disable the rest
+    llvm::SmallVector<uint32_t, 16> sizes;
+    for (uint32_t x = 0; x < numPtrs; ++x) {
+      sizes.push_back(krnl->constPtr[x].size);
+    }
+    // Ascending sort: sizes[numToDisable-1] is the largest size that must
+    // still be disabled; the '&& numToDisable' guard stops after exactly
+    // numToDisable entries even when sizes tie at the threshold.
+    std::sort(sizes.begin(), sizes.end());
+    uint32_t numToDisable = numPtrs - (mSTM->device()->getMaxNumCBs() -
+                                       mReservedBuffs);
+    uint32_t safeSize = sizes[numToDisable-1];
+    for (uint32_t x = 0; x < numPtrs && numToDisable; ++x) {
+      if (krnl->constPtr[x].size <= safeSize) {
+        krnl->constPtr[x].usesHardware = false;
+        --numToDisable;
+      }
+    }
+  }
+  // Renumber all of the valid CB's so that
+  // they are linear increase
+  uint32_t CBid = 2 + mReservedBuffs;
+  for (uint32_t x = 0; x < numPtrs; ++x) {
+    if (krnl->constPtr[x].usesHardware) {
+      krnl->constPtr[x].cbNum = CBid++;
+    }
+  }
+  // Fold the module-level hardware constants into this kernel's list.
+  for (StringMap<AMDILConstPtr>::iterator cpb = mConstMems.begin(),
+       cpe = mConstMems.end(); cpb != cpe; ++cpb) {
+    if (cpb->second.usesHardware) {
+      krnl->constPtr.push_back(cpb->second);
+    }
+  }
+  // Record the byte size needed per CB; anything outside the hardware CB
+  // range loses its hardware placement.
+  for (uint32_t x = 0; x < krnl->constPtr.size(); ++x) {
+    AMDILConstPtr &c = krnl->constPtr[x];
+    uint32_t cbNum = c.cbNum - CB_BASE_OFFSET;
+    if (cbNum < HW_MAX_NUM_CB && c.cbNum >= CB_BASE_OFFSET) {
+      if ((c.size + c.offset) > krnl->constSizes[cbNum]) {
+        krnl->constSizes[cbNum] = AlignUp_32(c.size + c.offset, 16);
+      }
+    } else {
+      krnl->constPtr[x].usesHardware = false;
+    }
+  }
+  return false;
+}
+
+// Offset of the named local/region auto array, or -1 when unknown.
+int32_t AMDILModuleInfo::getArrayOffset(const llvm::StringRef &a) const
+{
+  StringMap<AMDILArrayMem>::const_iterator entry = mArrayMems.find(a);
+  return (entry == mArrayMems.end()) ? -1 : entry->second.offset;
+}
+
+// Offset of the named module-level constant, or -1 when unknown.
+int32_t AMDILModuleInfo::getConstOffset(const llvm::StringRef &a) const
+{
+  StringMap<AMDILConstPtr>::const_iterator entry = mConstMems.find(a);
+  return (entry == mConstMems.end()) ? -1 : entry->second.offset;
+}
+
+// Whether the named constant was placed in a hardware constant buffer;
+// unknown names report false.
+bool AMDILModuleInfo::getConstHWBit(const llvm::StringRef &name) const
+{
+  StringMap<AMDILConstPtr>::const_iterator entry = mConstMems.find(name);
+  return entry != mConstMems.end() && entry->second.usesHardware;
+}
+
+// As of right now we only care about the required group size
+// so we can skip the variable encoding
+// As of right now we only care about the required group size
+// so we can skip the variable encoding.
+// Parses an "sgv" kernel-attribute string: group/region sizes default to
+// the subtarget's values, then "RWG<x>,<y>,<z>" / "RWR<x>,<y>,<z>" markers
+// in the initializer string override them.
+AMDILKernelAttr AMDILModuleInfo::parseSGV(const GlobalValue *G)
+{
+  AMDILKernelAttr nArg;
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  memset(&nArg, 0, sizeof(nArg));
+  for (int x = 0; x < 3; ++x) {
+    nArg.reqGroupSize[x] = mSTM->getDefaultSize(x);
+    nArg.reqRegionSize[x] = mSTM->getDefaultSize(x);
+  }
+  // Defaults are returned for non-variables or uninitialized globals.
+  if (!GV || !GV->hasInitializer()) {
+    return nArg;
+  }
+  const Constant *CV = GV->getInitializer();
+  const ConstantDataArray *CA = dyn_cast_or_null<ConstantDataArray>(CV);
+  if (!CA || !CA->isString()) {
+    return nArg;
+  }
+  std::string init = CA->getAsString();
+  // NOTE(review): comparing std::string::find() result with StringRef::npos
+  // works because both are size_t(-1), but std::string::npos would be the
+  // matching constant.
+  size_t pos = init.find("RWG");
+  if (pos != llvm::StringRef::npos) {
+    pos += 3;
+    std::string LWS = init.substr(pos, init.length() - pos);
+    const char *lws = LWS.c_str();
+    sscanf(lws, "%u,%u,%u", &(nArg.reqGroupSize[0]),
+           &(nArg.reqGroupSize[1]),
+           &(nArg.reqGroupSize[2]));
+    nArg.mHasRWG = true;
+  }
+  pos = init.find("RWR");
+  if (pos != llvm::StringRef::npos) {
+    pos += 3;
+    std::string LWS = init.substr(pos, init.length() - pos);
+    const char *lws = LWS.c_str();
+    sscanf(lws, "%u,%u,%u", &(nArg.reqRegionSize[0]),
+           &(nArg.reqRegionSize[1]),
+           &(nArg.reqRegionSize[2]));
+    nArg.mHasRWR = true;
+  }
+  return nArg;
+}
+
+// Parse a local/region (lvgv/rvgv) annotation global: its initializer is
+// an array of constant expressions, each pointing at a previously recorded
+// auto array.  Collects pointers to the matching AMDILArrayMem records.
+AMDILLocalArg AMDILModuleInfo::parseXVGV(const GlobalValue *G)
+{
+  AMDILLocalArg nArg;
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  nArg.name = "";
+  if (!GV || !GV->hasInitializer()) {
+    return nArg;
+  }
+  const ConstantArray *CA =
+    dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return nArg;
+  }
+  for (size_t x = 0, y = CA->getNumOperands(); x < y; ++x) {
+    const Value *local = CA->getOperand(x);
+    const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(local);
+    if (!CE || !CE->getNumOperands()) {
+      continue;
+    }
+    // NOTE(review): nArg.name ends up holding the name of the LAST entry
+    // processed; presumably callers only use the collected 'local' list --
+    // confirm.
+    nArg.name = (*(CE->op_begin()))->getName();
+    if (mArrayMems.find(nArg.name) != mArrayMems.end()) {
+      nArg.local.push_back(&(mArrayMems[nArg.name]));
+    }
+  }
+  return nArg;
+}
+
+void AMDILModuleInfo::parseSamplerAnnotate(const GlobalValue *G)
+{
+  // The annotation is an array of pointers to the global strings that name
+  // each sampler used by the kernel described by G.
+  const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(G);
+  if (!GV || !GV->hasInitializer()) {
+    // Bug fix: GV->getInitializer() was called without checking the
+    // dyn_cast_or_null result (and without checking hasInitializer).
+    return;
+  }
+  const ConstantArray *CA =
+    dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return;
+  }
+  uint32_t numOps = CA->getNumOperands();
+  for (uint32_t x = 0; x < numOps; ++x) {
+    const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CA->getOperand(x));
+    if (!nameField || !nameField->getNumOperands()) {
+      // Skip malformed entries instead of dereferencing a NULL cast result.
+      continue;
+    }
+    const GlobalVariable *nameGV =
+      dyn_cast<GlobalVariable>(nameField->getOperand(0));
+    if (!nameGV || !nameGV->hasInitializer()) {
+      continue;
+    }
+    const ConstantDataArray *nameArray =
+      dyn_cast<ConstantDataArray>(nameGV->getInitializer());
+    if (!nameArray) {
+      continue;
+    }
+    // Drop the trailing NUL byte that the initializer string carries.
+    std::string nameStr = nameArray->getAsString();
+    mSamplerSet[GV->getName()].insert(nameStr.substr(0, nameStr.size()-1));
+    // Lets add this string to the set of strings we should ignore processing
+    mIgnoreStr.insert(nameGV->getName());
+    StringMap<AMDILConstPtr>::iterator citer = mConstMems.find(nameGV->getName());
+    if (citer != mConstMems.end()) {
+      // If we already processed this string as a constant, remove it from
+      // the list of known constants.  This way we don't process unneeded data
+      // and don't generate code/metadata for strings that are never used.
+      mConstMems.erase(citer);
+    }
+  }
+}
+
+void AMDILModuleInfo::parseIgnoredGlobal(const GlobalValue *G)
+{
+  // Every global named by this annotation array is added to the ignore set
+  // and removed from the constant table so no metadata is emitted for it.
+  const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(G);
+  if (!GV || !GV->hasInitializer()) {
+    // Bug fix: GV->getInitializer() was called without checking the
+    // dyn_cast_or_null result (and without checking hasInitializer).
+    return;
+  }
+  const ConstantArray *CA =
+    dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return;
+  }
+  uint32_t numOps = CA->getNumOperands();
+  for (uint32_t x = 0; x < numOps; ++x) {
+    const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CA->getOperand(x));
+    if (!nameField || !nameField->getNumOperands()) {
+      // Skip malformed entries instead of dereferencing a NULL cast result.
+      continue;
+    }
+    const GlobalVariable *nameGV =
+      dyn_cast<GlobalVariable>(nameField->getOperand(0));
+    if (!nameGV) {
+      continue;
+    }
+    // Lets add this string to the set of strings we should ignore processing
+    mIgnoreStr.insert(nameGV->getName());
+    StringMap<AMDILConstPtr>::iterator citer = mConstMems.find(nameGV->getName());
+    if (citer != mConstMems.end()) {
+      // If we already processed this string as a constant, remove it from
+      // the list of known constants.  This way we don't process unneeded data
+      // and don't generate code/metadata for strings that are never used.
+      mConstMems.erase(citer);
+    }
+  }
+}
+
+std::set<std::string> *
+AMDILModuleInfo::getSamplerForKernel(llvm::StringRef &ref)
+{
+  // NULL when no samplers were recorded for this kernel.
+  if (mSamplerSet.find(ref) == mSamplerSet.end()) {
+    return NULL;
+  }
+  return &mSamplerSet[ref];
+}
+
+void AMDILModuleInfo::parseConstantPtrAnnotate(const GlobalValue *G)
+{
+  // Each entry of the annotation array is a {name, size} pair describing a
+  // constant-pointer kernel argument; record it on the owning kernel.
+  const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(G);
+  if (!GV || !GV->hasInitializer()) {
+    // Bug fix: GV->getInitializer() was called without checking the
+    // dyn_cast_or_null result (and without checking hasInitializer).
+    return;
+  }
+  const ConstantArray *CA =
+    dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return;
+  }
+  uint32_t numOps = CA->getNumOperands();
+  for (uint32_t x = 0; x < numOps; ++x) {
+    const Value *V = CA->getOperand(x);
+    const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
+    if (!CS) {
+      continue;
+    }
+    assert(CS->getNumOperands() == 2 && "There can only be 2"
+           " fields, a name and size");
+    const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CS->getOperand(0));
+    const ConstantInt *sizeField = dyn_cast<ConstantInt>(CS->getOperand(1));
+    assert(nameField && "There must be a constant name field");
+    assert(sizeField && "There must be a constant size field");
+    if (!nameField || !sizeField || !nameField->getNumOperands()) {
+      // Release builds: skip malformed entries instead of crashing.
+      continue;
+    }
+    const GlobalVariable *nameGV =
+      dyn_cast<GlobalVariable>(nameField->getOperand(0));
+    if (!nameGV || !nameGV->hasInitializer()) {
+      continue;
+    }
+    const ConstantDataArray *nameArray =
+      dyn_cast<ConstantDataArray>(nameGV->getInitializer());
+    if (!nameArray) {
+      continue;
+    }
+    // Lets add this string to the set of strings we should ignore processing
+    mIgnoreStr.insert(nameGV->getName());
+    StringMap<AMDILConstPtr>::iterator citer = mConstMems.find(nameGV->getName());
+    if (citer != mConstMems.end()) {
+      // If we already processed this string as a constant, remove it from
+      // the list of known constants.  This way we don't process unneeded data
+      // and don't generate code/metadata for strings that are never used.
+      mConstMems.erase(citer);
+    } else {
+      mIgnoreStr.insert(CS->getOperand(0)->getName());
+    }
+    AMDILConstPtr constAttr;
+    constAttr.name = nameArray->getAsString();
+    // Constant buffers are handled in 16-byte aligned chunks.
+    constAttr.size = AlignUp_32(sizeField->getZExtValue(), 16);
+    constAttr.base = CS;
+    constAttr.isArgument = true;
+    constAttr.isArray = false;
+    constAttr.cbNum = 0;
+    constAttr.offset = 0;
+    constAttr.align = 16;
+    // Small enough buffers can live in a hardware constant buffer.
+    constAttr.usesHardware = (constAttr.size <= mSTM->device()->getMaxCBSize());
+    // Now that we have all our constant information,
+    // lets update the AMDILKernel
+    // NOTE(review): the +30 skips the annotation global's name prefix to
+    // recover the kernel name -- confirm against the frontend's encoding.
+    llvm::StringRef AMDILKernelName = G->getName().data() + 30;
+    AMDILKernel *k;
+    if (mKernels.find(AMDILKernelName) != mKernels.end()) {
+      k = mKernels[AMDILKernelName];
+      k->mName = AMDILKernelName;
+    } else {
+      k = new AMDILKernel(AMDILKernelName, false);
+    }
+    // Buffers 0 and 1 are reserved; user constant pointers start at 2.
+    constAttr.cbNum = k->constPtr.size() + 2;
+    k->constPtr.push_back(constAttr);
+    mKernels[AMDILKernelName] = k;
+  }
+}
+
+void AMDILModuleInfo::parseImageAnnotate(const GlobalValue *G)
+{
+  // Parse the image annotation for a kernel: each entry is a
+  // {name, access-qualifier} pair where 1 == read only, 2 == write only.
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  if (!GV || !GV->hasInitializer()) {
+    // Bug fix: GV->getInitializer() was called without checking the
+    // dyn_cast result (and without checking hasInitializer).
+    return;
+  }
+  const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!CA) {
+    return;
+  }
+  uint32_t e = CA->getNumOperands();
+  if (!e) {
+    return;
+  }
+  AMDILKernel *k;
+  // NOTE(review): the +23 skips the annotation global's name prefix to
+  // recover the kernel name -- confirm against the frontend's encoding.
+  llvm::StringRef name = G->getName().data() + 23;
+  if (mKernels.find(name) != mKernels.end()) {
+    k = mKernels[name];
+    k->mName = name;
+  } else {
+    k = new AMDILKernel(name, false);
+  }
+
+  for (uint32_t i = 0; i != e; ++i) {
+    const ConstantStruct *CS = dyn_cast<ConstantStruct>(CA->getOperand(i));
+    if (CS && CS->getNumOperands() == 2) {
+      if (mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()) !=
+          mConstMems.end()) {
+        // If we already processed this string as a constant, remove it
+        // from the list of known constants.  This way we don't process
+        // unneeded data and don't generate code/metadata for strings that
+        // are never used.
+        mConstMems.erase(
+          mConstMems.find(CS->getOperand(0)->getOperand(0)->getName()));
+      } else {
+        mIgnoreStr.insert(CS->getOperand(0)->getOperand(0)->getName());
+      }
+      const ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(1));
+      if (!CI) {
+        // Bug fix: CI was dereferenced without checking the cast result.
+        continue;
+      }
+      uint32_t val = (uint32_t)CI->getZExtValue();
+      if (val == 1) {
+        k->readOnly.insert(i);
+      } else if (val == 2) {
+        k->writeOnly.insert(i);
+      } else {
+        assert(!"Unknown image type value!");
+      }
+    }
+  }
+  mKernels[name] = k;
+}
+
+void AMDILModuleInfo::parseAutoArray(const GlobalValue *GV, bool isRegion)
+{
+  // Record size/alignment for a local (or region) auto array so offsets
+  // can be assigned later in parseKernelLRInfo().
+  const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
+  AMDILArrayMem tmp;
+  tmp.isHW = true;
+  tmp.offset = 0;
+  if (G == NULL) {
+    // Bug fix: G->getAlignment() was read BEFORE this NULL check.
+    tmp.align = 16;
+    tmp.vecSize = 0;
+  } else {
+    tmp.align = std::max(G->getAlignment(), 16U);
+    // dereference the pointer type because GlobalVariable is always a pointer
+    // type, and we want to calculate the size of the memory that the
+    // GlobalVariable pointer points to
+    PointerType *pTy = dyn_cast<PointerType>(G->getType());
+    assert(pTy && "Global Variable not pointer type");
+    Type *ty = pTy->getElementType();
+    tmp.vecSize = TM->getTargetData()->getTypeAllocSize(ty);
+  }
+  tmp.isRegion = isRegion;
+  mArrayMems[GV->getName()] = tmp;
+}
+
+void AMDILModuleInfo::parseConstantPtr(const GlobalValue *GV)
+{
+  // Record size/alignment for a constant-addrspace global so constant
+  // buffer offsets can be assigned later.
+  const GlobalVariable *G = dyn_cast<GlobalVariable>(GV);
+  AMDILConstPtr constAttr;
+  // Use GV (never NULL) for the name; the original read G->getName()
+  // before its own NULL check on G.
+  constAttr.name = GV->getName();
+  if (G == NULL) {
+    // Bug fix: the original also read G->getAlignment() unconditionally
+    // below, even on this NULL path.
+    constAttr.size = 0;
+    constAttr.align = 16;
+  } else {
+    // dereference the pointer type because GlobalVariable is always a pointer
+    // type, and we want to calculate the size of the memory that the
+    // GlobalVariable pointer points to
+    PointerType *pTy = dyn_cast<PointerType>(G->getType());
+    assert(pTy && "Global Variable not pointer type");
+    Type *ty = pTy->getElementType();
+    constAttr.size = TM->getTargetData()->getTypeAllocSize(ty);
+    constAttr.align = std::max(G->getAlignment(), 16U);
+  }
+  constAttr.base = GV;
+  constAttr.isArgument = false;
+  constAttr.isArray = true;
+  constAttr.offset = 0;
+  constAttr.cbNum = 0;
+  constAttr.usesHardware = false;
+  mConstMems[GV->getName()] = constAttr;
+}
+
+void AMDILModuleInfo::parseGlobalAnnotate(const GlobalValue *G)
+{
+  // Walk the kernel-annotation array and parse each kernel record.
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  if (!GV || !GV->hasInitializer()) {
+    // Bug fix: GV was dereferenced without checking the dyn_cast result.
+    return;
+  }
+  const Constant *CT = GV->getInitializer();
+  if (!CT || isa<GlobalValue>(CT)) {
+    return;
+  }
+  const ConstantArray *CA = dyn_cast<ConstantArray>(CT);
+  if (!CA) {
+    return;
+  }
+
+  unsigned int nKernels = CA->getNumOperands();
+  for (unsigned int i = 0, e = nKernels; i != e; ++i) {
+    parseKernelInformation(CA->getOperand(i));
+  }
+}
+
+// Resolve an lvgv/rvgv annotation operand to its AMDILLocalArg record and
+// lay out each referenced local/region array: every array is placed at the
+// next suitably aligned offset of its size counter, which is then advanced.
+AMDILLocalArg* AMDILModuleInfo::parseKernelLRInfo(AMDILKernel *kernel, const Constant *CV)
+{
+  llvm::StringRef xvgvName = "";  // lvgv or rvgv
+
+  assert(CV);
+
+  if (CV->getNumOperands()) {
+    xvgvName = (*(CV->op_begin()))->getName();
+  }
+
+  // There can be multiple local or region arrays, so we
+  // need to handle each one separately
+
+  AMDILLocalArg *ptr = NULL;
+  if (mLocalArgs.find(xvgvName) != mLocalArgs.end()) {
+    ptr = &mLocalArgs[xvgvName];
+
+    llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS>::iterator ib, ie;
+    for (ib = ptr->local.begin(), ie = ptr->local.end(); ib != ie; ++ib) {
+      AMDILArrayMem *a = *ib;
+      // Four counters: {local, region} x {hardware, software-emulated}.
+      uint32_t *curSize;
+      if (a->isRegion) {
+        curSize = (a->isHW) ? &kernel->curHWRSize : &kernel->curRSize;
+      } else {
+        curSize = (a->isHW) ? &kernel->curHWSize : &kernel->curSize;
+      }
+      // Align up, place the array, then bump the counter past it.
+      a->offset = AlignUp_32(*curSize, a->align);
+      *curSize = a->offset + a->vecSize;
+    }
+  }
+
+  return ptr;
+}
+
+// Parse one kernel record from the annotation array.  Each record is a
+// ConstantStruct whose operands are: kernel pointer, SGV, FGV, LVGV,
+// [RVGV], NULL terminator.
+void AMDILModuleInfo::parseKernelInformation(const Value *V)
+{
+  if (isa<GlobalValue>(V)) {
+    return;
+  }
+  const ConstantStruct *CS = dyn_cast_or_null<ConstantStruct>(V);
+  if (!CS) {
+    return;
+  }
+  uint32_t N = CS->getNumOperands();
+  assert((N == 5 || N == 6) && "Expected 5 or 6 operands");
+
+  AMDILKernel *kernel;
+
+  // The first operand is always a pointer to the AMDILKernel.
+  const Constant *CV = dyn_cast<Constant>(CS->getOperand(0));
+  llvm::StringRef AMDILKernelName = "";
+  if (CV->getNumOperands()) {
+    AMDILKernelName = (*(CV->op_begin()))->getName();
+  }
+
+  // If we have images, then we have already created the AMDILKernel and we just need
+  // to get the AMDILKernel information.
+  if (mKernels.find(AMDILKernelName) != mKernels.end()) {
+    kernel = mKernels[AMDILKernelName];
+    kernel->mKernel = true;
+    kernel->mName = AMDILKernelName;
+  } else {
+    kernel = new AMDILKernel(AMDILKernelName, true);
+  }
+
+  // The second operand is SGV, there can only be one so we don't need to worry
+  // about parsing out multiple data points.
+  CV = dyn_cast<Constant>(CS->getOperand(1));
+
+  llvm::StringRef sgvName;
+  if (CV->getNumOperands()) {
+    sgvName = (*(CV->op_begin()))->getName();
+  }
+
+  if (mKernelArgs.find(sgvName) != mKernelArgs.end()) {
+    kernel->sgv = &mKernelArgs[sgvName];
+  }
+
+
+  // The third operand is FGV, which is skipped
+
+  // The fourth operand is LVGV
+  kernel->lvgv = parseKernelLRInfo(kernel, dyn_cast<Constant>(CS->getOperand(3)));
+
+  // The possibly missing (e.g. on Apple) fifth operand is RVGV
+  // NOTE(review): given the assert above, 'N >= 5' always holds, so with
+  // N == 5 the trailing NULL operand is handed here as if it were RVGV
+  // (parseKernelLRInfo resolves it to nothing).  'N == 6' may have been
+  // intended -- confirm.
+  if (N >= 5) {
+    kernel->rvgv = parseKernelLRInfo(kernel, dyn_cast<Constant>(CS->getOperand(4)));
+  }
+
+  // The last (fifth or sixth) operand is NULL
+
+  mKernels[AMDILKernelName] = kernel;
+}
+
+AMDILKernel *
+AMDILModuleInfo::getKernel(const llvm::StringRef &name)
+{
+  // NULL when no kernel of this name has been recorded.
+  StringMap<AMDILKernel*>::iterator entry = mKernels.find(name);
+  return (entry == mKernels.end()) ? NULL : entry->second;
+}
+
+bool AMDILModuleInfo::isKernel(const llvm::StringRef &name) const
+{
+  // Pure membership test; the kernel record itself is not needed here.
+  return mKernels.count(name) != 0;
+}
+
+bool AMDILModuleInfo::isWriteOnlyImage(const llvm::StringRef &name,
+                                       uint32_t iID) const
+{
+  // Unknown kernels carry no image annotations at all.
+  StringMap<AMDILKernel*>::const_iterator entry = mKernels.find(name);
+  return entry != mKernels.end() && entry->second->writeOnly.count(iID);
+}
+
+bool AMDILModuleInfo::isReadOnlyImage(const llvm::StringRef &name,
+                                      uint32_t iID) const
+{
+  // Unknown kernels carry no image annotations at all.
+  StringMap<AMDILKernel*>::const_iterator entry = mKernels.find(name);
+  return entry != mKernels.end() && entry->second->readOnly.count(iID);
+}
+
+int32_t AMDILModuleInfo::getArgID(const Argument *arg)
+{
+  // -1 marks an argument that was never assigned an ID.
+  DenseMap<const Argument *, int32_t>::iterator entry = mArgIDMap.find(arg);
+  return (entry == mArgIDMap.end()) ? -1 : entry->second;
+}
+
+uint32_t
+AMDILModuleInfo::getRegion(const llvm::StringRef &name, uint32_t dim) const
+{
+  // Returns the kernel's required region size for dimension 0-2, the
+  // flattened product for dim == 3, and 1 for any other dim value, falling
+  // back to the target defaults when the kernel has no SGV record.
+  // Cleanup: removed dead 'break' statements after 'return', stray
+  // semicolons after switch blocks, and the unreachable trailing return.
+  StringMap<AMDILKernel*>::const_iterator iter = mKernels.find(name);
+  if (iter != mKernels.end() && iter->second->sgv) {
+    const AMDILKernelAttr *sgv = iter->second->sgv;
+    switch (dim) {
+    case 0:
+    case 1:
+    case 2:
+      return sgv->reqRegionSize[dim];
+    case 3:
+      return sgv->reqRegionSize[0] *
+             sgv->reqRegionSize[1] *
+             sgv->reqRegionSize[2];
+    default:
+      break;
+    }
+  }
+  // No kernel-specific value: use the target defaults.
+  switch (dim) {
+  case 0:
+  case 1:
+  case 2:
+    return mSTM->getDefaultSize(dim);
+  case 3:
+    return mSTM->getDefaultSize(0) *
+           mSTM->getDefaultSize(1) *
+           mSTM->getDefaultSize(2);
+  default:
+    break;
+  }
+  return 1;
+}
+
+// Begin iterator over all tracked constant-pointer records (mConstMems).
+StringMap<AMDILConstPtr>::iterator AMDILModuleInfo::consts_begin()
+{
+  return mConstMems.begin();
+}
+
+
+// End iterator over all tracked constant-pointer records (mConstMems).
+StringMap<AMDILConstPtr>::iterator AMDILModuleInfo::consts_end()
+{
+  return mConstMems.end();
+}
+
+// True when no constant-pointer records have been collected.
+bool AMDILModuleInfo::consts_empty()
+{
+  return mConstMems.empty();
+}
+
+bool AMDILModuleInfo::byteStoreExists(StringRef S) const
+{
+  // Pure membership query against the byte-store name set.
+  return mByteStore.count(S) != 0;
+}
+
+bool AMDILModuleInfo::usesHWConstant(const AMDILKernel *krnl,
+                                     const llvm::StringRef &arg)
+{
+  // Unknown constant pointers default to the software path.
+  const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
+  return curConst ? curConst->usesHardware : false;
+}
+
+uint32_t AMDILModuleInfo::getConstPtrSize(const AMDILKernel *krnl,
+    const llvm::StringRef &arg)
+{
+  // Zero when the kernel has no constant pointer under this name.
+  const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
+  return curConst ? curConst->size : 0;
+}
+
+uint32_t AMDILModuleInfo::getConstPtrOff(const AMDILKernel *krnl,
+    const llvm::StringRef &arg)
+{
+  // Zero when the kernel has no constant pointer under this name.
+  const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
+  return curConst ? curConst->offset : 0;
+}
+
+uint32_t AMDILModuleInfo::getConstPtrCB(const AMDILKernel *krnl,
+                                        const llvm::StringRef &arg)
+{
+  // Zero when the kernel has no constant pointer under this name.
+  const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
+  return curConst ? curConst->cbNum : 0;
+}
+
+void AMDILModuleInfo::calculateCPOffsets(const MachineFunction *MF,
+    AMDILKernel *krnl)
+{
+  // Assign a byte offset to every constant-pool entry of the function and
+  // advance the module-wide running offset (mCurrentCPOffset).
+  const MachineConstantPool *MCP = MF->getConstantPool();
+  if (!MCP) {
+    return;
+  }
+  // Perf fix: bind by reference instead of copying the whole vector.
+  const std::vector<MachineConstantPoolEntry> &consts = MCP->getConstants();
+  size_t numConsts = consts.size();
+  const TargetData *TD = TM->getTargetData();
+  for (size_t x = 0; x < numConsts; ++x) {
+    // NOTE(review): assumes no target-specific constant pool entries, so
+    // Val.ConstVal is the active union member -- confirm.
+    const Constant* constVal = consts[x].Val.ConstVal;
+    krnl->CPOffsets.push_back(std::make_pair(mCurrentCPOffset, constVal));
+    // Align the size to the vector boundary
+    uint32_t alignment = 16;
+    const GlobalValue *GV = dyn_cast<GlobalValue>(constVal);
+    Type* ty = constVal->getType();
+    if (GV) {
+      alignment = std::max(GV->getAlignment(), 16U);
+      // dereference the pointer type because GlobalVariable is always a pointer
+      // type, and we want to calculate the size of the memory that the
+      // GlobalVariable pointer points to
+      PointerType* pTy = dyn_cast<PointerType>(ty);
+      assert(pTy && "GlovalVariable not pointer type");
+      ty = pTy->getElementType();
+    }
+    size_t curSize = TD->getTypeAllocSize(ty);
+    curSize = AlignUp_32(curSize, alignment);
+    mCurrentCPOffset += curSize;
+  }
+}
+
+bool AMDILModuleInfo::isConstPtrArray(const AMDILKernel *krnl,
+                                      const llvm::StringRef &arg)
+{
+  // False when the kernel has no constant pointer under this name.
+  const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
+  return curConst ? curConst->isArray : false;
+}
+
+bool AMDILModuleInfo::isConstPtrArgument(const AMDILKernel *krnl,
+    const llvm::StringRef &arg)
+{
+  // False when the kernel has no constant pointer under this name.
+  const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
+  return curConst ? curConst->isArgument : false;
+}
+
+const Value *AMDILModuleInfo::getConstPtrValue(const AMDILKernel *krnl,
+    const llvm::StringRef &arg)
+{
+  // NULL when the kernel has no constant pointer under this name.
+  const AMDILConstPtr *curConst = getConstPtr(krnl, arg);
+  return curConst ? curConst->base : NULL;
+}
+
+static void
+dumpZeroElements(StructType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(IntegerType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(ArrayType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(VectorType * const T, OSTREAM_TYPE &O, bool asBytes);
+static void
+dumpZeroElements(Type * const T, OSTREAM_TYPE &O, bool asBytes);
+
+// Print zero-valued placeholder elements for type T: one ":0" per scalar
+// element, or one ":0" per byte when asBytes is set.  Aggregates recurse
+// through the typed overloads below.
+void dumpZeroElements(Type * const T, OSTREAM_TYPE &O, bool asBytes)
+{
+  if (!T) {
+    return;
+  }
+  switch(T->getTypeID()) {
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+  case Type::LabelTyID:
+    assert(0 && "These types are not supported by this backend");
+  default:
+  case Type::DoubleTyID:
+    // 8 bytes for a double.
+    if (asBytes) {
+      O << ":0:0:0:0:0:0:0:0";
+    } else {
+      O << ":0";
+    }
+    break;
+  case Type::FloatTyID:
+  case Type::PointerTyID:
+  case Type::FunctionTyID:
+    // 4 bytes for float/pointer/function values.
+    if (asBytes) {
+      O << ":0:0:0:0";
+    } else {
+      O << ":0";
+    }
+    // Bug fix: this case previously fell through into the IntegerTyID
+    // handler, passing a NULL IntegerType* (dyn_cast of a non-integer
+    // type) that was then dereferenced.
+    break;
+  case Type::IntegerTyID:
+    dumpZeroElements(dyn_cast<IntegerType>(T), O, asBytes);
+    break;
+  case Type::StructTyID: {
+    const StructType *ST = cast<StructType>(T);
+    if (!ST->isOpaque()) {
+      dumpZeroElements(dyn_cast<StructType>(T), O, asBytes);
+    } else { // A pre-LLVM 3.0 opaque type
+      if (asBytes) {
+        O << ":0:0:0:0";
+      } else {
+        O << ":0";
+      }
+    }
+  }
+  break;
+  case Type::ArrayTyID:
+    dumpZeroElements(dyn_cast<ArrayType>(T), O, asBytes);
+    break;
+  case Type::VectorTyID:
+    dumpZeroElements(dyn_cast<VectorType>(T), O, asBytes);
+    break;
+  };
+}
+
+void
+dumpZeroElements(StructType * const ST, OSTREAM_TYPE &O, bool asBytes)
+{
+  // Recurse into every field of the struct, emitting zeros for each.
+  if (!ST) {
+    return;
+  }
+  for (StructType::element_iterator eib = ST->element_begin(),
+       eie = ST->element_end(); eib != eie; ++eib) {
+    dumpZeroElements(*eib, O, asBytes);
+  }
+}
+
+void
+dumpZeroElements(IntegerType * const IT, OSTREAM_TYPE &O, bool asBytes)
+{
+  // Bug fix: guard against a NULL dyn_cast result from the Type*
+  // dispatcher; the StructType overload already tolerates NULL.
+  if (!IT) {
+    return;
+  }
+  if (asBytes) {
+    // One ":0" per byte of the integer's width.
+    unsigned byteWidth = (IT->getBitWidth() >> 3);
+    for (unsigned x = 0; x < byteWidth; ++x) {
+      O << ":0";
+    }
+  }
+}
+
+void
+dumpZeroElements(ArrayType * const AT, OSTREAM_TYPE &O, bool asBytes)
+{
+  // Emit zeros for every element; NULL guard added for consistency with
+  // the StructType overload.
+  if (!AT) {
+    return;
+  }
+  size_t size = AT->getNumElements();
+  for (size_t x = 0; x < size; ++x) {
+    dumpZeroElements(AT->getElementType(), O, asBytes);
+  }
+}
+
+void
+dumpZeroElements(VectorType * const VT, OSTREAM_TYPE &O, bool asBytes)
+{
+  // Emit zeros for every lane; NULL guard added for consistency with the
+  // StructType overload.
+  if (!VT) {
+    return;
+  }
+  size_t size = VT->getNumElements();
+  for (size_t x = 0; x < size; ++x) {
+    dumpZeroElements(VT->getElementType(), O, asBytes);
+  }
+}
+
+// Print one constant to O in the metadata encoding: each scalar is emitted
+// as ":<hex>"; with asBytes set, each individual byte is emitted as
+// ":<hex>" instead.  Aggregates recurse element by element.  Unions are
+// used for bit-exact reinterpretation; byte order follows the host's
+// in-memory representation.
+void AMDILModuleInfo::printConstantValue(const Constant *CAval,
+    OSTREAM_TYPE &O, bool asBytes)
+{
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CAval)) {
+    // Distinguish double from float by the APFloat semantics.
+    bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
+    if (isDouble) {
+      double val = CFP->getValueAPF().convertToDouble();
+      union dtol_union {
+        double d;
+        uint64_t l;
+        char c[8];
+      } conv;
+      conv.d = val;
+      if (!asBytes) {
+        O << ":";
+        O.write_hex(conv.l);
+      } else {
+        for (int i = 0; i < 8; ++i) {
+          O << ":";
+          O.write_hex((unsigned)conv.c[i] & 0xFF);
+        }
+      }
+    } else {
+      float val = CFP->getValueAPF().convertToFloat();
+      union ftoi_union {
+        float f;
+        uint32_t u;
+        char c[4];
+      } conv;
+      conv.f = val;
+      if (!asBytes) {
+        O << ":";
+        O.write_hex(conv.u);
+      } else {
+        for (int i = 0; i < 4; ++i) {
+          O << ":";
+          O.write_hex((unsigned)conv.c[i] & 0xFF);
+        }
+      }
+    }
+  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CAval)) {
+    // Integers: whole zero-extended value, or per-byte by bit width.
+    uint64_t zVal = CI->getValue().getZExtValue();
+    if (!asBytes) {
+      O << ":";
+      O.write_hex(zVal);
+    } else {
+      switch (CI->getBitWidth()) {
+      default: {
+        // Widths other than 8/16/32 are dumped as a full 64-bit value.
+        union ltob_union {
+          uint64_t l;
+          char c[8];
+        } conv;
+        conv.l = zVal;
+        for (int i = 0; i < 8; ++i) {
+          O << ":";
+          O.write_hex((unsigned)conv.c[i] & 0xFF);
+        }
+      }
+      break;
+      case 8:
+        O << ":";
+        O.write_hex(zVal & 0xFF);
+        break;
+      case 16: {
+        union stob_union {
+          uint16_t s;
+          char c[2];
+        } conv;
+        conv.s = (uint16_t)zVal;
+        O << ":";
+        O.write_hex((unsigned)conv.c[0] & 0xFF);
+        O << ":";
+        O.write_hex((unsigned)conv.c[1] & 0xFF);
+      }
+      break;
+      case 32: {
+        union itob_union {
+          uint32_t i;
+          char c[4];
+        } conv;
+        conv.i = (uint32_t)zVal;
+        for (int i = 0; i < 4; ++i) {
+          O << ":";
+          O.write_hex((unsigned)conv.c[i] & 0xFF);
+        }
+      }
+      break;
+      }
+    }
+  } else if (const ConstantVector *CV = dyn_cast<ConstantVector>(CAval)) {
+    // Vectors: recurse into each lane.
+    int y = CV->getNumOperands()-1;
+    int x = 0;
+    for (; x < y; ++x) {
+      printConstantValue(CV->getOperand(x), O, asBytes);
+    }
+    printConstantValue(CV->getOperand(x), O, asBytes);
+  } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CAval)) {
+    // Structs: recurse into each field.
+    int y = CS->getNumOperands();
+    int x = 0;
+    for (; x < y; ++x) {
+      printConstantValue(CS->getOperand(x), O, asBytes);
+    }
+  } else if (const ConstantAggregateZero *CAZ
+             = dyn_cast<ConstantAggregateZero>(CAval)) {
+    int y = CAZ->getNumOperands();
+    if (y > 0) {
+      int x = 0;
+      for (; x < y; ++x) {
+        printConstantValue((llvm::Constant *)CAZ->getOperand(x),
+                           O, asBytes);
+      }
+    } else {
+      // Operand-less zero aggregate: emit zeros by walking the type.
+      if (asBytes) {
+        dumpZeroElements(CAval->getType(), O, asBytes);
+      } else {
+        int y = getNumElements(CAval->getType())-1;
+        for (int x = 0; x < y; ++x) {
+          O << ":0";
+        }
+        O << ":0";
+      }
+    }
+  } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CAval)) {
+    // Arrays: recurse into each element.
+    int y = CA->getNumOperands();
+    int x = 0;
+    for (; x < y; ++x) {
+      printConstantValue(CA->getOperand(x), O, asBytes);
+    }
+  } else if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CAval)) {
+    // Packed data arrays/vectors: materialize each element and recurse.
+    int y = CDS->getNumElements();
+    int x = 0;
+    for (; x < y; ++x) {
+      printConstantValue(CDS->getElementAsConstant(x), O, asBytes);
+    }
+  } else if (dyn_cast<ConstantPointerNull>(CAval)) {
+    O << ":0";
+  } else if (dyn_cast<ConstantExpr>(CAval)) {
+    // Relocatable expressions are emitted as zero placeholders.
+    O << ":0";
+  } else if (dyn_cast<UndefValue>(CAval)) {
+    O << ":0";
+  } else {
+    assert(0 && "Hit condition which was not expected");
+  }
+}
+
+// Returns true if T is (or transitively contains, through pointer, array
+// or vector element types) a struct type.
+static bool isStruct(Type * const T)
+{
+  if (!T) {
+    return false;
+  }
+  switch (T->getTypeID()) {
+  default:
+    return false;
+  case Type::PointerTyID:
+    return isStruct(T->getContainedType(0));
+  case Type::StructTyID:
+    return true;
+  case Type::ArrayTyID:
+  case Type::VectorTyID:
+    // cast<> (which asserts) instead of an unchecked dyn_cast<>
+    // dereference: both type IDs guarantee a SequentialType here.
+    return isStruct(cast<SequentialType>(T)->getElementType());
+  }
+}
+
+// Emit the ";#DATASTART/;#DATAEND" metadata section for constant buffer
+// 'id'.  Buffer 0 additionally carries all function constant-pool entries
+// ahead of the named constants.
+void AMDILModuleInfo::dumpDataToCB(OSTREAM_TYPE &O, AMDILMachineFunctionInfo *mfi,
+                                   uint32_t id)
+{
+  // Section size: the largest end offset of any constant assigned to this
+  // buffer, rounded up to a 16-byte boundary.
+  uint32_t size = 0;
+  for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(),
+       cme = consts_end(); cmb != cme; ++cmb) {
+    AMDILConstPtr& c = cmb->second;
+    if (id == c.cbNum) {
+      if ((c.size + c.offset) > size) {
+        size = AlignUp_32(c.size + c.offset, 16);
+      }
+    }
+  }
+  const TargetData *TD = TM->getTargetData();
+  if (id == 0) {
+    // Buffer 0 also holds the constant-pool data, emitted first.
+    O << ";#DATASTART:" << (size + mCurrentCPOffset) << "\n";
+    if (mCurrentCPOffset) {
+      for (StringMap<AMDILKernel*>::iterator kcpb = mKernels.begin(),
+           kcpe = mKernels.end(); kcpb != kcpe; ++kcpb) {
+        const AMDILKernel *k = kcpb->second;
+        if (!k) continue;
+        size_t numConsts = k->CPOffsets.size();
+        for (size_t x = 0; x < numConsts; ++x) {
+          size_t offset = k->CPOffsets[x].first;
+          const Constant *C = k->CPOffsets[x].second;
+          Type *Ty = C->getType();
+          if (isa<GlobalVariable>(C)) {
+            // dereference the pointer type because GlobalVariable is always
+            // a pointer type, and we want to calculate the size of the memory
+            // that the GlobalVariable pointer points to
+            PointerType* pTy = dyn_cast<PointerType>(Ty);
+            assert(pTy && "GlobalVariable not pointer type");
+            Ty = pTy->getElementType();
+          }
+          // Structs are sized/dumped byte-wise, everything else per element.
+          size_t size = (isStruct(Ty) ? TD->getTypeAllocSize(Ty)
+                         : getNumElements(Ty));
+          O << ";#" << getTypeName(Ty, symTab, mfi, true) << ":";
+          O << offset << ":" << size ;
+          printConstantValue(C, O, isStruct(Ty));
+          O << "\n";
+        }
+      }
+    }
+  } else {
+    O << ";#DATASTART:" << id << ":" << size << "\n";
+  }
+
+  for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(), cme = consts_end();
+       cmb != cme; ++cmb) {
+    if (cmb->second.cbNum != id) {
+      continue;
+    }
+    const GlobalVariable *G = dyn_cast<GlobalVariable>(cmb->second.base);
+    Type *Ty = (G) ? G->getType() : NULL;
+    size_t offset = cmb->second.offset;
+    // NOTE(review): G is dereferenced unconditionally here even though the
+    // ternary above allows it to be NULL -- confirm that 'base' is always a
+    // GlobalVariable for entries reaching this loop.
+    const Constant *C = G->getInitializer();
+    size_t size = (isStruct(Ty)
+                   ? cmb->second.size
+                   : getNumElements(Ty));
+    O << ";#" << getTypeName(Ty, symTab, mfi, true) << ":";
+    if (!id) {
+      // Named constants in buffer 0 come after the constant-pool block.
+      O << (offset + mCurrentCPOffset) << ":" << size;
+    } else {
+      O << offset << ":" << size;
+    }
+    if (C) {
+      printConstantValue(C, O, isStruct(Ty));
+    } else {
+      assert(0 && "Cannot have a constant pointer"
+             " without an initializer!");
+    }
+    O <<"\n";
+  }
+  if (id == 0) {
+    O << ";#DATAEND\n";
+  } else {
+    O << ";#DATAEND:" << id << "\n";
+  }
+}
+
+void
+AMDILModuleInfo::dumpDataSection(OSTREAM_TYPE &O, AMDILMachineFunctionInfo *mfi)
+{
+  // Nothing to emit when there are no constants and no constant-pool data.
+  if (consts_empty() && !mCurrentCPOffset) {
+    return;
+  }
+  // Collect the set of constant-buffer IDs that actually carry data.
+  llvm::DenseSet<uint32_t> const_set;
+  for (StringMap<AMDILConstPtr>::iterator cmb = consts_begin(),
+       cme = consts_end(); cmb != cme; ++cmb) {
+    const_set.insert(cmb->second.cbNum);
+  }
+  if (mCurrentCPOffset) {
+    // Constant-pool data always lives in buffer 0.
+    const_set.insert(0);
+  }
+  // Emit one data section per populated buffer.
+  for (llvm::DenseSet<uint32_t>::iterator setb = const_set.begin(),
+       sete = const_set.end(); setb != sete; ++setb) {
+    dumpDataToCB(O, mfi, *setb);
+  }
+}
+
+/// Create a function ID if it is not known or return the known
+/// function ID.
+uint32_t AMDILModuleInfo::getOrCreateFunctionID(const GlobalValue* func)
+{
+  // Named functions are keyed by name so the two ID tables stay in sync.
+  if (!func->getName().empty()) {
+    return getOrCreateFunctionID(func->getName());
+  }
+  // Anonymous functions are keyed by pointer.  Both tables share one ID
+  // space above the reserved range, hence the combined size offset.
+  if (mFuncPtrNames.find(func) != mFuncPtrNames.end()) {
+    return mFuncPtrNames[func];
+  }
+  uint32_t id = mFuncPtrNames.size() + RESERVED_FUNCS + mFuncNames.size();
+  mFuncPtrNames[func] = id;
+  return id;
+}
+
+uint32_t AMDILModuleInfo::getOrCreateFunctionID(const std::string &func)
+{
+  // Both name tables share one ID space above the reserved range, hence
+  // the combined size offset when minting a new ID.
+  if (mFuncNames.find(func) != mFuncNames.end()) {
+    return mFuncNames[func];
+  }
+  uint32_t id = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size();
+  mFuncNames[func] = id;
+  return id;
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,184 @@
+//===-- AMDILModuleInfo.h -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for AMDIL targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_MACHINE_MODULE_INFO_H_
+#define _AMDIL_MACHINE_MODULE_INFO_H_
+#include "AMDIL.h"
+#include "AMDILKernel.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <set>
+#define CB_BASE_OFFSET 2
+
+namespace llvm
+{
+class AMDILKernel;
+class Argument;
+class TypeSymbolTable;
+class GlobalValue;
+class MachineFunction;
+class GlobalValue;
+
+class AMDILMachineFunctionInfo;
+/// Module-level metadata cache for the AMDIL backend.  processModule()
+/// parses kernel, image, sampler and constant-pointer annotations out of
+/// the IR module once; the accessors below then serve that cached data
+/// to the asm printer and lowering code.
+class AMDILModuleInfo : public MachineModuleInfoImpl
+{
+protected:
+  const MachineModuleInfo *mMMI; // enclosing MMI — presumably not owned; confirm
+public:
+  AMDILModuleInfo(const MachineModuleInfo &);
+  virtual ~AMDILModuleInfo();
+
+  void processModule(const Module *MF, const AMDILTargetMachine* mTM);
+
+  /// Process the given module and parse out the global variable metadata passed
+  /// down from the frontend-compiler
+
+  /// Returns true if the image ID corresponds to a read only image.
+  bool isReadOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
+
+  /// Returns true if the image ID corresponds to a write only image.
+  bool isWriteOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
+
+  /// Gets the group size of the kernel for the given dimension.
+  uint32_t getRegion(const llvm::StringRef &name, uint32_t dim) const;
+
+  /// Get the offset of the array for the kernel.
+  int32_t getArrayOffset(const llvm::StringRef &name) const;
+
+  /// Get the offset of the const memory for the kernel.
+  int32_t getConstOffset(const llvm::StringRef &name) const;
+
+  /// Get the boolean value if this particular constant uses HW or not.
+  bool getConstHWBit(const llvm::StringRef &name) const;
+
+  /// Get a reference to the kernel metadata information for the given function
+  /// name.
+  AMDILKernel *getKernel(const llvm::StringRef &name);
+  bool isKernel(const llvm::StringRef &name) const;
+
+  /// Dump the data section to the output stream for the given kernel.
+  void dumpDataSection(OSTREAM_TYPE &O, AMDILMachineFunctionInfo *mfi);
+
+  /// Iterate through the constants that are global to the compilation unit.
+  StringMap<AMDILConstPtr>::iterator consts_begin();
+  StringMap<AMDILConstPtr>::iterator consts_end();
+  bool consts_empty();
+
+  /// Query if the kernel has a byte store.
+  bool byteStoreExists(llvm::StringRef S) const;
+
+  /// Query if the constant pointer is an argument.
+  bool isConstPtrArgument(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+  /// Query if the constant pointer is an array that is globally scoped.
+  bool isConstPtrArray(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+  /// Query if the constant argument uses hardware or not
+  bool usesHWConstant(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+  /// Query the size of the constant pointer.
+  uint32_t getConstPtrSize(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+  /// Query the offset of the constant pointer.
+  uint32_t getConstPtrOff(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+  /// Query the constant buffer number for a constant pointer.
+  uint32_t getConstPtrCB(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+  /// Query the Value* that the constant pointer originates from.
+  const Value *getConstPtrValue(const AMDILKernel *krnl, const llvm::StringRef &arg);
+
+  /// Get the ID of the argument.
+  int32_t getArgID(const Argument *arg);
+
+  /// Get the unique function ID for the specific function name and create a new
+  /// unique ID if it is not found.
+  uint32_t getOrCreateFunctionID(const GlobalValue* func);
+  uint32_t getOrCreateFunctionID(const std::string &func);
+
+  /// Calculate the offsets of the constant pool for the given kernel and
+  /// machine function.
+  void calculateCPOffsets(const MachineFunction *MF, AMDILKernel *krnl);
+
+  /// Accumulate bytes consumed by printf format data.
+  void add_printf_offset(uint32_t offset) {
+    mPrintfOffset += offset;
+  }
+  uint32_t get_printf_offset() {
+    return mPrintfOffset;
+  }
+
+  /// Return the set of sampler names recorded for the given kernel.
+  std::set<std::string>* getSamplerForKernel(llvm::StringRef &kernelName);
+
+private:
+  /// Various functions that parse global value information and store them in
+  /// the global manager. This approach is used instead of dynamic parsing as it
+  /// might require more space, but should allow caching of data that gets
+  /// requested multiple times.
+  AMDILKernelAttr parseSGV(const GlobalValue *GV);
+
+  // Read the LVGV or RVGV annotation
+  AMDILLocalArg parseXVGV(const GlobalValue *GV);
+  void parseGlobalAnnotate(const GlobalValue *G);
+  void parseImageAnnotate(const GlobalValue *G);
+  void parseSamplerAnnotate(const GlobalValue *GV);
+  void parseConstantPtrAnnotate(const GlobalValue *G);
+  void parseIgnoredGlobal(const GlobalValue *G);
+  void printConstantValue(const Constant *CAval,
+                          OSTREAM_TYPE& O,
+                          bool asByte);
+
+
+  // parse the local and region operands for parseKernelInformation
+  AMDILLocalArg* parseKernelLRInfo(AMDILKernel *kernel, const Constant *CV);
+  void parseKernelInformation(const Value *V);
+  void parseAutoArray(const GlobalValue *G, bool isRegion);
+  void parseConstantPtr(const GlobalValue *G);
+  void allocateGlobalCB();
+  void dumpDataToCB(OSTREAM_TYPE &O, AMDILMachineFunctionInfo *mfi, uint32_t id);
+  bool checkConstPtrsUseHW(Module::const_iterator *F);
+
+  // --- Parsed metadata caches, keyed by name unless noted ----------------
+  llvm::StringMap<AMDILKernel*> mKernels;       // kernel name -> kernel metadata
+  llvm::StringMap<AMDILKernelAttr> mKernelArgs; // attributes parsed by parseSGV
+  llvm::StringMap<AMDILArrayMem> mArrayMems;    // auto-array records — see parseAutoArray
+  llvm::StringMap<AMDILConstPtr> mConstMems;    // constant-pointer records (consts_begin/end)
+  llvm::StringMap<AMDILLocalArg> mLocalArgs;    // local/region arg records — see parseXVGV
+  llvm::StringMap<uint32_t> mFuncNames;         // IDs for named functions
+  llvm::DenseMap<const GlobalValue*, uint32_t> mFuncPtrNames; // IDs for unnamed functions
+  llvm::DenseMap<uint32_t, llvm::StringRef> mImageNameMap;    // image ID -> name
+  llvm::StringMap<std::set<std::string> > mSamplerSet;        // kernel -> sampler names
+  std::set<llvm::StringRef> mByteStore;         // names with byte stores (byteStoreExists)
+  std::set<llvm::StringRef> mIgnoreStr;         // globals skipped during processing
+  llvm::DenseMap<const Argument *, int32_t> mArgIDMap;        // argument -> ID (getArgID)
+  const char *symTab;          // symbol table pointer — TODO confirm ownership/lifetime
+  const AMDILSubtarget *mSTM;  // active subtarget (not owned)
+  const TargetMachine *TM;     // active target machine (not owned)
+  size_t mOffset;              // running parse offset — presumably; confirm in .cpp
+  uint32_t mReservedBuffs;     // count of reserved constant buffers — confirm
+  uint32_t mCurrentCPOffset;   // constant-pool offset; nonzero forces CB0 emission
+  uint32_t mPrintfOffset;      // accumulated printf data size (add_printf_offset)
+  bool mProcessed;             // presumably set once processModule has run — confirm
+};
+
+
+
+} // end namespace llvm
+
+#endif // _AMDIL_MACHINE_MODULE_INFO_H_
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMultiClass.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMultiClass.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMultiClass.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMultiClass.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,1702 @@
+//===-- AMDILMultiClass.td ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Conditional-branch pseudo instructions, one per scalar source type.
+// Each variant takes a branch target plus a condition operand of the
+// given register class; the 64-bit variants wrap the operand in an
+// explicit value-type cast inside the pattern.
+multiclass BranchConditional<SDNode Op> {
+  def _i8 : ILFormat<IL_OP_IFC, (outs),
+      (ins brtarget:$target, GPRI8:$src0),
+      "; i8 Pseudo branch instruction",
+      [(Op bb:$target, GPRI8:$src0)]>;
+  def _i16 : ILFormat<IL_OP_IFC, (outs),
+      (ins brtarget:$target, GPRI16:$src0),
+      "; i16 Pseudo branch instruction",
+      [(Op bb:$target, GPRI16:$src0)]>;
+  def _i32 : ILFormat<IL_OP_IFC, (outs),
+      (ins brtarget:$target, GPRI32:$src0),
+      "; i32 Pseudo branch instruction",
+      [(Op bb:$target, GPRI32:$src0)]>;
+  def _f32 : ILFormat<IL_OP_IFC, (outs),
+      (ins brtarget:$target, GPRF32:$src0),
+      "; f32 Pseudo branch instruction",
+      [(Op bb:$target, GPRF32:$src0)]>;
+  def _i64 : ILFormat<IL_OP_IFC, (outs),
+      (ins brtarget:$target, GPRI64:$src0),
+      "; i64 Pseudo branch instruction",
+      [(Op bb:$target, (i64 GPRI64:$src0))]>;
+  def _f64 : ILFormat<IL_OP_IFC, (outs),
+      (ins brtarget:$target, GPRF64:$src0),
+      "; f64 Pseudo branch instruction",
+      [(Op bb:$target, (f64 GPRF64:$src0))]>;
+}
+// Multiclass that handles compare instructions (IL_cmp with an immediate
+// condition code) for every supported scalar and vector register class.
+// When a definition is added here, a corresponding definition
+// needs to be added at:
+// AMDILISelLowering.cpp at EmitInstrWithCustomInserter
+multiclass Compare<string asm> {
+  def _i8 : ILFormat<IL_OP_CMP, (outs GPRI8:$dst),
+      (ins i32imm:$cc, GPRI8:$src0, GPRI8:$src1),
+      !strconcat("; i8 ", asm),
+      [(set GPRI8:$dst, (IL_cmp imm:$cc, GPRI8:$src0, GPRI8:$src1))]>;
+  def _i16 : ILFormat<IL_OP_CMP, (outs GPRI16:$dst),
+      (ins i32imm:$cc, GPRI16:$src0, GPRI16:$src1),
+      !strconcat("; i16 ", asm),
+      [(set GPRI16:$dst, (IL_cmp imm:$cc, GPRI16:$src0, GPRI16:$src1))]>;
+  def _i32 : ILFormat<IL_OP_CMP, (outs GPRI32:$dst),
+      (ins i32imm:$cc, GPRI32:$src0, GPRI32:$src1),
+      !strconcat("; i32 ", asm),
+      [(set GPRI32:$dst, (IL_cmp imm:$cc, GPRI32:$src0, GPRI32:$src1))]>;
+  def _i64 : ILFormat<IL_OP_CMP, (outs GPRI64:$dst),
+      (ins i32imm:$cc, GPRI64:$src0, GPRI64:$src1),
+      !strconcat("; i64 ", asm),
+      [(set GPRI64:$dst, (IL_cmp imm:$cc, GPRI64:$src0, GPRI64:$src1))]>;
+  def _f32 : ILFormat<IL_OP_CMP, (outs GPRF32:$dst),
+      (ins i32imm:$cc, GPRF32:$src0, GPRF32:$src1),
+      !strconcat("; f32 ", asm),
+      [(set GPRF32:$dst, (IL_cmp imm:$cc, GPRF32:$src0, GPRF32:$src1))]>;
+  def _f64 : ILFormat<IL_OP_CMP, (outs GPRF64:$dst),
+      (ins i32imm:$cc, GPRF64:$src0, GPRF64:$src1),
+      !strconcat("; f64 ", asm),
+      [(set GPRF64:$dst, (IL_cmp imm:$cc, GPRF64:$src0, GPRF64:$src1))]>;
+  def _v2i8 : ILFormat<IL_OP_CMP, (outs GPRV2I8:$dst),
+      (ins i32imm:$cc, GPRV2I8:$src0, GPRV2I8:$src1),
+      !strconcat("; i8 ", asm),
+      [(set GPRV2I8:$dst, (IL_cmp imm:$cc, GPRV2I8:$src0, GPRV2I8:$src1))]>;
+  def _v2i16 : ILFormat<IL_OP_CMP, (outs GPRV2I16:$dst),
+      (ins i32imm:$cc, GPRV2I16:$src0, GPRV2I16:$src1),
+      !strconcat("; i16 ", asm),
+      [(set GPRV2I16:$dst, (IL_cmp imm:$cc, GPRV2I16:$src0, GPRV2I16:$src1))]>;
+  def _v2i32 : ILFormat<IL_OP_CMP, (outs GPRV2I32:$dst),
+      (ins i32imm:$cc, GPRV2I32:$src0, GPRV2I32:$src1),
+      !strconcat("; i32 ", asm),
+      [(set GPRV2I32:$dst, (IL_cmp imm:$cc, GPRV2I32:$src0, GPRV2I32:$src1))]>;
+  def _v2i64 : ILFormat<IL_OP_CMP, (outs GPRV2I64:$dst),
+      (ins i32imm:$cc, GPRV2I64:$src0, GPRV2I64:$src1),
+      !strconcat("; i64 ", asm),
+      [(set GPRV2I64:$dst, (IL_cmp imm:$cc, GPRV2I64:$src0, GPRV2I64:$src1))]>;
+  def _v2f32 : ILFormat<IL_OP_CMP, (outs GPRV2F32:$dst),
+      (ins i32imm:$cc, GPRV2F32:$src0, GPRV2F32:$src1),
+      !strconcat("; f32 ", asm),
+      [(set GPRV2F32:$dst, (IL_cmp imm:$cc, GPRV2F32:$src0, GPRV2F32:$src1))]>;
+  def _v2f64 : ILFormat<IL_OP_CMP, (outs GPRV2F64:$dst),
+      (ins i32imm:$cc, GPRV2F64:$src0, GPRV2F64:$src1),
+      !strconcat("; f64 ", asm),
+      [(set GPRV2F64:$dst, (IL_cmp imm:$cc, GPRV2F64:$src0, GPRV2F64:$src1))]>;
+  def _v4i8 : ILFormat<IL_OP_CMP, (outs GPRV4I8:$dst),
+      (ins i32imm:$cc, GPRV4I8:$src0, GPRV4I8:$src1),
+      !strconcat("; i8 ", asm),
+      [(set GPRV4I8:$dst, (IL_cmp imm:$cc, GPRV4I8:$src0, GPRV4I8:$src1))]>;
+  def _v4i16 : ILFormat<IL_OP_CMP, (outs GPRV4I16:$dst),
+      (ins i32imm:$cc, GPRV4I16:$src0, GPRV4I16:$src1),
+      !strconcat("; i16 ", asm),
+      [(set GPRV4I16:$dst, (IL_cmp imm:$cc, GPRV4I16:$src0, GPRV4I16:$src1))]>;
+  def _v4i32 : ILFormat<IL_OP_CMP, (outs GPRV4I32:$dst),
+      (ins i32imm:$cc, GPRV4I32:$src0, GPRV4I32:$src1),
+      !strconcat("; i32 ", asm),
+      [(set GPRV4I32:$dst, (IL_cmp imm:$cc, GPRV4I32:$src0, GPRV4I32:$src1))]>;
+  // Note: no _v4i64/_v4f64 variants are defined here.
+  def _v4f32 : ILFormat<IL_OP_CMP, (outs GPRV4F32:$dst),
+      (ins i32imm:$cc, GPRV4F32:$src0, GPRV4F32:$src1),
+      !strconcat("; f32 ", asm),
+      [(set GPRV4F32:$dst, (IL_cmp imm:$cc, GPRV4F32:$src0, GPRV4F32:$src1))]>;
+}
+
+// Multiclass that handles constant values: materializes an immediate
+// into a register of the matching class via IL_OP_MOV.  Only the scalar
+// forms are active; the vector forms are left commented out below.
+multiclass ILConstant<string asm> {
+  def _i8 : ILFormat<IL_OP_MOV, (outs GPRI8:$dst),
+      (ins i8imm:$val),
+      asm, [(set GPRI8:$dst, imm:$val)]>;
+
+  //  def _v2i8 : ILFormat<IL_OP_MOV, (outs GPRV2I8:$dst),
+  //      (ins i8imm:$val),
+  //      asm, [(set GPRV2I8:$dst, GPRV2I8:$val)]>;
+
+  //def _v4i8 : ILFormat<IL_OP_MOV, (outs GPRV4I8:$dst),
+  //(ins i8imm:$val),
+  //asm, [(set GPRV4I8:$dst, GPRV4I8:$val)]>;
+
+  def _i16 : ILFormat<IL_OP_MOV, (outs GPRI16:$dst),
+      (ins i16imm:$val),
+      asm, [(set GPRI16:$dst, imm:$val)]>;
+
+  //  def _v2i16 : ILFormat<IL_OP_MOV, (outs GPRV2I16:$dst),
+  //      (ins i16imm:$val),
+  //      asm, [(set GPRV2I16:$dst, GPRV2I16:$val)]>;
+
+  //  def _v4i16 : ILFormat<IL_OP_MOV, (outs GPRV4I16:$dst),
+  //      (ins i16imm:$val),
+  //      asm, [(set GPRV4I16:$dst, GPRV4I16:$val)]>;
+
+  def _i32 : ILFormat<IL_OP_MOV, (outs GPRI32:$dst),
+      (ins i32imm:$val),
+      asm, [(set GPRI32:$dst, imm:$val)]>;
+
+  //  def _v2i32 : ILFormat<IL_OP_MOV, (outs GPRV2I32:$dst),
+  //      (ins i32imm:$val),
+  //      asm, [(set GPRV2I32:$dst, GPRV2I32:$val)]>;
+
+  //  def _v4i32 : ILFormat<IL_OP_MOV, (outs GPRV4I32:$dst),
+  //      (ins GPRV4I32:$val),
+  //      asm, [(set GPRV4I32:$dst, GPRV4I32:$val)]>;
+
+  def _i64 : ILFormat<IL_OP_MOV, (outs GPRI64:$dst),
+      (ins i64imm:$val),
+      asm, [(set GPRI64:$dst, imm:$val)]>;
+
+  //  def _v2i64 : ILFormat<IL_OP_MOV, (outs GPRV2I64:$dst),
+  //      (ins i64imm:$val),
+  //      asm, [(set GPRV2I64:$dst, GPRV2I64:$val)]>;
+
+  def _f32 : ILFormat<IL_OP_MOV, (outs GPRF32:$dst),
+      (ins f32imm:$val),
+      asm, [(set GPRF32:$dst, fpimm:$val)]>;
+
+  //  def _v2f32 : ILFormat<IL_OP_MOV, (outs GPRV2F32:$dst),
+  //      (ins f32imm:$val),
+  //      asm, [(set GPRV2F32:$dst, GPRV2F32:$val)]>;
+
+  //  def _v4f32 : ILFormat<IL_OP_MOV, (outs GPRV4F32:$dst),
+  //      (ins f32imm:$val),
+  //      asm, [(set GPRV4F32:$dst, GPRV4F32:$val)]>;
+
+  def _f64 : ILFormat<IL_OP_MOV, (outs GPRF64:$dst),
+      (ins f64imm:$val),
+      asm, [(set GPRF64:$dst, fpimm:$val)]>;
+
+  //  def _v2f64 : ILFormat<IL_OP_MOV, (outs GPRV2F64:$dst),
+  //      (ins f64imm:$val),
+  //        asm, [(set GPRV2F64:$dst, GPRV2F64:$val)]>;
+
+}
+
+// Truncating stores to the GLOBAL address space: the register value is
+// narrowed to the destination memory type (e.g. i32 -> i8) on store.
+// Suffix encodes source-register type then memory type, e.g. _i32i8.
+multiclass GTRUNCSTORE<string asm> {
+  def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i8trunc_store GPRI16:$val, ADDR:$ptr)]>;
+  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
+  def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i8trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
+  def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i16trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i32trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_f32trunc_store GPRF64:$val, ADDR:$ptr)]>;
+  def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i8trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+  def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v4i8trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+  def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i8trunc_store GPRV2I16:$val, ADDR:$ptr)]>;
+  def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v4i8trunc_store GPRV4I16:$val, ADDR:$ptr)]>;
+  def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i16trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+  def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v4i16trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+  def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2f32trunc_store GPRV2F64:$val, ADDR:$ptr)]>;
+  def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i8trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+  def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i16trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+  def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i32trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+}
+
+// Truncating stores to the LOCAL address space; mirrors GTRUNCSTORE but
+// selects on the local_*trunc_store PatFrags.
+multiclass LTRUNCSTORE<string asm> {
+  def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i8trunc_store GPRI16:$val, ADDR:$ptr)]>;
+  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
+  def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i8trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
+  def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i16trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i32trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_f32trunc_store GPRF64:$val, ADDR:$ptr)]>;
+  def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i8trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+  def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v4i8trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+  def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i8trunc_store GPRV2I16:$val, ADDR:$ptr)]>;
+  def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v4i8trunc_store GPRV4I16:$val, ADDR:$ptr)]>;
+  def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i16trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+  def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v4i16trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+  def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2f32trunc_store GPRV2F64:$val, ADDR:$ptr)]>;
+  def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i8trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+  def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i16trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+  def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i32trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+}
+
+// Truncating stores to PRIVATE memory; mirrors GTRUNCSTORE but selects
+// on the private_*trunc_store PatFrags.
+multiclass PTRUNCSTORE<string asm> {
+  def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i8trunc_store GPRI16:$val, ADDR:$ptr)]>;
+  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
+  def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i8trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
+  def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i16trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i32trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_f32trunc_store GPRF64:$val, ADDR:$ptr)]>;
+  def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i8trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+  def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v4i8trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+  def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i8trunc_store GPRV2I16:$val, ADDR:$ptr)]>;
+  def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v4i8trunc_store GPRV4I16:$val, ADDR:$ptr)]>;
+  def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i16trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+  def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v4i16trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+  def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2f32trunc_store GPRV2F64:$val, ADDR:$ptr)]>;
+  def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i8trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+  def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i16trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+  def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i32trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+}
+
+// Truncating stores to the REGION address space; mirrors GTRUNCSTORE
+// but selects on the region_*trunc_store PatFrags.
+multiclass RTRUNCSTORE<string asm> {
+  def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i8trunc_store GPRI16:$val, ADDR:$ptr)]>;
+  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i8trunc_store GPRI32:$val, ADDR:$ptr)]>;
+  def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i8trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i16trunc_store GPRI32:$val, ADDR:$ptr)]>;
+  def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i16trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i32trunc_store GPRI64:$val, ADDR:$ptr)]>;
+  def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_f32trunc_store GPRF64:$val, ADDR:$ptr)]>;
+  def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i8trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+  def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v4i8trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+  def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i8trunc_store GPRV2I16:$val, ADDR:$ptr)]>;
+  def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v4i8trunc_store GPRV4I16:$val, ADDR:$ptr)]>;
+  def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i16trunc_store GPRV2I32:$val, ADDR:$ptr)]>;
+  def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v4i16trunc_store GPRV4I32:$val, ADDR:$ptr)]>;
+  def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2f32trunc_store GPRV2F64:$val, ADDR:$ptr)]>;
+  def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i8trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+  def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i16trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+  def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i32trunc_store GPRV2I64:$val, ADDR:$ptr)]>;
+}
+
+
+// Non-truncating stores for every supported scalar and vector register
+// class; the OpNode PatFrag parameter selects the address space.
+multiclass STORE<string asm, PatFrag OpNode> {
+  def _i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI8:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI8:$val, ADDR:$ptr)]>;
+  def _i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI16:$val, ADDR:$ptr)]>;
+  def _i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI32:$val, ADDR:$ptr)]>;
+  def _f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRF32:$val, ADDR:$ptr)]>;
+  def _i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI64:$val, ADDR:$ptr)]>;
+  def _f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRF64:$val, ADDR:$ptr)]>;
+  def _v4f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4F32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4F32:$val, ADDR:$ptr)]>;
+  def _v2f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2F32:$val, ADDR:$ptr)]>;
+  def _v4i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I32:$val, ADDR:$ptr)]>;
+  def _v2i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I8:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I8:$val, ADDR:$ptr)]>;
+  def _v2i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I16:$val, ADDR:$ptr)]>;
+  def _v4i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I8:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I8:$val, ADDR:$ptr)]>;
+  def _v4i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I16:$val, ADDR:$ptr)]>;
+  def _v2i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I32:$val, ADDR:$ptr)]>;
+  def _v2f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2F64:$val, ADDR:$ptr)]>;
+  def _v2i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3232:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I64:$val, ADDR:$ptr)]>;
+}
+
+// Loads for every supported scalar and vector register class; the
+// OpNode PatFrag parameter selects the address space and any extension.
+multiclass LOAD<string asm, PatFrag OpNode> {
+  def _i8 : OneInOneOut<IL_OP_MOV, (outs GPRI8:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI8:$dst, (OpNode ADDR:$ptr))]>;
+  def _i16 : OneInOneOut<IL_OP_MOV, (outs GPRI16:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI16:$dst, (OpNode ADDR:$ptr))]>;
+  def _i32 : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI32:$dst, (OpNode ADDR:$ptr))]>;
+  def _f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRF32:$dst, (OpNode ADDR:$ptr))]>;
+  def _i64 : OneInOneOut<IL_OP_MOV, (outs GPRI64:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI64:$dst, (OpNode ADDR:$ptr))]>;
+  def _f64 : OneInOneOut<IL_OP_MOV, (outs GPRF64:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRF64:$dst, (OpNode ADDR:$ptr))]>;
+  def _v4f32 : OneInOneOut<IL_OP_MOV, (outs GPRV4F32:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4F32:$dst, (OpNode ADDR:$ptr))]>;
+  def _v2f32 : OneInOneOut<IL_OP_MOV, (outs GPRV2F32:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2F32:$dst, (OpNode ADDR:$ptr))]>;
+  def _v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2F64:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2F64:$dst, (OpNode ADDR:$ptr))]>;
+  def _v4i32 : OneInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I32:$dst, (OpNode ADDR:$ptr))]>;
+  def _v2i8 : OneInOneOut<IL_OP_MOV, (outs GPRV2I8:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I8:$dst, (OpNode ADDR:$ptr))]>;
+  def _v2i16 : OneInOneOut<IL_OP_MOV, (outs GPRV2I16:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I16:$dst, (OpNode ADDR:$ptr))]>;
+  def _v4i8 : OneInOneOut<IL_OP_MOV, (outs GPRV4I8:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I8:$dst, (OpNode ADDR:$ptr))]>;
+  def _v4i16 : OneInOneOut<IL_OP_MOV, (outs GPRV4I16:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I16:$dst, (OpNode ADDR:$ptr))]>;
+  def _v2i32 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I32:$dst, (OpNode ADDR:$ptr))]>;
+  def _v2i64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I64:$dst), (ins MEM3232:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I64:$dst, (OpNode ADDR:$ptr))]>;
+}
+
+// Truncating stores to *global* memory through 64-bit pointers
+// (MEM6464 operands, ADDR64 addressing mode).  The def suffix encodes
+// value-type then memory-type: e.g. _i32i8 matches global_i8trunc_store
+// of an i32 register (only the low 8 bits reach memory); _f64f32 stores
+// an f64 register as f32 via global_f32trunc_store.  Vector variants
+// truncate each element (e.g. _v2i32i8 -> global_v2i8trunc_store).
+multiclass GTRUNCSTORE64<string asm> {
+  def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i8trunc_store GPRI16:$val, ADDR64:$ptr)]>;
+  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i8trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+  def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i8trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i16trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+  def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i16trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_i32trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_f32trunc_store GPRF64:$val, ADDR64:$ptr)]>;
+  def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i8trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+  def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v4i8trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+  def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i8trunc_store GPRV2I16:$val, ADDR64:$ptr)]>;
+  def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v4i8trunc_store GPRV4I16:$val, ADDR64:$ptr)]>;
+  def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i16trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+  def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v4i16trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+  def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2f32trunc_store GPRV2F64:$val, ADDR64:$ptr)]>;
+  def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i8trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+  def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i16trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+  def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(global_v2i32trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+}
+
+// Truncating stores to *local* memory through 64-bit pointers
+// (MEM6464/ADDR64).  Identical in structure to GTRUNCSTORE64 but
+// matches the local_*trunc_store PatFrags instead of the global ones.
+// Suffix encodes value-type then memory-type (e.g. _i32i8 stores the
+// low 8 bits of an i32 register).
+multiclass LTRUNCSTORE64<string asm> {
+  def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i8trunc_store GPRI16:$val, ADDR64:$ptr)]>;
+  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i8trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+  def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i8trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i16trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+  def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i16trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_i32trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_f32trunc_store GPRF64:$val, ADDR64:$ptr)]>;
+  def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i8trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+  def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v4i8trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+  def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i8trunc_store GPRV2I16:$val, ADDR64:$ptr)]>;
+  def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v4i8trunc_store GPRV4I16:$val, ADDR64:$ptr)]>;
+  def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i16trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+  def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v4i16trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+  def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2f32trunc_store GPRV2F64:$val, ADDR64:$ptr)]>;
+  def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i8trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+  def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i16trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+  def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(local_v2i32trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+}
+
+// Truncating stores to *private* memory through 64-bit pointers
+// (MEM6464/ADDR64).  Identical in structure to GTRUNCSTORE64 but
+// matches the private_*trunc_store PatFrags.  Suffix encodes
+// value-type then memory-type (e.g. _i64i32 stores the low 32 bits
+// of an i64 register).
+multiclass PTRUNCSTORE64<string asm> {
+  def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i8trunc_store GPRI16:$val, ADDR64:$ptr)]>;
+  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i8trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+  def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i8trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i16trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+  def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i16trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_i32trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_f32trunc_store GPRF64:$val, ADDR64:$ptr)]>;
+  def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i8trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+  def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v4i8trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+  def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i8trunc_store GPRV2I16:$val, ADDR64:$ptr)]>;
+  def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v4i8trunc_store GPRV4I16:$val, ADDR64:$ptr)]>;
+  def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i16trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+  def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v4i16trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+  def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2f32trunc_store GPRV2F64:$val, ADDR64:$ptr)]>;
+  def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i8trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+  def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i16trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+  def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(private_v2i32trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+}
+
+// Truncating stores to *region* memory through 64-bit pointers
+// (MEM6464/ADDR64).  Identical in structure to GTRUNCSTORE64 but
+// matches the region_*trunc_store PatFrags.  Suffix encodes
+// value-type then memory-type (e.g. _v4i32i16 stores the low 16 bits
+// of each i32 element).
+multiclass RTRUNCSTORE64<string asm> {
+  def _i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i8trunc_store GPRI16:$val, ADDR64:$ptr)]>;
+  def _i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i8trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+  def _i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i8trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i16trunc_store GPRI32:$val, ADDR64:$ptr)]>;
+  def _i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i16trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_i32trunc_store GPRI64:$val, ADDR64:$ptr)]>;
+  def _f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_f32trunc_store GPRF64:$val, ADDR64:$ptr)]>;
+  def _v2i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i8trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+  def _v4i32i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v4i8trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+  def _v2i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i8trunc_store GPRV2I16:$val, ADDR64:$ptr)]>;
+  def _v4i16i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v4i8trunc_store GPRV4I16:$val, ADDR64:$ptr)]>;
+  def _v2i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i16trunc_store GPRV2I32:$val, ADDR64:$ptr)]>;
+  def _v4i32i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v4i16trunc_store GPRV4I32:$val, ADDR64:$ptr)]>;
+  def _v2f64f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2f32trunc_store GPRV2F64:$val, ADDR64:$ptr)]>;
+  def _v2i64i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i8trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+  def _v2i64i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i16trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+  def _v2i64i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(region_v2i32trunc_store GPRV2I64:$val, ADDR64:$ptr)]>;
+}
+
+
+// Non-truncating stores through 64-bit pointers (MEM6464/ADDR64), one
+// record per scalar/vector type; each matches (OpNode val, addr).
+// NOTE(review): unlike the 32-bit LOAD multiclass above (which uses
+// bare "_i8"-style def names), these defs carry a "STORE64_" prefix, so
+// `defm FOO : STORE64<...>` yields records named FOOSTORE64_i8, etc.
+// Looks like an inconsistency, but renaming would change the record
+// names users reference -- verify before touching.
+multiclass STORE64<string asm, PatFrag OpNode> {
+  def STORE64_i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI8:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI8:$val, ADDR64:$ptr)]>;
+  def STORE64_i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI16:$val, ADDR64:$ptr)]>;
+  def STORE64_i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI32:$val, ADDR64:$ptr)]>;
+  def STORE64_f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRF32:$val, ADDR64:$ptr)]>;
+  def STORE64_i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI64:$val, ADDR64:$ptr)]>;
+  def STORE64_f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRF64:$val, ADDR64:$ptr)]>;
+  def STORE64_v4f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4F32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4F32:$val, ADDR64:$ptr)]>;
+  def STORE64_v2f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2F32:$val, ADDR64:$ptr)]>;
+  def STORE64_v4i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I32:$val, ADDR64:$ptr)]>;
+  def STORE64_v2i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I8:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I8:$val, ADDR64:$ptr)]>;
+  def STORE64_v2i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I16:$val, ADDR64:$ptr)]>;
+  def STORE64_v4i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I8:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I8:$val, ADDR64:$ptr)]>;
+  def STORE64_v4i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I16:$val, ADDR64:$ptr)]>;
+  def STORE64_v2i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I32:$val, ADDR64:$ptr)]>;
+  def STORE64_v2f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2F64:$val, ADDR64:$ptr)]>;
+  def STORE64_v2i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6464:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I64:$val, ADDR64:$ptr)]>;
+  // The mixed pointer-width variants below (MEM6432/MEM3264) are
+  // intentionally disabled; left in place as-is.
+  /*
+  def STORE6432_i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI8:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI8:$val, ADDR64:$ptr)]>;
+  def STORE6432_i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI16:$val, ADDR64:$ptr)]>;
+  def STORE6432_i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI32:$val, ADDR64:$ptr)]>;
+  def STORE6432_f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF32:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRF32:$val, ADDR64:$ptr)]>;
+  def STORE6432_i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI64:$val, ADDR64:$ptr)]>;
+  def STORE6432_f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRF64:$val, ADDR64:$ptr)]>;
+  def STORE6432_v4f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4F32:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4F32:$val, ADDR64:$ptr)]>;
+  def STORE6432_v2f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F32:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2F32:$val, ADDR64:$ptr)]>;
+  def STORE6432_v4i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I32:$val, ADDR64:$ptr)]>;
+  def STORE6432_v2i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I8:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I8:$val, ADDR64:$ptr)]>;
+  def STORE6432_v2i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I16:$val, ADDR64:$ptr)]>;
+  def STORE6432_v4i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I8:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I8:$val, ADDR64:$ptr)]>;
+  def STORE6432_v4i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I16:$val, ADDR64:$ptr)]>;
+  def STORE6432_v2i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I32:$val, ADDR64:$ptr)]>;
+  def STORE6432_v2f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2F64:$val, ADDR64:$ptr)]>;
+  def STORE6432_v2i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM6432:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I64:$val, ADDR64:$ptr)]>;
+  def STORE3264_i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI8:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI8:$val, ADDR64:$ptr)]>;
+  def STORE3264_i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI16:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI16:$val, ADDR64:$ptr)]>;
+  def STORE3264_i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI32:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI32:$val, ADDR64:$ptr)]>;
+  def STORE3264_f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF32:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRF32:$val, ADDR64:$ptr)]>;
+  def STORE3264_i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRI64:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRI64:$val, ADDR64:$ptr)]>;
+  def STORE3264_f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRF64:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRF64:$val, ADDR64:$ptr)]>;
+  def STORE3264_v4f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4F32:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4F32:$val, ADDR64:$ptr)]>;
+  def STORE3264_v2f32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F32:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2F32:$val, ADDR64:$ptr)]>;
+  def STORE3264_v4i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I32:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I32:$val, ADDR64:$ptr)]>;
+  def STORE3264_v2i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I8:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I8:$val, ADDR64:$ptr)]>;
+  def STORE3264_v2i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I16:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I16:$val, ADDR64:$ptr)]>;
+  def STORE3264_v4i8 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I8:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I8:$val, ADDR64:$ptr)]>;
+  def STORE3264_v4i16 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV4I16:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV4I16:$val, ADDR64:$ptr)]>;
+  def STORE3264_v2i32 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I32:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I32:$val, ADDR64:$ptr)]>;
+  def STORE3264_v2f64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2F64:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2F64:$val, ADDR64:$ptr)]>;
+  def STORE3264_v2i64 : OneInOneOut<IL_OP_MOV, (outs), (ins GPRV2I64:$val, MEM3264:$ptr),
+      !strconcat(asm, " $val $ptr"),
+      [(OpNode GPRV2I64:$val, ADDR64:$ptr)]>;
+  */
+}
+
+// Loads through 64-bit pointers (MEM6464/ADDR64), one record per
+// scalar/vector type; each matches (OpNode addr) into $dst.
+// NOTE(review): def names carry a "LOAD64_" prefix (cf. bare "_i8" in
+// the 32-bit LOAD multiclass), so `defm FOO : LOAD64<...>` yields
+// FOOLOAD64_i8 etc.; renaming would change the record names users
+// reference -- verify before touching.
+multiclass LOAD64<string asm, PatFrag OpNode> {
+  def LOAD64_i8 : OneInOneOut<IL_OP_MOV, (outs GPRI8:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI8:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_i16 : OneInOneOut<IL_OP_MOV, (outs GPRI16:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI16:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_i32 : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRF32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_i64 : OneInOneOut<IL_OP_MOV, (outs GPRI64:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI64:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_f64 : OneInOneOut<IL_OP_MOV, (outs GPRF64:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRF64:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_v4f32 : OneInOneOut<IL_OP_MOV, (outs GPRV4F32:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4F32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_v2f32 : OneInOneOut<IL_OP_MOV, (outs GPRV2F32:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2F32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2F64:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2F64:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_v4i32 : OneInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_v2i8 : OneInOneOut<IL_OP_MOV, (outs GPRV2I8:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I8:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_v2i16 : OneInOneOut<IL_OP_MOV, (outs GPRV2I16:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I16:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_v4i8 : OneInOneOut<IL_OP_MOV, (outs GPRV4I8:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I8:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_v4i16 : OneInOneOut<IL_OP_MOV, (outs GPRV4I16:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I16:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_v2i32 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD64_v2i64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I64:$dst), (ins MEM6464:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I64:$dst, (OpNode ADDR64:$ptr))]>;
+// The mixed pointer-width variants below (MEM6432/MEM3264) are
+// intentionally disabled; left in place as-is.
+/*
+  def LOAD6432_i8 : OneInOneOut<IL_OP_MOV, (outs GPRI8:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI8:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_i16 : OneInOneOut<IL_OP_MOV, (outs GPRI16:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI16:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_i32 : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRF32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_i64 : OneInOneOut<IL_OP_MOV, (outs GPRI64:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI64:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_f64 : OneInOneOut<IL_OP_MOV, (outs GPRF64:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRF64:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_v4f32 : OneInOneOut<IL_OP_MOV, (outs GPRV4F32:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4F32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_v2f32 : OneInOneOut<IL_OP_MOV, (outs GPRV2F32:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2F32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2F64:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2F64:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_v4i32 : OneInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_v2i8 : OneInOneOut<IL_OP_MOV, (outs GPRV2I8:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I8:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_v2i16 : OneInOneOut<IL_OP_MOV, (outs GPRV2I16:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I16:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_v4i8 : OneInOneOut<IL_OP_MOV, (outs GPRV4I8:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I8:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_v4i16 : OneInOneOut<IL_OP_MOV, (outs GPRV4I16:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I16:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_v2i32 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD6432_v2i64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I64:$dst), (ins MEM6432:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I64:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_i8 : OneInOneOut<IL_OP_MOV, (outs GPRI8:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI8:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_i16 : OneInOneOut<IL_OP_MOV, (outs GPRI16:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI16:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_i32 : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRF32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_i64 : OneInOneOut<IL_OP_MOV, (outs GPRI64:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRI64:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_f64 : OneInOneOut<IL_OP_MOV, (outs GPRF64:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRF64:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_v4f32 : OneInOneOut<IL_OP_MOV, (outs GPRV4F32:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4F32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_v2f32 : OneInOneOut<IL_OP_MOV, (outs GPRV2F32:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2F32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2F64:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2F64:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_v4i32 : OneInOneOut<IL_OP_MOV, (outs GPRV4I32:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_v2i8 : OneInOneOut<IL_OP_MOV, (outs GPRV2I8:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I8:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_v2i16 : OneInOneOut<IL_OP_MOV, (outs GPRV2I16:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I16:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_v4i8 : OneInOneOut<IL_OP_MOV, (outs GPRV4I8:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I8:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_v4i16 : OneInOneOut<IL_OP_MOV, (outs GPRV4I16:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV4I16:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_v2i32 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I32:$dst, (OpNode ADDR64:$ptr))]>;
+  def LOAD3264_v2i64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I64:$dst), (ins MEM3264:$ptr),
+      !strconcat(asm, " $dst $ptr"),
+      [(set GPRV2I64:$dst, (OpNode ADDR64:$ptr))]>;
+  */
+}
+
+// Only scalar types should generate flow control: one-operand branch
+// records, one per scalar type (no vector variants by design).
+// The selection pattern list is empty ([]), so these records are not
+// auto-selected from DAG patterns -- presumably matched via custom
+// lowering/selection code elsewhere; confirm in the ISel sources.
+multiclass BranchInstr<ILOpCode opc> {
+  def _i8 : UnaryOpNoRet<opc, (outs), (ins GPRI8:$src),
+      !strconcat(opc.Text, " $src"), []>;
+  def _i16 : UnaryOpNoRet<opc, (outs), (ins GPRI16:$src),
+      !strconcat(opc.Text, " $src"), []>;
+  def _i32 : UnaryOpNoRet<opc, (outs), (ins GPRI32:$src),
+      !strconcat(opc.Text, " $src"), []>;
+  def _i64 : UnaryOpNoRet<opc, (outs), (ins GPRI64:$src),
+      !strconcat(opc.Text, " $src"), []>;
+  def _f32 : UnaryOpNoRet<opc, (outs), (ins GPRF32:$src),
+      !strconcat(opc.Text, " $src"), []>;
+  def _f64 : UnaryOpNoRet<opc, (outs), (ins GPRF64:$src),
+      !strconcat(opc.Text, " $src"), []>;
+}
+// Only scalar types should generate flow control: two-operand variant
+// of BranchInstr (e.g. compare-and-branch forms), one record per
+// scalar type.  As with BranchInstr, the pattern list is empty, so
+// selection presumably happens via custom code -- confirm in ISel.
+multiclass BranchInstr2<ILOpCode opc> {
+  def _i8 : BinaryOpNoRet<opc, (outs),  (ins  GPRI8:$src0,  GPRI8:$src1),
+      !strconcat(opc.Text, " $src0, $src1"), []>;
+  def _i16 : BinaryOpNoRet<opc, (outs), (ins GPRI16:$src0, GPRI16:$src1),
+      !strconcat(opc.Text, " $src0, $src1"), []>;
+  def _i32 : BinaryOpNoRet<opc, (outs), (ins GPRI32:$src0, GPRI32:$src1),
+      !strconcat(opc.Text, " $src0, $src1"), []>;
+  def _i64 : BinaryOpNoRet<opc, (outs), (ins GPRI64:$src0, GPRI64:$src1),
+      !strconcat(opc.Text, " $src0, $src1"), []>;
+  def _f32 : BinaryOpNoRet<opc, (outs), (ins GPRF32:$src0, GPRF32:$src1),
+      !strconcat(opc.Text, " $src0, $src1"), []>;
+  def _f64 : BinaryOpNoRet<opc, (outs), (ins GPRF64:$src0, GPRF64:$src1),
+      !strconcat(opc.Text, " $src0, $src1"), []>;
+}
+
+// Class that handles the various vector extract patterns
+// Scalar-element extraction for every supported vector register class;
+// each def pairs a scalar destination class with its vector source class.
+multiclass VectorExtract<SDNode OpNode> {
+  def _v2f64 : ExtractVectorClass<GPRF64, GPRV2F64, OpNode>;
+  def _v4f32: ExtractVectorClass<GPRF32, GPRV4F32, OpNode>;
+  def _v2f32 : ExtractVectorClass<GPRF32, GPRV2F32, OpNode>;
+  def _v2i64 : ExtractVectorClass<GPRI64, GPRV2I64, OpNode>;
+  def _v4i8 : ExtractVectorClass<GPRI8, GPRV4I8, OpNode>;
+  def _v4i16 : ExtractVectorClass<GPRI16, GPRV4I16, OpNode>;
+  def _v4i32 : ExtractVectorClass<GPRI32, GPRV4I32, OpNode>;
+  def _v2i8 : ExtractVectorClass<GPRI8, GPRV2I8, OpNode>;
+  def _v2i16 : ExtractVectorClass<GPRI16, GPRV2I16, OpNode>;
+  def _v2i32 : ExtractVectorClass<GPRI32, GPRV2I32, OpNode>;
+}
+
+// Vector concatenation: each def pairs a destination vector class with
+// the element/half-vector source class that two of are concatenated.
+multiclass VectorConcat<SDNode OpNode> {
+  def _v2f64 : VectorConcatClass<GPRV2F64, GPRF64, OpNode>;
+  // FIXED: the v2i64 concat previously named GPRV2F64 (a float register
+  // class) as its destination; an i64 concat must produce GPRV2I64, in
+  // line with every other integer def in this multiclass.
+  def _v2i64 : VectorConcatClass<GPRV2I64, GPRI64, OpNode>;
+  def _v4f32 : VectorConcatClass<GPRV4F32, GPRV2F32, OpNode>;
+  def _v4i32 : VectorConcatClass<GPRV4I32, GPRV2I32, OpNode>;
+  def _v4i16 : VectorConcatClass<GPRV4I16, GPRV2I16, OpNode>;
+  def _v4i8 : VectorConcatClass<GPRV4I8, GPRV2I8, OpNode>;
+  def _v2f32 : VectorConcatClass<GPRV2F32, GPRF32, OpNode>;
+  def _v2i32 : VectorConcatClass<GPRV2I32, GPRI32, OpNode>;
+  def _v2i16 : VectorConcatClass<GPRV2I16, GPRI16, OpNode>;
+  def _v2i8 : VectorConcatClass<GPRV2I8, GPRI8, OpNode>;
+}
+
+// Class that handles the various vector insert patterns
+// Element insertion into each vector class; all variants are lowered via
+// the integer-add opcode ("iadd"), pairing a vector destination with the
+// scalar element class being inserted.
+multiclass VectorInsert<SDNode OpNode> {
+  def _v2f64 : InsertVectorClass<IL_OP_I_ADD, GPRV2F64,
+      GPRF64, OpNode, "iadd">;
+  def _v4f32: InsertVectorClass<IL_OP_I_ADD, GPRV4F32,
+      GPRF32, OpNode, "iadd">;
+  def _v2f32 : InsertVectorClass<IL_OP_I_ADD, GPRV2F32,
+      GPRF32, OpNode, "iadd">;
+  def _v2i64 : InsertVectorClass<IL_OP_I_ADD, GPRV2I64,
+      GPRI64, OpNode, "iadd">;
+  def _v4i8 : InsertVectorClass<IL_OP_I_ADD, GPRV4I8,
+      GPRI8, OpNode, "iadd">;
+  def _v4i16 : InsertVectorClass<IL_OP_I_ADD, GPRV4I16,
+      GPRI16, OpNode, "iadd">;
+  def _v4i32 : InsertVectorClass<IL_OP_I_ADD, GPRV4I32,
+      GPRI32, OpNode, "iadd">;
+  def _v2i8 : InsertVectorClass<IL_OP_I_ADD, GPRV2I8,
+      GPRI8, OpNode, "iadd">;
+  def _v2i16 : InsertVectorClass<IL_OP_I_ADD, GPRV2I16,
+      GPRI16, OpNode, "iadd">;
+  def _v2i32 : InsertVectorClass<IL_OP_I_ADD, GPRV2I32,
+      GPRI32, OpNode, "iadd">;
+}
+
+// generic class that handles math instruction for OneInOneOut instruction
+// patterns
+// Unary op over every scalar and vector register class; source and
+// destination classes are always identical.
+multiclass UnaryOpMC<ILOpCode OpCode, SDNode OpNode> {
+  def _i8    : UnaryOp<OpCode, OpNode, GPRI8, GPRI8>;
+  def _i16    : UnaryOp<OpCode, OpNode, GPRI16, GPRI16>;
+  def _i32    : UnaryOp<OpCode, OpNode, GPRI32, GPRI32>;
+  def _f32    : UnaryOp<OpCode, OpNode, GPRF32, GPRF32>;
+  def _f64    : UnaryOp<OpCode, OpNode, GPRF64, GPRF64>;
+  def _i64    : UnaryOp<OpCode, OpNode, GPRI64, GPRI64>;
+  def _v4f32: UnaryOp<OpCode, OpNode, GPRV4F32, GPRV4F32>;
+  def _v4i16  : UnaryOp<OpCode, OpNode, GPRV4I16, GPRV4I16>;
+  def _v4i8  : UnaryOp<OpCode, OpNode, GPRV4I8, GPRV4I8>;
+  def _v4i32  : UnaryOp<OpCode, OpNode, GPRV4I32, GPRV4I32>;
+  def _v2f32  : UnaryOp<OpCode, OpNode, GPRV2F32, GPRV2F32>;
+  def _v2i16  : UnaryOp<OpCode, OpNode, GPRV2I16, GPRV2I16>;
+  def _v2i8  : UnaryOp<OpCode, OpNode, GPRV2I8, GPRV2I8>;
+  def _v2i32  : UnaryOp<OpCode, OpNode, GPRV2I32, GPRV2I32>;
+  def _v2f64  : UnaryOp<OpCode, OpNode, GPRV2F64, GPRV2F64>;
+  def _v2i64  : UnaryOp<OpCode, OpNode, GPRV2I64, GPRV2I64>;
+}
+// Scalar-to-vector unary op (e.g. splat-style): each vector destination
+// class is paired with its scalar element source class.
+multiclass UnaryOpMCVec<ILOpCode OpCode, SDNode OpNode> {
+  def _v4f32: UnaryOp<OpCode, OpNode, GPRV4F32, GPRF32>;
+  def _v4i16  : UnaryOp<OpCode, OpNode, GPRV4I16, GPRI16>;
+  def _v4i8  : UnaryOp<OpCode, OpNode, GPRV4I8, GPRI8>;
+  def _v4i32  : UnaryOp<OpCode, OpNode, GPRV4I32, GPRI32>;
+  def _v2f32  : UnaryOp<OpCode, OpNode, GPRV2F32, GPRF32>;
+  def _v2i16  : UnaryOp<OpCode, OpNode, GPRV2I16, GPRI16>;
+  def _v2i8  : UnaryOp<OpCode, OpNode, GPRV2I8, GPRI8>;
+  def _v2i32  : UnaryOp<OpCode, OpNode, GPRV2I32, GPRI32>;
+  def _v2f64  : UnaryOp<OpCode, OpNode, GPRV2F64, GPRF64>;
+  def _v2i64  : UnaryOp<OpCode, OpNode, GPRV2I64, GPRI64>;
+}
+
+// Unary op restricted to the 32-bit float classes (scalar, v2, v4).
+multiclass UnaryOpMCf32< ILOpCode f32OpCode, SDNode OpNode> {
+  def _f32    : UnaryOp<f32OpCode, OpNode, GPRF32, GPRF32>;
+  def _v4f32: UnaryOp<f32OpCode, OpNode, GPRV4F32, GPRV4F32>;
+  def _v2f32  : UnaryOp<f32OpCode, OpNode, GPRV2F32, GPRV2F32>;
+}
+
+
+// Unary op restricted to scalar f64 (no vector-double variant here).
+multiclass UnaryOpMCf64<ILOpCode f64OpCode, SDNode OpNode> {
+  def _f64    : UnaryOp<f64OpCode, OpNode, GPRF64, GPRF64>;
+}
+
+// Unary op over the integer classes of width <= 32 bits (scalar and
+// vector); all variants share the single i32 opcode.
+multiclass UnaryOpMCi32< ILOpCode i32OpCode, SDNode OpNode> {
+  def _i8    : UnaryOp<i32OpCode, OpNode, GPRI8, GPRI8>;
+  def _i16    : UnaryOp<i32OpCode, OpNode, GPRI16, GPRI16>;
+  def _i32    : UnaryOp<i32OpCode, OpNode, GPRI32, GPRI32>;
+  def _v4i16  : UnaryOp<i32OpCode, OpNode, GPRV4I16, GPRV4I16>;
+  def _v4i8  : UnaryOp<i32OpCode, OpNode, GPRV4I8, GPRV4I8>;
+  def _v4i32  : UnaryOp<i32OpCode, OpNode, GPRV4I32, GPRV4I32>;
+  def _v2i16  : UnaryOp<i32OpCode, OpNode, GPRV2I16, GPRV2I16>;
+  def _v2i8  : UnaryOp<i32OpCode, OpNode, GPRV2I8, GPRV2I8>;
+  def _v2i32  : UnaryOp<i32OpCode, OpNode, GPRV2I32, GPRV2I32>;
+}
+
+
+// Binary op over every scalar and vector register class; all three
+// operand classes (dst, lhs, rhs) are identical per def.
+multiclass BinaryOpMC<ILOpCode OpCode, SDNode OpNode> {
+  def _i8    : BinaryOp<OpCode, OpNode, GPRI8, GPRI8, GPRI8>;
+
+  def _i16    : BinaryOp<OpCode, OpNode, GPRI16, GPRI16, GPRI16>;
+  def _i32    : BinaryOp<OpCode, OpNode, GPRI32, GPRI32, GPRI32>;
+  def _f32    : BinaryOp<OpCode, OpNode, GPRF32, GPRF32, GPRF32>;
+  def _f64    : BinaryOp<OpCode, OpNode, GPRF64, GPRF64, GPRF64>;
+  def _i64    : BinaryOp<OpCode, OpNode, GPRI64, GPRI64, GPRI64>;
+  def _v4f32: BinaryOp<OpCode, OpNode, GPRV4F32, GPRV4F32, GPRV4F32>;
+  def _v4i16  : BinaryOp<OpCode, OpNode, GPRV4I16, GPRV4I16, GPRV4I16>;
+  def _v4i8  : BinaryOp<OpCode, OpNode, GPRV4I8, GPRV4I8, GPRV4I8>;
+  def _v4i32  : BinaryOp<OpCode, OpNode, GPRV4I32, GPRV4I32, GPRV4I32>;
+  def _v2f32  : BinaryOp<OpCode, OpNode, GPRV2F32, GPRV2F32, GPRV2F32>;
+  def _v2i16  : BinaryOp<OpCode, OpNode, GPRV2I16, GPRV2I16, GPRV2I16>;
+  def _v2i8  : BinaryOp<OpCode, OpNode, GPRV2I8, GPRV2I8, GPRV2I8>;
+  def _v2i32  : BinaryOp<OpCode, OpNode, GPRV2I32, GPRV2I32, GPRV2I32>;
+  def _v2f64  : BinaryOp<OpCode, OpNode, GPRV2F64, GPRV2F64, GPRV2F64>;
+  def _v2i64  : BinaryOp<OpCode, OpNode, GPRV2I64, GPRV2I64, GPRV2I64>;
+}
+
+// Binary op over the integer classes only (BinaryOpMC minus the float
+// variants).
+multiclass BinaryOpMCInt<ILOpCode OpCode, SDNode OpNode> {
+  def _i8    : BinaryOp<OpCode, OpNode, GPRI8, GPRI8, GPRI8>;
+
+  def _i16    : BinaryOp<OpCode, OpNode, GPRI16, GPRI16, GPRI16>;
+  def _i32    : BinaryOp<OpCode, OpNode, GPRI32, GPRI32, GPRI32>;
+  def _i64    : BinaryOp<OpCode, OpNode, GPRI64, GPRI64, GPRI64>;
+  def _v4i16  : BinaryOp<OpCode, OpNode, GPRV4I16, GPRV4I16, GPRV4I16>;
+  def _v4i8  : BinaryOp<OpCode, OpNode, GPRV4I8, GPRV4I8, GPRV4I8>;
+  def _v4i32  : BinaryOp<OpCode, OpNode, GPRV4I32, GPRV4I32, GPRV4I32>;
+  def _v2i16  : BinaryOp<OpCode, OpNode, GPRV2I16, GPRV2I16, GPRV2I16>;
+  def _v2i8  : BinaryOp<OpCode, OpNode, GPRV2I8, GPRV2I8, GPRV2I8>;
+  def _v2i32  : BinaryOp<OpCode, OpNode, GPRV2I32, GPRV2I32, GPRV2I32>;
+  def _v2i64  : BinaryOp<OpCode, OpNode, GPRV2I64, GPRV2I64, GPRV2I64>;
+}
+
+// generic class that handles math instruction for ThreeInOneOut
+// instruction patterns
+// Ternary op over every scalar and vector register class; all four
+// operand classes (dst and three sources) are identical per def.
+multiclass TernaryOpMC<ILOpCode OpCode, SDNode OpNode> {
+  def _i8    : TernaryOp<OpCode, OpNode, GPRI8, GPRI8, GPRI8, GPRI8>;
+  def _i16    : TernaryOp<OpCode, OpNode, GPRI16, GPRI16, GPRI16, GPRI16>;
+  def _i32    : TernaryOp<OpCode, OpNode, GPRI32, GPRI32, GPRI32, GPRI32>;
+  def _f32    : TernaryOp<OpCode, OpNode, GPRF32, GPRF32, GPRF32, GPRF32>;
+  def _f64    : TernaryOp<OpCode, OpNode, GPRF64, GPRF64, GPRF64, GPRF64>;
+  def _i64    : TernaryOp<OpCode, OpNode, GPRI64, GPRI64, GPRI64, GPRI64>;
+  def _v4f32: TernaryOp<OpCode, OpNode, GPRV4F32, GPRV4F32,
+      GPRV4F32, GPRV4F32>;
+  def _v4i8  : TernaryOp<OpCode, OpNode, GPRV4I8, GPRV4I8,
+      GPRV4I8, GPRV4I8>;
+  def _v4i16  : TernaryOp<OpCode, OpNode, GPRV4I16, GPRV4I16,
+      GPRV4I16, GPRV4I16>;
+  def _v4i32  : TernaryOp<OpCode, OpNode, GPRV4I32, GPRV4I32,
+      GPRV4I32, GPRV4I32>;
+  def _v2f32  : TernaryOp<OpCode, OpNode, GPRV2F32, GPRV2F32,
+      GPRV2F32, GPRV2F32>;
+  def _v2i8  : TernaryOp<OpCode, OpNode, GPRV2I8, GPRV2I8,
+      GPRV2I8, GPRV2I8>;
+  def _v2i16  : TernaryOp<OpCode, OpNode, GPRV2I16, GPRV2I16,
+      GPRV2I16, GPRV2I16>;
+  def _v2i32  : TernaryOp<OpCode, OpNode, GPRV2I32, GPRV2I32,
+      GPRV2I32, GPRV2I32>;
+  def _v2f64  : TernaryOp<OpCode, OpNode, GPRV2F64, GPRV2F64,
+      GPRV2F64, GPRV2F64>;
+  def _v2i64  : TernaryOp<OpCode, OpNode, GPRV2I64, GPRV2I64,
+      GPRV2I64, GPRV2I64>;
+}
+// Binary op over the <= 32-bit integer classes, all sharing one i32
+// opcode.
+multiclass BinaryOpMCi32<ILOpCode i32OpCode, SDNode OpNode> {
+  def _i8    : BinaryOp<i32OpCode, OpNode, GPRI8, GPRI8, GPRI8>;
+  def _i16   : BinaryOp<i32OpCode, OpNode, GPRI16, GPRI16, GPRI16>;
+  def _i32   : BinaryOp<i32OpCode, OpNode, GPRI32, GPRI32, GPRI32>;
+  def _v4i16 : BinaryOp<i32OpCode, OpNode, GPRV4I16,
+      GPRV4I16, GPRV4I16>;
+  def _v4i8  : BinaryOp<i32OpCode, OpNode, GPRV4I8,
+      GPRV4I8, GPRV4I8>;
+  def _v4i32 : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+      GPRV4I32, GPRV4I32>;
+  def _v2i16 : BinaryOp<i32OpCode, OpNode, GPRV2I16,
+      GPRV2I16, GPRV2I16>;
+  def _v2i8  : BinaryOp<i32OpCode, OpNode, GPRV2I8,
+      GPRV2I8, GPRV2I8>;
+  def _v2i32 : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+      GPRV2I32, GPRV2I32>;
+}
+// Binary op over the 64-bit integer classes (scalar and v2).
+multiclass BinaryOpMCi64<ILOpCode i64OpCode, SDNode OpNode> {
+  def _i64   : BinaryOp<i64OpCode, OpNode, GPRI64, GPRI64, GPRI64>;
+  def _v2i64 : BinaryOp<i64OpCode, OpNode, GPRV2I64,
+      GPRV2I64, GPRV2I64>;
+}
+
+// Binary op restricted to scalar f64.
+multiclass BinaryOpMCf64<ILOpCode f64OpCode, SDNode OpNode> {
+  def _f64   : BinaryOp<f64OpCode, OpNode, GPRF64, GPRF64, GPRF64>;
+}
+// Binary op whose right-hand operand is always a scalar GPRI32 (e.g. a
+// shift amount or immediate-style operand).
+// NOTE(review): the sub-32-bit vector variants (_v4i16/_v4i8/_v2i16/
+// _v2i8) name the i32 vector classes for dst/lhs -- presumably these
+// operate on values promoted to 32 bits; confirm against the selector.
+multiclass BinaryOpMCi32Const<ILOpCode i32OpCode, SDNode OpNode> {
+  def _i8    : BinaryOp<i32OpCode, OpNode, GPRI8, GPRI8, GPRI32>;
+  def _i16   : BinaryOp<i32OpCode, OpNode, GPRI16, GPRI16, GPRI32>;
+  def _i32   : BinaryOp<i32OpCode, OpNode, GPRI32, GPRI32, GPRI32>;
+  def _v4i16 : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+      GPRV4I32, GPRI32>;
+  def _v4i8  : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+      GPRV4I32, GPRI32>;
+  def _v4i32 : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+      GPRV4I32, GPRI32>;
+  def _v2i16 : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+      GPRV2I32, GPRI32>;
+  def _v2i8  : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+      GPRV2I32, GPRI32>;
+  def _v2i32 : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+      GPRV2I32, GPRI32>;
+}
+// Variant of BinaryOpMCi32Const with a scalar GPRI64 right-hand operand;
+// the same promoted-to-i32 convention applies to the sub-32-bit vectors.
+multiclass BinaryOpMCi64Const<ILOpCode i32OpCode, SDNode OpNode> {
+  def _i8i64    : BinaryOp<i32OpCode, OpNode, GPRI8, GPRI8, GPRI64>;
+  def _i16i64   : BinaryOp<i32OpCode, OpNode, GPRI16, GPRI16, GPRI64>;
+  def _i32i64   : BinaryOp<i32OpCode, OpNode, GPRI32, GPRI32, GPRI64>;
+  def _v4i16i64 : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+      GPRV4I32, GPRI64>;
+  def _v4i8i64  : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+      GPRV4I32, GPRI64>;
+  def _v4i32i64 : BinaryOp<i32OpCode, OpNode, GPRV4I32,
+      GPRV4I32, GPRI64>;
+  def _v2i16i64 : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+      GPRV2I32, GPRI64>;
+  def _v2i8i64  : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+      GPRV2I32, GPRI64>;
+  def _v2i32i64 : BinaryOp<i32OpCode, OpNode, GPRV2I32,
+      GPRV2I32, GPRI64>;
+}
+// Binary op restricted to the 32-bit float classes (scalar, v2, v4).
+multiclass BinaryOpMCf32<ILOpCode f32OpCode, SDNode OpNode> {
+  def _f32    : BinaryOp<f32OpCode, OpNode, GPRF32,
+      GPRF32, GPRF32>;
+  def _v4f32: BinaryOp<f32OpCode, OpNode, GPRV4F32,
+      GPRV4F32, GPRV4F32>;
+  def _v2f32  : BinaryOp<f32OpCode, OpNode, GPRV2F32,
+      GPRV2F32, GPRV2F32>;
+}
+
+// Ternary op restricted to scalar f64.
+multiclass TernaryOpMCf64<ILOpCode f64OpCode, SDNode OpNode> {
+  def _f64    : TernaryOp<f64OpCode, OpNode, GPRF64,
+      GPRF64, GPRF64, GPRF64>;
+}
+
+// Ternary op restricted to the 32-bit float classes (scalar, v2, v4).
+multiclass TernaryOpMCf32<ILOpCode f32OpCode, SDNode OpNode> {
+  def _f32    : TernaryOp<f32OpCode, OpNode, GPRF32,
+      GPRF32, GPRF32, GPRF32>;
+  def _v4f32: TernaryOp<f32OpCode, OpNode, GPRV4F32,
+      GPRV4F32, GPRV4F32, GPRV4F32>;
+  def _v2f32  : TernaryOp<f32OpCode, OpNode, GPRV2F32,
+      GPRV2F32, GPRV2F32, GPRV2F32>;
+}
+// Binary floating-point op covering all float classes; the f64/v2f64
+// defs use the double opcode, the f32/v2f32/v4f32 defs the float one.
+// (Reformatted to the file's two-space indentation convention.)
+multiclass BinaryOpMCFloat<ILOpCode f32OpCode, ILOpCode f64OpCode,
+    SDNode OpNode> {
+  def _f64   : BinaryOp<f64OpCode, OpNode, GPRF64,
+      GPRF64, GPRF64>;
+  def _v2f64 : BinaryOp<f64OpCode, OpNode, GPRV2F64,
+      GPRV2F64, GPRV2F64>;
+  def _f32   : BinaryOp<f32OpCode, OpNode, GPRF32,
+      GPRF32, GPRF32>;
+  def _v2f32 : BinaryOp<f32OpCode, OpNode, GPRV2F32,
+      GPRV2F32, GPRV2F32>;
+  def _v4f32 : BinaryOp<f32OpCode, OpNode, GPRV4F32,
+      GPRV4F32, GPRV4F32>;
+}
+
+// Ternary op over the scalar classes only.
+// NOTE(review): the first source operand is GPRI8 for every width --
+// presumably a select/cmov-style condition byte; confirm against the
+// SDNode this is instantiated with.
+multiclass TernaryOpMCScalar<ILOpCode opcode, SDNode node>
+{
+  def _i8:  TernaryOp<opcode, node, GPRI8, GPRI8, GPRI8, GPRI8>;
+  def _i16: TernaryOp<opcode, node, GPRI16, GPRI8, GPRI16, GPRI16>;
+  def _i32: TernaryOp<opcode, node, GPRI32, GPRI8, GPRI32, GPRI32>;
+  def _i64: TernaryOp<opcode, node, GPRI64, GPRI8, GPRI64, GPRI64>;
+  def _f32: TernaryOp<opcode, node, GPRF32, GPRI8, GPRF32, GPRF32>;
+  def _f64: TernaryOp<opcode, node, GPRF64, GPRI8, GPRF64, GPRF64>;
+}
+
+
+// Bit-pattern conversion from every source register class into the one
+// destination class Regs supplied by the instantiation site.
+multiclass BitConversion<ILOpCode opcode, RegisterClass Regs, SDNode OpNode>
+{
+  def _i8    : UnaryOp<opcode, OpNode, Regs,    GPRI8>;
+  def _i16   : UnaryOp<opcode, OpNode, Regs,   GPRI16>;
+  def _i32   : UnaryOp<opcode, OpNode, Regs,   GPRI32>;
+  def _f32   : UnaryOp<opcode, OpNode, Regs,   GPRF32>;
+  def _i64   : UnaryOp<opcode, OpNode, Regs,   GPRI64>;
+  def _f64   : UnaryOp<opcode, OpNode, Regs,   GPRF64>;
+  def _v2i8  : UnaryOp<opcode, OpNode, Regs,  GPRV2I8>;
+  def _v2i16 : UnaryOp<opcode, OpNode, Regs, GPRV2I16>;
+  def _v2i32 : UnaryOp<opcode, OpNode, Regs, GPRV2I32>;
+  def _v2f32 : UnaryOp<opcode, OpNode, Regs, GPRV2F32>;
+  def _v2i64 : UnaryOp<opcode, OpNode, Regs, GPRV2I64>;
+  def _v2f64 : UnaryOp<opcode, OpNode, Regs, GPRV2F64>;
+  def _v4i8  : UnaryOp<opcode, OpNode, Regs,  GPRV4I8>;
+  def _v4i16 : UnaryOp<opcode, OpNode, Regs, GPRV4I16>;
+  def _v4i32 : UnaryOp<opcode, OpNode, Regs, GPRV4I32>;
+  def _v4f32 : UnaryOp<opcode, OpNode, Regs, GPRV4F32>;
+}
+
+
+// One-operand integer intrinsic over i32/v2i32/v4i32 with a direct
+// selection pattern per class.
+multiclass UnaryIntrinsicInt<ILOpCode opcode, Intrinsic intr>
+{
+  def _i32 : OneInOneOut<opcode, (outs GPRI32:$dst),
+      (ins GPRI32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRI32:$dst, (intr GPRI32:$src))]>;
+  def _v2i32 : OneInOneOut<opcode, (outs GPRV2I32:$dst),
+      (ins GPRV2I32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV2I32:$dst, (intr GPRV2I32:$src))]>;
+  def _v4i32 : OneInOneOut<opcode, (outs GPRV4I32:$dst),
+      (ins GPRV4I32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV4I32:$dst, (intr GPRV4I32:$src))]>;
+}
+
+// f32 -> half conversion intrinsic; the half result is held in i16
+// register classes.
+multiclass IntrConvertF32TOF16<ILOpCode opcode, Intrinsic intr>
+{
+  def _i16 : OneInOneOut<opcode, (outs GPRI16:$dst),
+      (ins GPRF32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRI16:$dst, (intr GPRF32:$src))]>;
+  def _v2i16 : OneInOneOut<opcode, (outs GPRV2I16:$dst),
+      (ins GPRV2F32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV2I16:$dst, (intr GPRV2F32:$src))]>;
+  def _v4i16 : OneInOneOut<opcode, (outs GPRV4I16:$dst),
+      (ins GPRV4F32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV4I16:$dst, (intr GPRV4F32:$src))]>;
+}
+
+
+// f32 -> i32 conversion intrinsic (scalar, v2, v4).
+multiclass IntrConvertF32TOI32<ILOpCode opcode, Intrinsic intr>
+{
+  def _i32 : OneInOneOut<opcode, (outs GPRI32:$dst),
+      (ins GPRF32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRI32:$dst, (intr GPRF32:$src))]>;
+  def _v2i32 : OneInOneOut<opcode, (outs GPRV2I32:$dst),
+      (ins GPRV2F32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV2I32:$dst, (intr GPRV2F32:$src))]>;
+  def _v4i32 : OneInOneOut<opcode, (outs GPRV4I32:$dst),
+      (ins GPRV4F32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV4I32:$dst, (intr GPRV4F32:$src))]>;
+}
+
+// f64 -> i32 conversion intrinsic; only scalar and v2 exist since there
+// is no v4f64 register class in this backend.
+multiclass IntrConvertF64TOI32<ILOpCode opcode, Intrinsic intr>
+{
+  def _i32 : OneInOneOut<opcode, (outs GPRI32:$dst),
+      (ins GPRF64:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRI32:$dst, (intr GPRF64:$src))]>;
+  def _v2i32 : OneInOneOut<opcode, (outs GPRV2I32:$dst),
+      (ins GPRV2F64:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV2I32:$dst, (intr GPRV2F64:$src))]>;
+}
+
+// half -> f32 conversion intrinsic; the half input arrives in i16
+// register classes.
+multiclass IntrConvertF16TOF32<ILOpCode opcode, Intrinsic intr>
+{
+  def _f32 : OneInOneOut<opcode, (outs GPRF32:$dst),
+      (ins GPRI16:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRF32:$dst, (intr GPRI16:$src))]>;
+  def _v2f32 : OneInOneOut<opcode, (outs GPRV2F32:$dst),
+      (ins GPRV2I16:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV2F32:$dst, (intr GPRV2I16:$src))]>;
+  def _v4f32 : OneInOneOut<opcode, (outs GPRV4F32:$dst),
+      (ins GPRV4I16:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV4F32:$dst, (intr GPRV4I16:$src))]>;
+}
+
+
+// i32 -> f32 conversion intrinsic (scalar, v2, v4).
+multiclass IntrConvertI32TOF32<ILOpCode opcode, Intrinsic intr>
+{
+  def _f32 : OneInOneOut<opcode, (outs GPRF32:$dst),
+      (ins GPRI32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRF32:$dst, (intr GPRI32:$src))]>;
+  def _v2f32 : OneInOneOut<opcode, (outs GPRV2F32:$dst),
+      (ins GPRV2I32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV2F32:$dst, (intr GPRV2I32:$src))]>;
+  def _v4f32 : OneInOneOut<opcode, (outs GPRV4F32:$dst),
+      (ins GPRV4I32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV4F32:$dst, (intr GPRV4I32:$src))]>;
+}
+
+// Two-operand i64 intrinsic; scalar only.
+multiclass BinaryIntrinsicLong<ILOpCode opcode, Intrinsic intr>
+{
+  def _i64 : TwoInOneOut<opcode, (outs GPRI64:$dst),
+      (ins GPRI64:$src, GPRI64:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRI64:$dst,
+          (intr GPRI64:$src, GPRI64:$src2))]>;
+}
+
+
+// Two-operand i32 intrinsic over i32/v2i32/v4i32.
+multiclass BinaryIntrinsicInt<ILOpCode opcode, Intrinsic intr>
+{
+  def _i32 : TwoInOneOut<opcode, (outs GPRI32:$dst),
+      (ins GPRI32:$src, GPRI32:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRI32:$dst,
+          (intr GPRI32:$src, GPRI32:$src2))]>;
+  def _v2i32 : TwoInOneOut<opcode, (outs GPRV2I32:$dst),
+      (ins GPRV2I32:$src, GPRV2I32:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRV2I32:$dst,
+          (intr GPRV2I32:$src, GPRV2I32:$src2))]>;
+  def _v4i32 : TwoInOneOut<opcode, (outs GPRV4I32:$dst),
+      (ins GPRV4I32:$src, GPRV4I32:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRV4I32:$dst,
+          (intr GPRV4I32:$src, GPRV4I32:$src2))]>;
+}
+
+// Three-operand i32 intrinsic over i32/v2i32/v4i32.
+multiclass TernaryIntrinsicInt<ILOpCode opcode, Intrinsic intr>
+{
+  def _i32 : ThreeInOneOut<opcode, (outs GPRI32:$dst),
+      (ins GPRI32:$src, GPRI32:$src2, GPRI32:$src3),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+      [(set GPRI32:$dst,
+          (intr GPRI32:$src, GPRI32:$src2, GPRI32:$src3))]>;
+  def _v2i32 : ThreeInOneOut<opcode, (outs GPRV2I32:$dst),
+      (ins GPRV2I32:$src, GPRV2I32:$src2, GPRV2I32:$src3),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+      [(set GPRV2I32:$dst,
+          (intr GPRV2I32:$src, GPRV2I32:$src2, GPRV2I32:$src3))]>;
+  def _v4i32 : ThreeInOneOut<opcode, (outs GPRV4I32:$dst),
+      (ins GPRV4I32:$src, GPRV4I32:$src2, GPRV4I32:$src3),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+      [(set GPRV4I32:$dst,
+          (intr GPRV4I32:$src, GPRV4I32:$src2, GPRV4I32:$src3))]>;
+}
+
+// Three-operand f32 intrinsic over f32/v2f32/v4f32.
+multiclass TernaryIntrinsicFloat<ILOpCode opcode, Intrinsic intr>
+{
+  def _f32 : ThreeInOneOut<opcode, (outs GPRF32:$dst),
+      (ins GPRF32:$src, GPRF32:$src2, GPRF32:$src3),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+      [(set GPRF32:$dst,
+          (intr GPRF32:$src, GPRF32:$src2, GPRF32:$src3))]>;
+  def _v2f32 : ThreeInOneOut<opcode, (outs GPRV2F32:$dst),
+      (ins GPRV2F32:$src, GPRV2F32:$src2, GPRV2F32:$src3),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+      [(set GPRV2F32:$dst,
+          (intr GPRV2F32:$src, GPRV2F32:$src2, GPRV2F32:$src3))]>;
+  def _v4f32 : ThreeInOneOut<opcode, (outs GPRV4F32:$dst),
+      (ins GPRV4F32:$src, GPRV4F32:$src2, GPRV4F32:$src3),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+      [(set GPRV4F32:$dst,
+          (intr GPRV4F32:$src, GPRV4F32:$src2, GPRV4F32:$src3))]>;
+}
+
+// Two-operand f64 intrinsic; scalar only.
+multiclass BinaryIntrinsicDoubleScalar<ILOpCode opcode, Intrinsic intr>
+{
+  // FIXED: this two-input pattern was derived from ThreeInOneOut; use
+  // TwoInOneOut so the instruction's format class matches its operand
+  // count, consistent with BinaryIntrinsicFloatScalar and
+  // BinaryIntrinsicDouble which wrap identical binary patterns.
+  def _f64 : TwoInOneOut<opcode, (outs GPRF64:$dst),
+      (ins GPRF64:$src, GPRF64:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRF64:$dst,
+          (intr GPRF64:$src, GPRF64:$src2))]>;
+}
+
+// Three-operand f64 intrinsic; scalar only.
+multiclass TernaryIntrinsicDoubleScalar<ILOpCode opcode, Intrinsic intr>
+{
+  def _f64 : ThreeInOneOut<opcode, (outs GPRF64:$dst),
+      (ins GPRF64:$src, GPRF64:$src2, GPRF64:$src3),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+      [(set GPRF64:$dst,
+          (intr GPRF64:$src, GPRF64:$src2, GPRF64:$src3))]>;
+}
+
+
+// Three-operand i64 intrinsic; scalar only.
+multiclass TernaryIntrinsicLongScalar<ILOpCode opcode, Intrinsic intr>
+{
+  def _i64 : ThreeInOneOut<opcode, (outs GPRI64:$dst),
+      (ins GPRI64:$src, GPRI64:$src2, GPRI64:$src3),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+      [(set GPRI64:$dst,
+          (intr GPRI64:$src, GPRI64:$src2, GPRI64:$src3))]>;
+}
+
+// Four-operand i32 intrinsic over i32/v2i32/v4i32.
+multiclass QuaternaryIntrinsicInt<ILOpCode opcode, Intrinsic intr>
+{
+  def _i32 : FourInOneOut<opcode, (outs GPRI32:$dst),
+      (ins GPRI32:$src, GPRI32:$src2, GPRI32:$src3, GPRI32:$src4),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3, $src4"),
+      [(set GPRI32:$dst,
+          (intr GPRI32:$src, GPRI32:$src2, GPRI32:$src3, GPRI32:$src4))]>;
+  def _v2i32 : FourInOneOut<opcode, (outs GPRV2I32:$dst),
+      (ins GPRV2I32:$src, GPRV2I32:$src2, GPRV2I32:$src3, GPRV2I32:$src4),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3, $src4"),
+      [(set GPRV2I32:$dst,
+        (intr GPRV2I32:$src, GPRV2I32:$src2, GPRV2I32:$src3, GPRV2I32:$src4))]>;
+  def _v4i32 : FourInOneOut<opcode, (outs GPRV4I32:$dst),
+      (ins GPRV4I32:$src, GPRV4I32:$src2, GPRV4I32:$src3, GPRV4I32:$src4),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3, $src4"),
+      [(set GPRV4I32:$dst,
+        (intr GPRV4I32:$src, GPRV4I32:$src2, GPRV4I32:$src3, GPRV4I32:$src4))]>;
+}
+
+// One-operand f32 intrinsic; scalar only (vector forms live in
+// UnaryIntrinsicFloat).
+multiclass UnaryIntrinsicFloatScalar<ILOpCode opcode, Intrinsic intr>
+{
+  def _f32 : OneInOneOut<opcode, (outs GPRF32:$dst),
+      (ins GPRF32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRF32:$dst, (intr GPRF32:$src))]>;
+}
+
+// One-operand f32 intrinsic over f32/v2f32/v4f32.
+multiclass UnaryIntrinsicFloat<ILOpCode opcode, Intrinsic intr>
+{
+  def _f32 : OneInOneOut<opcode, (outs GPRF32:$dst),
+      (ins GPRF32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRF32:$dst, (intr GPRF32:$src))]>;
+  def _v2f32 : OneInOneOut<opcode, (outs GPRV2F32:$dst),
+      (ins GPRV2F32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV2F32:$dst, (intr GPRV2F32:$src))]>;
+  def _v4f32 : OneInOneOut<opcode, (outs GPRV4F32:$dst),
+      (ins GPRV4F32:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV4F32:$dst, (intr GPRV4F32:$src))]>;
+}
+
+// Two-operand f32 intrinsic; scalar only.
+multiclass BinaryIntrinsicFloatScalar<ILOpCode opcode, Intrinsic intr>
+{
+  def _f32 : TwoInOneOut<opcode, (outs GPRF32:$dst),
+      (ins GPRF32:$src, GPRF32:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRF32:$dst,
+          (intr GPRF32:$src, GPRF32:$src2))]>;
+}
+// Two-operand f32 intrinsic over f32/v2f32/v4f32.
+multiclass BinaryIntrinsicFloat<ILOpCode opcode, Intrinsic intr>
+{
+  def _f32 : TwoInOneOut<opcode, (outs GPRF32:$dst),
+      (ins GPRF32:$src, GPRF32:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRF32:$dst,
+          (intr GPRF32:$src, GPRF32:$src2))]>;
+  def _v2f32 : TwoInOneOut<opcode, (outs GPRV2F32:$dst),
+      (ins GPRV2F32:$src, GPRV2F32:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRV2F32:$dst,
+          (intr GPRV2F32:$src, GPRV2F32:$src2))]>;
+  def _v4f32 : TwoInOneOut<opcode, (outs GPRV4F32:$dst),
+      (ins GPRV4F32:$src, GPRV4F32:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRV4F32:$dst,
+          (intr GPRV4F32:$src, GPRV4F32:$src2))]>;
+}
+
+// One-operand f64 intrinsic; scalar only.
+multiclass UnaryIntrinsicDoubleScalar<ILOpCode opcode, Intrinsic intr>
+{
+  def _f64 : OneInOneOut<opcode, (outs GPRF64:$dst),
+      (ins GPRF64:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRF64:$dst, (intr GPRF64:$src))]>;
+}
+
+// One-operand f64 intrinsic over f64/v2f64.
+multiclass UnaryIntrinsicDouble<ILOpCode opcode, Intrinsic intr>
+{
+  def _f64 : OneInOneOut<opcode, (outs GPRF64:$dst),
+      (ins GPRF64:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRF64:$dst, (intr GPRF64:$src))]>;
+  def _v2f64 : OneInOneOut<opcode, (outs GPRV2F64:$dst),
+      (ins GPRV2F64:$src),
+      !strconcat(opcode.Text, " $dst, $src"),
+      [(set GPRV2F64:$dst, (intr GPRV2F64:$src))]>;
+}
+
+// Two-operand f64 intrinsic over f64/v2f64.
+multiclass BinaryIntrinsicDouble<ILOpCode opcode, Intrinsic intr>
+{
+  def _f64 : TwoInOneOut<opcode, (outs GPRF64:$dst),
+      (ins GPRF64:$src, GPRF64:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRF64:$dst,
+          (intr GPRF64:$src, GPRF64:$src2))]>;
+  def _v2f64 : TwoInOneOut<opcode, (outs GPRV2F64:$dst),
+      (ins GPRV2F64:$src, GPRV2F64:$src2),
+      !strconcat(opcode.Text, " $dst, $src, $src2"),
+      [(set GPRV2F64:$dst,
+          (intr GPRV2F64:$src, GPRV2F64:$src2))]>;
+}
+
+// Three-operand f64 intrinsic over f64/v2f64.
+multiclass TernaryIntrinsicDouble<ILOpCode opcode, Intrinsic intr>
+{
+  // FIXED: both defs took three inputs but were derived from
+  // TwoInOneOut; use ThreeInOneOut, matching the parallel
+  // TernaryIntrinsicDoubleScalar / TernaryIntrinsicInt /
+  // TernaryIntrinsicFloat multiclasses.
+  def _f64 : ThreeInOneOut<opcode, (outs GPRF64:$dst),
+      (ins GPRF64:$src, GPRF64:$src2, GPRF64:$src3),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+      [(set GPRF64:$dst,
+          (intr GPRF64:$src, GPRF64:$src2, GPRF64:$src3))]>;
+  def _v2f64 : ThreeInOneOut<opcode, (outs GPRV2F64:$dst),
+      (ins GPRV2F64:$src, GPRV2F64:$src2, GPRV2F64:$src3),
+      !strconcat(opcode.Text, " $dst, $src, $src2, $src3"),
+      [(set GPRV2F64:$dst,
+          (intr GPRV2F64:$src, GPRV2F64:$src2, GPRV2F64:$src3))]>;
+}
+/// Binary pattern instructions multiclass
+// Two-operand i32 PatFrag instruction; the vector defs are disabled
+// pending a pattern rewrite (see FIXME below).
+multiclass BinaryPatFragI32<ILOpCode opcode, PatFrag node> {
+  def _i32 : TwoInOneOut<opcode, (outs GPRI32:$dst),
+      (ins GPRI32:$lhs, GPRI32:$rhs),
+      !strconcat(opcode.Text, " $dst, $lhs, $rhs"),
+      [(set GPRI32:$dst, (node GPRI32:$lhs, GPRI32:$rhs))]>;
+/*
+   // FIXME: Need to rewrite the patterns to get the vector versions to work.
+  def _v2i32 : TwoInOneOut<opcode, (outs GPRV2I32:$dst),
+      (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
+      !strconcat(opcode.Text, " $dst, $lhs, $rhs"),
+      [(set GPRV2I32:$dst, (node GPRV2I32:$lhs, GPRV2I32:$rhs))]>;
+
+  def _v4i32 : TwoInOneOut<opcode, (outs GPRV4I32:$dst),
+      (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
+      !strconcat(opcode.Text, " $dst, $lhs, $rhs"),
+      [(set GPRV4I32:$dst, (node GPRV4I32:$lhs, GPRV4I32:$rhs))]>;
+      */
+}
+
+/// Ternary pattern instructions multiclass
+// Three-operand i32 PatFrag instruction over i32/v2i32/v4i32.  Note the
+// asm string lists $bitpat first while the pattern lists it last.
+multiclass TernaryPatFragI32<ILOpCode opcode, PatFrag node> {
+  def _i32 : ThreeInOneOut<opcode, (outs GPRI32:$dst),
+      (ins GPRI32:$lhs, GPRI32:$rhs, GPRI32:$bitpat),
+      !strconcat(opcode.Text, " $dst, $bitpat, $lhs, $rhs"),
+      [(set GPRI32:$dst, (node GPRI32:$lhs, GPRI32:$rhs, GPRI32:$bitpat))]>;
+
+  def _v2i32 : ThreeInOneOut<opcode, (outs GPRV2I32:$dst),
+      (ins GPRV2I32:$lhs, GPRV2I32:$rhs, GPRV2I32:$bitpat),
+      !strconcat(opcode.Text, " $dst, $bitpat, $lhs, $rhs"),
+      [(set GPRV2I32:$dst, 
+          (node GPRV2I32:$lhs, GPRV2I32:$rhs, GPRV2I32:$bitpat))]>;
+
+  def _v4i32 : ThreeInOneOut<opcode, (outs GPRV4I32:$dst),
+      (ins GPRV4I32:$lhs, GPRV4I32:$rhs, GPRV4I32:$bitpat),
+      !strconcat(opcode.Text, " $dst, $bitpat, $lhs, $rhs"),
+      [(set GPRV4I32:$dst, 
+          (node GPRV4I32:$lhs, GPRV4I32:$rhs, GPRV4I32:$bitpat))]>;
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,75 @@
+//===-- AMDILNIDevice.cpp -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILNIDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILSubtarget.h"
+using namespace llvm;
+
+/// Construct an NI-generation device, mapping the subtarget's device
+/// name onto its device flag.  Any name other than caicos/turks/cayman
+/// falls back to the Barts flag.
+AMDILNIDevice::AMDILNIDevice(AMDILSubtarget *ST)
+  : AMDILEvergreenDevice(ST)
+{
+  std::string name = ST->getDeviceName();
+  if (name == "caicos") {
+    mDeviceFlag = OCL_DEVICE_CAICOS;
+  } else if (name == "turks") {
+    mDeviceFlag = OCL_DEVICE_TURKS;
+  } else if (name == "cayman") {
+    mDeviceFlag = OCL_DEVICE_CAYMAN;
+  } else {
+    mDeviceFlag = OCL_DEVICE_BARTS;
+  }
+}
+// Out-of-line virtual destructor; nothing to release.
+AMDILNIDevice::~AMDILNIDevice()
+{
+}
+
+/// Maximum local data share size for NI parts: MAX_LDS_SIZE_900 when
+/// hardware local memory is in use, otherwise 0.
+size_t
+AMDILNIDevice::getMaxLDSSize() const
+{
+  return usesHardware(AMDILDeviceInfo::LocalMem) ? MAX_LDS_SIZE_900 : 0;
+}
+
+/// Northern Islands parts report as the HD6XXX hardware generation.
+uint32_t
+AMDILNIDevice::getGeneration() const
+{
+  return AMDILDeviceInfo::HD6XXX;
+}
+
+
+/// Cayman is an NI device with extra capability bits; the base ctor has
+/// already set OCL_DEVICE_CAYMAN from the device name.
+AMDILCaymanDevice::AMDILCaymanDevice(AMDILSubtarget *ST)
+  : AMDILNIDevice(ST)
+{
+  setCaps();
+}
+
+// Out-of-line virtual destructor; nothing to release.
+AMDILCaymanDevice::~AMDILCaymanDevice()
+{
+}
+
+/// Adjust the capability bit-vectors for Cayman on top of the NI
+/// defaults.
+void
+AMDILCaymanDevice::setCaps()
+{
+  // Doubles (and hardware FMA) only when the double-ops override is set
+  // on the subtarget.
+  if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+    mHWBits.set(AMDILDeviceInfo::DoubleOps);
+    mHWBits.set(AMDILDeviceInfo::FMA);
+  }
+  // Signed 24-bit ops move from the software list to the hardware list.
+  mHWBits.set(AMDILDeviceInfo::Signed24BitOps);
+  mSWBits.reset(AMDILDeviceInfo::Signed24BitOps);
+  mSWBits.set(AMDILDeviceInfo::ArenaSegment);
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,58 @@
+//===-- AMDILNIDevice.h ---------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+
+// Include guard: the previous macro _AMDILNIDEVICE_H_ (leading underscore
+// followed by an uppercase letter) is an identifier reserved to the
+// implementation by the C++ standard; use a non-reserved spelling.
+#ifndef AMDILNIDEVICE_H
+#define AMDILNIDEVICE_H
+#include "AMDILEvergreenDevice.h"
+#include "AMDILSubtarget.h"
+namespace llvm
+{
+class AMDILSubtarget;
+//===---------------------------------------------------------------------===//
+// NI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+// AMDILNIDevice is the base class for all Northern Islands series of
+// cards.  It is very similar to AMDILEvergreenDevice, with the major
+// exceptions being differences in wavefront size and hardware capabilities.
+// The NI devices all have 64-wide wavefronts and also add support for
+// signed 24-bit integer operations.
+class AMDILNIDevice : public AMDILEvergreenDevice
+{
+public:
+  AMDILNIDevice(AMDILSubtarget*);
+  virtual ~AMDILNIDevice();
+  // Maximum local data share size in bytes (0 when HW local mem is unused).
+  virtual size_t getMaxLDSSize() const;
+  // Returns AMDILDeviceInfo::HD6XXX for every NI part.
+  virtual uint32_t getGeneration() const;
+protected:
+}; // AMDILNIDevice
+
+// Just as AMDILCypressDevice is the double-capable version of
+// AMDILEvergreenDevice, AMDILCaymanDevice is the double-capable version of
+// AMDILNIDevice.  The other notable (if less user-visible) difference is
+// that the Cayman device has 4-wide ALUs, whereas the rest of the NI
+// family is 5-wide.
+class AMDILCaymanDevice: public AMDILNIDevice
+{
+public:
+  AMDILCaymanDevice(AMDILSubtarget*);
+  virtual ~AMDILCaymanDevice();
+private:
+  // Enables Cayman-specific capability bits (doubles, FMA, signed 24-bit).
+  virtual void setCaps();
+}; // AMDILCaymanDevice
+
+// NI parts share the Evergreen 800-series LDS size limit.
+static const unsigned int MAX_LDS_SIZE_900 = AMDILDevice::MAX_LDS_SIZE_800;
+} // namespace llvm
+#endif // AMDILNIDEVICE_H

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNodes.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNodes.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNodes.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNodes.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,309 @@
+//===-- AMDILNodes.td -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Conversion nodes.
+def IL_d2f : SDNode<"AMDILISD::DP_TO_FP"   , SDTIL_DPToFPOp>;
+
+def IL_inttoany: SDNode<"AMDILISD::INTTOANY", SDTIL_IntToAny>;
+//===----------------------------------------------------------------------===//
+// Flow Control DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_brcond      : SDNode<"AMDILISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Comparison DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_cmp       : SDNode<"AMDILISD::CMP", SDTIL_Cmp>;
+
+//===----------------------------------------------------------------------===//
+// Call/Return DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_callseq_start : SDNode<"ISD::CALLSEQ_START", SDTIL_CallSeqStart,
+    [SDNPHasChain, SDNPOutGlue]>;
+def IL_callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDTIL_CallSeqEnd,
+    [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def IL_call      : SDNode<"AMDILISD::CALL", SDTIL_Call,
+    [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def IL_retflag       : SDNode<"AMDILISD::RET_FLAG", SDTNone,
+    [SDNPHasChain, SDNPOptInGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic DAG Nodes
+//===----------------------------------------------------------------------===//
+// Address modification nodes
+// Note: both defs intentionally share the AMDILISD::ADDADDR opcode; they
+// differ only in their type profiles (register/immediate operand order).
+def IL_addaddrri : SDNode<"AMDILISD::ADDADDR", SDTIL_AddAddrri,
+    [SDNPCommutative, SDNPAssociative]>;
+def IL_addaddrir : SDNode<"AMDILISD::ADDADDR", SDTIL_AddAddrir,
+    [SDNPCommutative, SDNPAssociative]>;
+
+//===--------------------------------------------------------------------===//
+// Instructions
+//===--------------------------------------------------------------------===//
+// Floating point math functions
+def IL_cmov_logical : SDNode<"AMDILISD::CMOVLOG", SDTIL_GenTernaryOp>;
+def IL_add       : SDNode<"AMDILISD::ADD"     , SDTIL_GenBinaryOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def IL_cmov        : SDNode<"AMDILISD::CMOV"    , SDTIL_GenBinaryOp>;
+def IL_or      : SDNode<"AMDILISD::OR"    ,SDTIL_GenBinaryOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def IL_and      : SDNode<"AMDILISD::AND"    ,SDTIL_GenBinaryOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def IL_xor          : SDNode<"AMDILISD::XOR", SDTIL_GenBinaryOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def IL_not          : SDNode<"AMDILISD::NOT", SDTIL_GenUnaryOp>;
+def IL_div_inf      : SDNode<"AMDILISD::DIV_INF", SDTIL_GenBinaryOp>;
+def IL_mad          : SDNode<"AMDILISD::MAD", SDTIL_GenTernaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Integer functions
+//===----------------------------------------------------------------------===//
+def IL_inegate     : SDNode<"AMDILISD::INEGATE" , SDTIntUnaryOp>;
+def IL_umul        : SDNode<"AMDILISD::UMUL"    , SDTIntBinOp,
+    [SDNPCommutative, SDNPAssociative]>;
+def IL_mov        : SDNode<"AMDILISD::MOVE", SDTIL_GenUnaryOp>;
+def IL_phimov      : SDNode<"AMDILISD::PHIMOVE", SDTIL_GenUnaryOp>;
+def IL_bitconv     : SDNode<"AMDILISD::BITCONV", SDTIL_GenBitConv>;
+// IFFB_HI/IFFB_LO: find-first-bit from the high/low end respectively.
+def IL_ffb_hi      : SDNode<"AMDILISD::IFFB_HI", SDTIL_GenUnaryOp>;
+def IL_ffb_lo      : SDNode<"AMDILISD::IFFB_LO", SDTIL_GenUnaryOp>;
+def IL_smax        : SDNode<"AMDILISD::SMAX", SDTIL_GenBinaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Double functions
+//===----------------------------------------------------------------------===//
+// DCREATE builds a double from two 32-bit halves; DCOMPHI/DCOMPLO extract
+// the high/low 32 bits.  The *2 variants operate on 2-element vectors.
+def IL_dcreate     : SDNode<"AMDILISD::DCREATE"   , SDTIL_DCreate>;
+def IL_dcomphi     : SDNode<"AMDILISD::DCOMPHI"     , SDTIL_DComp>;
+def IL_dcomplo     : SDNode<"AMDILISD::DCOMPLO"     , SDTIL_DComp>;
+def IL_dcreate2     : SDNode<"AMDILISD::DCREATE2"   , SDTIL_DCreate2>;
+def IL_dcomphi2     : SDNode<"AMDILISD::DCOMPHI2"     , SDTIL_DComp2>;
+def IL_dcomplo2     : SDNode<"AMDILISD::DCOMPLO2"     , SDTIL_DComp2>;
+
+//===----------------------------------------------------------------------===//
+// Long functions
+//===----------------------------------------------------------------------===//
+// Same create/extract scheme as the double nodes above, for i64 values.
+def IL_lcreate     : SDNode<"AMDILISD::LCREATE"   , SDTIL_LCreate>;
+def IL_lcreate2    : SDNode<"AMDILISD::LCREATE2"   , SDTIL_LCreate2>;
+def IL_lcomphi     : SDNode<"AMDILISD::LCOMPHI"     , SDTIL_LComp>;
+def IL_lcomphi2    : SDNode<"AMDILISD::LCOMPHI2"     , SDTIL_LComp2>;
+def IL_lcomplo     : SDNode<"AMDILISD::LCOMPLO"     , SDTIL_LComp>;
+def IL_lcomplo2    : SDNode<"AMDILISD::LCOMPLO2"     , SDTIL_LComp2>;
+
+//===----------------------------------------------------------------------===//
+// Vector functions
+//===----------------------------------------------------------------------===//
+// Build/extract/insert/concatenate on AMDIL vector values.
+def IL_vbuild     : SDNode<"AMDILISD::VBUILD", SDTIL_GenVecBuild,
+    []>;
+def IL_vextract   : SDNode<"AMDILISD::VEXTRACT", SDTIL_GenVecExtract,
+    []>;
+def IL_vinsert    : SDNode<"AMDILISD::VINSERT", SDTIL_GenVecInsert,
+    []>;
+def IL_vconcat    : SDNode<"AMDILISD::VCONCAT", SDTIL_GenVecConcat,
+    []>;
+
+//===----------------------------------------------------------------------===//
+// AMDIL Atomic Custom SDNodes
+//===----------------------------------------------------------------------===//
+//===-------------- 32 bit global atomics with return values --------------===//
+// Each value-returning atomic both reads and writes memory, hence
+// SDNPMayLoad + SDNPMayStore, and carries a MachineMemOperand.
+def atom_g_add : SDNode<"AMDILISD::ATOM_G_ADD", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_and : SDNode<"AMDILISD::ATOM_G_AND", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_cmpxchg : SDNode<"AMDILISD::ATOM_G_CMPXCHG", SDTIL_TriAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; 
+def atom_g_dec : SDNode<"AMDILISD::ATOM_G_DEC", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_inc : SDNode<"AMDILISD::ATOM_G_INC", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_max : SDNode<"AMDILISD::ATOM_G_MAX", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_umax : SDNode<"AMDILISD::ATOM_G_UMAX", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_min : SDNode<"AMDILISD::ATOM_G_MIN", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_umin : SDNode<"AMDILISD::ATOM_G_UMIN", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_or : SDNode<"AMDILISD::ATOM_G_OR", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_sub : SDNode<"AMDILISD::ATOM_G_SUB", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_rsub : SDNode<"AMDILISD::ATOM_G_RSUB", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_xchg : SDNode<"AMDILISD::ATOM_G_XCHG", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+// 32-bit global atomic xor returning the old value.  Like every other
+// value-returning atomic in this section it both reads and writes memory,
+// so it must carry SDNPMayLoad in addition to SDNPMayStore (the flag was
+// missing here, letting alias analysis assume this node never loads).
+def atom_g_xor : SDNode<"AMDILISD::ATOM_G_XOR", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+//===------------- 32 bit global atomics without return values ------------===//
+// No-return atomics only advertise SDNPMayStore (no value is read back
+// into a register), except cmpxchg/xchg which still observe memory.
+def atom_g_add_noret : SDNode<"AMDILISD::ATOM_G_ADD_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_and_noret : SDNode<"AMDILISD::ATOM_G_AND_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_cmpxchg_noret : SDNode<"AMDILISD::ATOM_G_CMPXCHG_NORET",
+    SDTIL_TriAtomNoRet, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+// NOTE(review): atom_g_cmp_noret maps to the SAME AMDILISD::ATOM_G_CMPXCHG_NORET
+// opcode as atom_g_cmpxchg_noret above, but with different node properties
+// (no SDNPMayLoad).  This looks like a duplicated/stale def — confirm which
+// one instruction selection actually uses.
+def atom_g_cmp_noret : SDNode<"AMDILISD::ATOM_G_CMPXCHG_NORET",
+    SDTIL_TriAtomNoRet, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_dec_noret : SDNode<"AMDILISD::ATOM_G_DEC_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_inc_noret : SDNode<"AMDILISD::ATOM_G_INC_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_max_noret : SDNode<"AMDILISD::ATOM_G_MAX_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_umax_noret: SDNode<"AMDILISD::ATOM_G_UMAX_NORET",
+    SDTIL_BinAtomNoRet, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_min_noret : SDNode<"AMDILISD::ATOM_G_MIN_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_umin_noret: SDNode<"AMDILISD::ATOM_G_UMIN_NORET",
+    SDTIL_BinAtomNoRet, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_or_noret : SDNode<"AMDILISD::ATOM_G_OR_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_sub_noret : SDNode<"AMDILISD::ATOM_G_SUB_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_rsub_noret : SDNode<"AMDILISD::ATOM_G_RSUB_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_xchg_noret: SDNode<"AMDILISD::ATOM_G_XCHG_NORET",
+    SDTIL_BinAtomNoRet, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_g_xor_noret : SDNode<"AMDILISD::ATOM_G_XOR_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+//===--------------- 32 bit local atomics with return values --------------===//
+// Local (LDS) counterparts of the global value-returning atomics; all
+// read-modify-write, hence SDNPMayLoad + SDNPMayStore + SDNPMemOperand.
+def atom_l_add : SDNode<"AMDILISD::ATOM_L_ADD", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_and : SDNode<"AMDILISD::ATOM_L_AND", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_cmpxchg : SDNode<"AMDILISD::ATOM_L_CMPXCHG", SDTIL_TriAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_dec : SDNode<"AMDILISD::ATOM_L_DEC", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_inc : SDNode<"AMDILISD::ATOM_L_INC", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_max : SDNode<"AMDILISD::ATOM_L_MAX", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_umax : SDNode<"AMDILISD::ATOM_L_UMAX", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_min : SDNode<"AMDILISD::ATOM_L_MIN", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_umin : SDNode<"AMDILISD::ATOM_L_UMIN", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_or : SDNode<"AMDILISD::ATOM_L_OR", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+// mskor: masked-or (ternary: pointer, mask, value).
+def atom_l_mskor : SDNode<"AMDILISD::ATOM_L_MSKOR", SDTIL_TriAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_sub : SDNode<"AMDILISD::ATOM_L_SUB", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_rsub : SDNode<"AMDILISD::ATOM_L_RSUB", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_xchg : SDNode<"AMDILISD::ATOM_L_XCHG", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_xor : SDNode<"AMDILISD::ATOM_L_XOR", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+//===-------------- 32 bit local atomics without return values ------------===//
+// Local (LDS) no-return atomics; only cmpxchg/xchg keep SDNPMayLoad.
+def atom_l_add_noret : SDNode<"AMDILISD::ATOM_L_ADD_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_and_noret : SDNode<"AMDILISD::ATOM_L_AND_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_cmpxchg_noret : SDNode<"AMDILISD::ATOM_L_CMPXCHG_NORET",
+    SDTIL_TriAtomNoRet, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_dec_noret : SDNode<"AMDILISD::ATOM_L_DEC_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_inc_noret : SDNode<"AMDILISD::ATOM_L_INC_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_max_noret : SDNode<"AMDILISD::ATOM_L_MAX_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_umax_noret: SDNode<"AMDILISD::ATOM_L_UMAX_NORET",
+    SDTIL_BinAtomNoRet, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_min_noret : SDNode<"AMDILISD::ATOM_L_MIN_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_umin_noret: SDNode<"AMDILISD::ATOM_L_UMIN_NORET",
+    SDTIL_BinAtomNoRet, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_or_noret : SDNode<"AMDILISD::ATOM_L_OR_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_mskor_noret : SDNode<"AMDILISD::ATOM_L_MSKOR_NORET",
+    SDTIL_TriAtomNoRet, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_sub_noret : SDNode<"AMDILISD::ATOM_L_SUB_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_rsub_noret : SDNode<"AMDILISD::ATOM_L_RSUB_NORET",
+    SDTIL_BinAtomNoRet, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_xchg_noret: SDNode<"AMDILISD::ATOM_L_XCHG_NORET",
+    SDTIL_BinAtomNoRet, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_l_xor_noret : SDNode<"AMDILISD::ATOM_L_XOR_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+//===--------------- 32 bit region atomics with return values --------------===//
+// Region (GDS) counterparts of the value-returning atomics above.
+def atom_r_add : SDNode<"AMDILISD::ATOM_R_ADD", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_and : SDNode<"AMDILISD::ATOM_R_AND", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_cmpxchg : SDNode<"AMDILISD::ATOM_R_CMPXCHG", SDTIL_TriAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_dec : SDNode<"AMDILISD::ATOM_R_DEC", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_inc : SDNode<"AMDILISD::ATOM_R_INC", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_max : SDNode<"AMDILISD::ATOM_R_MAX", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_umax : SDNode<"AMDILISD::ATOM_R_UMAX", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_min : SDNode<"AMDILISD::ATOM_R_MIN", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_umin : SDNode<"AMDILISD::ATOM_R_UMIN", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_or : SDNode<"AMDILISD::ATOM_R_OR", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_mskor : SDNode<"AMDILISD::ATOM_R_MSKOR", SDTIL_TriAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_sub : SDNode<"AMDILISD::ATOM_R_SUB", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_rsub : SDNode<"AMDILISD::ATOM_R_RSUB", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_xchg : SDNode<"AMDILISD::ATOM_R_XCHG", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_xor : SDNode<"AMDILISD::ATOM_R_XOR", SDTIL_BinAtom,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+//===-------------- 32 bit region atomics without return values ------------===//
+// Region (GDS) no-return atomics; only cmpxchg/xchg keep SDNPMayLoad.
+def atom_r_add_noret : SDNode<"AMDILISD::ATOM_R_ADD_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_and_noret : SDNode<"AMDILISD::ATOM_R_AND_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_cmpxchg_noret : SDNode<"AMDILISD::ATOM_R_CMPXCHG_NORET",
+    SDTIL_TriAtomNoRet, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_dec_noret : SDNode<"AMDILISD::ATOM_R_DEC_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_inc_noret : SDNode<"AMDILISD::ATOM_R_INC_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_max_noret : SDNode<"AMDILISD::ATOM_R_MAX_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_umax_noret: SDNode<"AMDILISD::ATOM_R_UMAX_NORET",
+    SDTIL_BinAtomNoRet, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_min_noret : SDNode<"AMDILISD::ATOM_R_MIN_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_umin_noret: SDNode<"AMDILISD::ATOM_R_UMIN_NORET",
+    SDTIL_BinAtomNoRet, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_or_noret : SDNode<"AMDILISD::ATOM_R_OR_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_mskor_noret : SDNode<"AMDILISD::ATOM_R_MSKOR_NORET", SDTIL_TriAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_sub_noret : SDNode<"AMDILISD::ATOM_R_SUB_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_rsub_noret : SDNode<"AMDILISD::ATOM_R_RSUB_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_xchg_noret: SDNode<"AMDILISD::ATOM_R_XCHG_NORET",
+    SDTIL_BinAtomNoRet, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def atom_r_xor_noret : SDNode<"AMDILISD::ATOM_R_XOR_NORET", SDTIL_BinAtomNoRet,
+    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+//===--------------- 32 bit atomic counter instructions -------------------===//
+// Append/consume counters (OpenCL atomic counters); both touch memory.
+def append_alloc : SDNode<"AMDILISD::APPEND_ALLOC", SDTIL_Append,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+def append_consume : SDNode<"AMDILISD::APPEND_CONSUME", SDTIL_Append,
+    [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILOperands.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILOperands.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILOperands.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILOperands.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,48 @@
+//===-- AMDILOperands.td --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Memory operands: MEMxxyy names presumably encode base-register width xx
+// and offset width yy in bits — TODO confirm against the instruction defs.
+def MEM3232  : Operand<i32> {
+    let PrintMethod = "printMemOperand";
+    let MIOperandInfo = (ops GPRI32, GPRI32);
+}
+
+def MEM6464 : Operand<i64> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops GPRI64, GPRI64);
+}
+
+def MEM3264 : Operand<i64> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops GPRI32, GPRI64);
+}
+
+def MEM6432 : Operand<i64> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops GPRI64, GPRI32);
+}
+
+// Call target types
+def calltarget   : Operand<i32>;
+def brtarget   : Operand<OtherVT>;
+
+// Vector immediate operands, currently unused.
+// def v2i8imm : Operand<v2i8>;
+// def v4i8imm : Operand<v4i8>;
+// def v2i16imm : Operand<v2i16>;
+// def v4i16imm : Operand<v4i16>;
+// def v2i32imm : Operand<v2i32>;
+// def v4i32imm : Operand<v4i32>;
+// def v2i64imm : Operand<v2i64>;
+// def v2f32imm : Operand<v2f32>;
+// def v4f32imm : Operand<v4f32>;
+// def v2f64imm : Operand<v2f64>;
+
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPatterns.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPatterns.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPatterns.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPatterns.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,505 @@
+//===-- AMDILPatterns.td --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Truncating-store fragments keyed by the in-memory value type: each one
+// matches a generic truncstore whose MemoryVT equals the named type.
+def truncstorei64 : PatFrag<(ops node:$val, node:$ptr),
+                           (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+def truncstorev2i8 : PatFrag<(ops node:$val, node:$ptr),
+                           (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i8;
+}]>;
+def truncstorev2i16 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16;
+}]>;
+def truncstorev2i32 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i32;
+}]>;
+def truncstorev2i64 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i64;
+}]>;
+def truncstorev2f32 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2f32;
+}]>;
+def truncstorev2f64 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2f64;
+}]>;
+def truncstorev4i8 : PatFrag<(ops node:$val, node:$ptr),
+                           (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8;
+}]>;
+def truncstorev4i16 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i16;
+}]>;
+def truncstorev4i32 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i32;
+}]>;
+def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+// Address-space-classified plain stores.  The PatFrag root is `store`, so
+// N is always a StoreSDNode here; use cast<> (which asserts the invariant)
+// instead of dyn_cast<> (whose null result would have been dereferenced
+// unchecked inside the is*Store predicates).
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+    (store node:$val, node:$ptr), [{
+        return isGlobalStore(cast<StoreSDNode>(N));
+}]>;
+def private_store : PatFrag<(ops node:$val, node:$ptr),
+    (store node:$val, node:$ptr), [{
+        return isPrivateStore(cast<StoreSDNode>(N));
+}]>;
+def local_store : PatFrag<(ops node:$val, node:$ptr),
+    (store node:$val, node:$ptr), [{
+        return isLocalStore(cast<StoreSDNode>(N));
+}]>;
+def region_store : PatFrag<(ops node:$val, node:$ptr),
+    (store node:$val, node:$ptr), [{
+        return isRegionStore(cast<StoreSDNode>(N));
+}]>;
+// Global-address-space truncating stores, one fragment per memory type.
+// NOTE(review): dyn_cast<> can return null, and the is*Store predicates
+// appear to use the pointer unchecked; since the root is a truncstore,
+// cast<> would assert the invariant instead — confirm and consider.
+def global_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei8 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei16 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei32 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei64 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstoref32 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstoref64 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i8 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i16 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i32 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i64 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2f32 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2f64 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i8 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i16 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i32 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def global_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4f32 node:$val, node:$ptr), [{
+        return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+// Private-address-space truncating stores, one fragment per memory type
+// (plus a generic catch-all).  Same dyn_cast<> vs cast<> review note as
+// the global fragments above.
+def private_trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstore node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei8 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei16 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei32 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei64 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstoref32 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstoref64 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i8 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i16 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i32 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i64 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2f32 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2f64 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i8 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i16 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i32 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def private_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4f32 node:$val, node:$ptr), [{
+        return isPrivateStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+// Truncating-store pattern fragments for the *local* (LDS) address space.
+// Same shape as the private_* group above: each fragment matches the
+// generic truncstore node, gated by the isLocalStore() predicate.
+def local_trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstore node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei8 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei16 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei32 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei64 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstoref32 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstoref64 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i8 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i16 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i32 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i64 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2f32 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2f64 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i8 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i16 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i32 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def local_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4f32 node:$val, node:$ptr), [{
+        return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+// Truncating-store pattern fragments for the *region* (GDS) address space,
+// gated by the isRegionStore() predicate.  Mirrors the private_* and
+// local_* groups above.
+def region_trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstore node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei8 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei16 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei32 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorei64 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstoref32 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstoref64 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i8 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i16 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i32 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2i64 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2f32 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev2f64 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i8 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i16 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4i32 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+def region_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr),
+    (truncstorev4f32 node:$val, node:$ptr), [{
+        return isRegionStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Load pattern fragments
+//===----------------------------------------------------------------------===//
+// One (plain, sext, aext, zext) fragment per address space, each gated by
+// the corresponding is*Load() predicate from the ISel lowering code.
+//
+// NOTE(review): in the global/private/local/region groups below, the
+// *_aext_load names are bound to the `zextload` node and the *_zext_load
+// names are bound to the `extload` (any-extend) node -- i.e. the two appear
+// swapped relative to their names, while the cp_* group at the bottom pairs
+// names and nodes as expected.  The instruction patterns that consume these
+// fragments may compensate for the swap; verify against those uses before
+// renaming or "fixing" these definitions.
+// Global address space loads
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def global_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def global_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def global_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Private address space loads
+def private_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+    return isPrivateLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def private_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+    return isPrivateLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def private_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+    return isPrivateLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def private_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+    return isPrivateLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Local address space loads
+def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+    return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def local_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+    return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def local_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+    return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def local_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+    return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Region address space loads
+def region_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+    return isRegionLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def region_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+    return isRegionLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def region_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+    return isRegionLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def region_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+    return isRegionLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Constant address space loads; the -1 argument means "any constant buffer"
+// (see isConstantLoad()'s cbID parameter).
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+def constant_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+def constant_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+def constant_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+// Constant pool loads
+def cp_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isCPLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def cp_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+  return isCPLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def cp_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+  return isCPLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+def cp_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return isCPLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Complex addressing mode patterns
+//===----------------------------------------------------------------------===//
+// Each pattern selects a (base, offset) pair via the named C++ routine in
+// the DAG ISel class; the *F variants additionally root on frameindex nodes.
+def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
+def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
+def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
+def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
+
+
+//===----------------------------------------------------------------------===//
+// Conditional Instruction Pattern Leafs
+//===----------------------------------------------------------------------===//
+// Immediate condition-code operands used by the IL compare/select patterns.
+// Every IL_CC_* leaf must carry a unique encoding so that distinct condition
+// codes never match each other's patterns.
+class IL_CC_Op<int N> : PatLeaf<(i32 N)>;
+def IL_CC_D_EQ  : IL_CC_Op<0>;
+def IL_CC_D_GE  : IL_CC_Op<1>;
+def IL_CC_D_LT  : IL_CC_Op<2>;
+def IL_CC_D_NE  : IL_CC_Op<3>;
+def IL_CC_F_EQ  : IL_CC_Op<4>;
+def IL_CC_F_GE  : IL_CC_Op<5>;
+def IL_CC_F_LT  : IL_CC_Op<6>;
+def IL_CC_F_NE  : IL_CC_Op<7>;
+def IL_CC_I_EQ  : IL_CC_Op<8>;
+def IL_CC_I_GE  : IL_CC_Op<9>;
+def IL_CC_I_LT  : IL_CC_Op<10>;
+def IL_CC_I_NE  : IL_CC_Op<11>;
+def IL_CC_U_GE  : IL_CC_Op<12>;
+def IL_CC_U_LT  : IL_CC_Op<13>;
+// Pseudo IL comparison instructions that aren't natively supported
+def IL_CC_F_GT  : IL_CC_Op<14>;
+def IL_CC_U_GT  : IL_CC_Op<15>;
+def IL_CC_I_GT  : IL_CC_Op<16>;
+def IL_CC_D_GT  : IL_CC_Op<17>;
+def IL_CC_F_LE  : IL_CC_Op<18>;
+def IL_CC_U_LE  : IL_CC_Op<19>;
+def IL_CC_I_LE  : IL_CC_Op<20>;
+def IL_CC_D_LE  : IL_CC_Op<21>;
+def IL_CC_F_UNE : IL_CC_Op<22>;
+def IL_CC_F_UEQ : IL_CC_Op<23>;
+def IL_CC_F_ULT : IL_CC_Op<24>;
+def IL_CC_F_UGT : IL_CC_Op<25>;
+def IL_CC_F_ULE : IL_CC_Op<26>;
+def IL_CC_F_UGE : IL_CC_Op<27>;
+def IL_CC_F_ONE : IL_CC_Op<28>;
+def IL_CC_F_OEQ : IL_CC_Op<29>;
+def IL_CC_F_OLT : IL_CC_Op<30>;
+def IL_CC_F_OGT : IL_CC_Op<31>;
+def IL_CC_F_OLE : IL_CC_Op<32>;
+def IL_CC_F_OGE : IL_CC_Op<33>;
+def IL_CC_D_UNE : IL_CC_Op<34>;
+def IL_CC_D_UEQ : IL_CC_Op<35>;
+def IL_CC_D_ULT : IL_CC_Op<36>;
+def IL_CC_D_UGT : IL_CC_Op<37>;
+def IL_CC_D_ULE : IL_CC_Op<38>;
+def IL_CC_D_UGE : IL_CC_Op<39>;
+// Was erroneously <30>, which collided with IL_CC_F_OLT and broke the
+// otherwise sequential 34..45 run of the double (D_) ordered/unordered codes.
+def IL_CC_D_ONE : IL_CC_Op<40>;
+def IL_CC_D_OEQ : IL_CC_Op<41>;
+def IL_CC_D_OLT : IL_CC_Op<42>;
+def IL_CC_D_OGT : IL_CC_Op<43>;
+def IL_CC_D_OLE : IL_CC_Op<44>;
+def IL_CC_D_OGE : IL_CC_Op<45>;
+def IL_CC_U_EQ  : IL_CC_Op<46>;
+def IL_CC_U_NE  : IL_CC_Op<47>;
+def IL_CC_F_O   : IL_CC_Op<48>;
+def IL_CC_D_O   : IL_CC_Op<49>;
+def IL_CC_F_UO  : IL_CC_Op<50>;
+def IL_CC_D_UO  : IL_CC_Op<51>;
+def IL_CC_L_LE  : IL_CC_Op<52>;
+def IL_CC_L_GE  : IL_CC_Op<53>;
+def IL_CC_L_EQ  : IL_CC_Op<54>;
+def IL_CC_L_NE  : IL_CC_Op<55>;
+def IL_CC_L_LT  : IL_CC_Op<56>;
+def IL_CC_L_GT  : IL_CC_Op<57>;
+def IL_CC_UL_LE  : IL_CC_Op<58>;
+def IL_CC_UL_GE  : IL_CC_Op<59>;
+def IL_CC_UL_EQ  : IL_CC_Op<60>;
+def IL_CC_UL_NE  : IL_CC_Op<61>;
+def IL_CC_UL_LT  : IL_CC_Op<62>;
+def IL_CC_UL_GT  : IL_CC_Op<63>;

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,2033 @@
+//===-- AMDILPeepholeOptimizer.cpp ----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements AMDILPeepholeOpt, an IR-level pass that performs
+// AMDIL-specific peephole rewrites of calls and bit-manipulation sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "PeepholeOpt"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <sstream>
+STATISTIC(LocalFuncs, "Number of get_local_size(N) functions removed");
+
+using namespace llvm;
+// The Peephole optimization pass is used to do simple last minute optimizations
+// that are required for correct code or to remove redundant functions
+namespace
+{
+// AMDILPeepholeOpt - IR-level FunctionPass run late in the AMDIL pipeline.
+// It performs last-minute, mostly pattern-driven rewrites of call and bit
+// manipulation instructions (see the member function comments below) that
+// are needed either for correctness or to remove redundant builtins.
+class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass
+{
+public:
+  TargetMachine &TM;
+  static char ID;
+  AMDILPeepholeOpt(TargetMachine &tm, CodeGenOpt::Level OL);
+  ~AMDILPeepholeOpt();
+  const char *getPassName() const;
+  bool runOnFunction(Function &F);
+  bool doInitialization(Module &M);
+  bool doFinalization(Module &M);
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+protected:
+private:
+  // Function to initiate all of the instruction level optimizations.
+  // Returns true if a new instruction was generated and false otherwise.
+  bool instLevelOptimizations(Instruction *inst);
+  // Quick check to see if we need to dump all of the pointers into the
+  // arena. If this is correct, then we set all pointers to exist in arena. This
+  // is a workaround for aliasing of pointers in a struct/union.
+  bool dumpAllIntoArena(Function &F);
+  // Because I don't want to invalidate any pointers while in the
+  // safeNestedForEachFunction. I push atomic conversions to a vector and handle
+  // it later. This function does the conversions if required.
+  void doAtomicConversionIfNeeded(Function &F);
+  // Because __amdil_is_constant cannot be properly evaluated if
+  // optimizations are disabled, the call's are placed in a vector
+  // and evaluated after the __amdil_image* functions are evaluated
+  // which should allow the __amdil_is_constant function to be
+  // evaluated correctly.
+  void doIsConstCallConversionIfNeeded();
+  // True once any rewrite has been performed in the current function;
+  // returned from runOnFunction().
+  bool mChanged;
+  // Cached DEBUGME value; enables the dump() calls in runOnFunction().
+  bool mDebug;
+  // True when the kernel declares a required work-group size, which makes
+  // get_local_size() a compile-time constant (see isRWGLocalOpt()).
+  bool mRWGOpt;
+  // Cleared when an atomic's base pointer cannot be traced to an argument;
+  // doAtomicConversionIfNeeded() then routes pointers through the arena.
+  bool mConvertAtomics;
+  CodeGenOpt::Level optLevel;
+  // Run a series of tests to see if we can optimize a CALL instruction.
+  bool optimizeCallInst(Instruction *inst);
+  // A peephole optimization to optimize bit extract sequences.
+  bool optimizeBitExtract(Instruction *inst);
+  // A peephole optimization to optimize bit insert sequences.
+  bool optimizeBitInsert(Instruction *inst);
+  // A peephole optimization that does the following transform:
+  // ((((B ^ -1) | C) & A) ^ -1)
+  // ==>
+  // BFI(A, (B & (C ^ -1)), -1)
+  bool optimizeBFI(Instruction *inst);
+  bool setupBitInsert(Instruction *base,
+                      Instruction *&src,
+                      Constant *&mask,
+                      Constant *&shift);
+  // A peephole optimization to optimize [d]class calls that or the results.
+  bool optimizeClassInst(Instruction *inst);
+  // Generate F2U4 intrinisic
+  bool genIntrF2U4(Instruction *inst);
+
+  // Expand the bit field insert instruction on versions of OpenCL that
+  // don't support it.
+  bool expandBFI(CallInst *CI);
+  // Expand the bit field mask instruction on version of OpenCL that
+  // don't support it.
+  bool expandBFM(CallInst *CI);
+  // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
+  // this case we need to expand them. These functions check for 24bit functions
+  // and then expand.
+  bool isSigned24BitOps(CallInst *CI);
+  void expandSigned24BitOps(CallInst *CI);
+  // One optimization that can occur is that if the required workgroup size is
+  // specified then the result of get_local_size is known at compile time and
+  // can be returned accordingly.
+  bool isRWGLocalOpt(CallInst *CI);
+  void expandRWGLocalOpt(CallInst *CI);
+  // On northern island cards, the division is slightly less accurate than on
+  // previous generations, so we need to utilize a more accurate division. So we
+  // can translate the accurate divide to a normal divide on all other cards.
+  bool convertAccurateDivide(CallInst *CI);
+  void expandAccurateDivide(CallInst *CI);
+  // If the alignment is set incorrectly, it can produce really inefficient
+  // code. This checks for this scenario and fixes it if possible.
+  bool correctMisalignedMemOp(Instruction *inst);
+
+  // If we are in no opt mode, then we need to make sure that
+  // local samplers are properly propagated as constant propagation
+  // doesn't occur and we need to know the value of kernel defined
+  // samplers at compile time.
+  bool propagateSamplerInst(CallInst *CI);
+
+  // Cached per-function state, refreshed at the top of runOnFunction().
+  LLVMContext *mCTX;
+  Function *mF;
+  const AMDILSubtarget *mSTM;
+  AMDILModuleInfo *mAMI;
+  // Deferred (call, replacement function) pairs; applied in
+  // doAtomicConversionIfNeeded() to avoid invalidating iterators.
+  SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
+  // Deferred __amdil_is_constant calls; see doIsConstCallConversionIfNeeded().
+  SmallVector<CallInst *, 16> isConstVec;
+}; // class AMDILPeepholeOpt
+char AMDILPeepholeOpt::ID = 0;
+
+/*
+   getMaskBitfield() returns true if 'val' is a mask, which is defined to
+   be a nonzero value whose 1 bits (in binary format) are all contiguous.
+   On success 'start_bit' is set to the position of the lowest 1 bit and
+   'bitwidth' to the number of 1 bits.  Bit numbering starts from 0.
+   Outputs are meaningful only when the function returns true.
+
+   For example: given val = 0xFF00, start_bit = 8 and bitwidth = 8.
+*/
+bool
+getMaskBitfield(unsigned int val, unsigned int &start_bit, unsigned int &bitwidth)
+{
+  if (val == 0) {
+    // zero, no bitfield
+    return false;
+  }
+
+  bitwidth = 0;
+  start_bit = 0;
+  // Skip the trailing zeros to find the lowest set bit.
+  while ((val & 1) == 0) {
+    ++start_bit;
+    val = (val >> 1);
+  }
+  // Count the run of contiguous 1 bits.
+  if (val > 0) {
+    while ( (val & 1)  == 1) {
+      ++bitwidth;
+      val = (val >> 1);
+    }
+  }
+
+  // Any bits left after the run means the 1 bits were not contiguous.
+  if (val > 0) {
+    // non-continguous 1 bits.
+    return false;
+  }
+  return true;
+}
+
+// getVectorComponent - If 'inst' is an extractelement with a constant index
+// from a vector of exactly 'numElem' elements whose element TypeID equals
+// 'tid' (a Type::TypeID value passed as int), return true and set 'vecval'
+// to the vector operand and 'whichelem' to the extracted lane.
+bool getVectorComponent(Instruction *inst, int tid, unsigned int numElem,
+                        Value*& vecval, unsigned& whichelem)
+{
+  ExtractElementInst *einst = dyn_cast<ExtractElementInst>(inst);
+  if (!einst) {
+    return false;
+  }
+
+  vecval = einst->getVectorOperand();
+  VectorType *vt = dyn_cast<VectorType>(vecval->getType());
+  assert (vt && "ExtractElementInst must have a vector type as its first argument");
+  Type *et = vt->getElementType();
+  if ( (vt->getNumElements() != numElem) ||
+       (et->getTypeID() != tid) ) {
+    return false;
+  }
+  // Only constant lane indices are usable by the callers.
+  ConstantInt *cv = dyn_cast<ConstantInt>(einst->getIndexOperand());
+  if (!cv) {
+    return false;
+  }
+
+  whichelem = (unsigned)cv->getZExtValue();
+  return true;
+}
+
+// getIntValue - Pattern-match an i32 value built as "(src << shamt) & mask"
+// (or plain shl, or shl-of-lshr) and decompose it into a bit-field move:
+// Src is the value supplying the bits, src_start the bit offset within Src,
+// dst_start the destination offset, and dst_width the field width.
+// Returns false when 'Inst' does not fit any of the recognized shapes.
+bool getIntValue(Instruction *Inst, Value *& Src, unsigned int &src_start,
+                 unsigned int &dst_start, unsigned int &dst_width)
+{
+  Value *intval, *opnd1;
+  bool hasmask = false;
+
+  if (!Inst->getType()->isIntegerTy(32)) {
+    return false;
+  }
+  intval = Inst;
+
+  // Defaults: no mask means the whole 32-bit value starting at bit 0.
+  unsigned int start_pos = 0;
+  unsigned int nbits = 32;
+  if (Inst->getOpcode() == Instruction::And) {
+    intval = Inst->getOperand(0);
+    opnd1 = Inst->getOperand(1);
+
+    // Exactly one operand must be a constant mask.
+    ConstantInt *CI0 = dyn_cast<ConstantInt>(intval);
+    ConstantInt *CI1 = dyn_cast<ConstantInt>(opnd1);
+    if ((!CI0 && !CI1) || (CI0 && CI1)) {
+      return false;
+    }
+
+    // Canonicalize so the constant is in opnd1/CI1.
+    if (CI0) {
+      Value *tmp = intval;
+      intval = opnd1;
+      opnd1 = tmp;
+      CI1 = CI0;
+    }
+
+    // The mask must be one contiguous run of 1 bits.
+    unsigned int mask = CI1->getZExtValue();
+    hasmask = getMaskBitfield(mask, start_pos, nbits);
+    if (!hasmask) {
+      return false;
+    }
+  }
+
+  Instruction *tinst = dyn_cast<Instruction>(intval);
+  if (!tinst) {
+    return false;
+  }
+
+  unsigned int src_pos = start_pos;
+  if (tinst->getOpcode() == Instruction::Shl) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(tinst->getOperand(1));
+    if (hasmask && CI) {
+      // (src << amt) & mask: the source field starts 'amt' bits lower.
+      unsigned int amt = CI->getZExtValue();
+      if (amt > src_pos) {
+        return false;
+      }
+      src_pos -= amt;
+    } else if (!hasmask && CI) {
+      unsigned int amt = CI->getZExtValue();
+      Instruction *tinst1 = dyn_cast<Instruction>(tinst->getOperand(0));
+      if (tinst1 && tinst1->getOpcode() == Instruction::LShr) {
+        //  {shl; Lshr} pattern
+        if (ConstantInt *CI1 = dyn_cast<ConstantInt>(tinst1->getOperand(1))) {
+          unsigned int amt1 = CI1->getZExtValue();
+          if (amt >= amt1) {
+            start_pos = amt - amt1;
+            src_pos   = 0;
+            nbits = 32 - amt;
+
+            intval = tinst1->getOperand(0);
+          } else {
+            return false;
+          }
+        }
+      } else if (amt < 32) {
+        // Only shl
+        start_pos = amt;
+        src_pos   = 0;
+        nbits = 32 - amt;
+
+        intval = tinst->getOperand(0);
+      }
+    } else {
+      return false;
+    }
+    // NOTE(review): this unconditional reassignment makes the
+    // "intval = tinst1->getOperand(0)" assignment in the {shl; lshr} branch
+    // above dead -- for that pattern Src ends up being the lshr instruction
+    // itself rather than its source operand.  Verify whether that is the
+    // intended result before relying on Src here.
+    intval = tinst->getOperand(0);
+  }
+
+  Src = intval;
+  src_start = src_pos;
+  dst_start = start_pos;
+  dst_width = nbits;
+  return true;
+}
+
+} // anonymous namespace
+
+namespace llvm
+{
+// Factory entry point used by the AMDIL target's pass setup; the pass class
+// itself lives in an anonymous namespace above.
+FunctionPass *
+createAMDILPeepholeOpt(TargetMachine &tm, CodeGenOpt::Level OL)
+{
+  return new AMDILPeepholeOpt(tm, OL);
+}
+} // llvm namespace
+
+// Construct the pass.  All per-function state members are given defined
+// initial values here: previously mChanged/mRWGOpt/mConvertAtomics and the
+// cached pointers were left uninitialized, and runOnFunction() only assigns
+// mRWGOpt when kernel info is available, so it could be read uninitialized.
+// Initializer order matches the member declaration order in the class.
+AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm, CodeGenOpt::Level OL)
+  : FunctionPass(ID), TM(tm), mChanged(false), mRWGOpt(false),
+    mConvertAtomics(true), mCTX(NULL), mF(NULL), mSTM(NULL), mAMI(NULL)
+{
+  mDebug = DEBUGME;
+  optLevel = OL;
+}
+
+// Destructor; the pass owns no resources beyond its containers.
+AMDILPeepholeOpt::~AMDILPeepholeOpt()
+{
+}
+
+// Human-readable pass name shown by the pass manager / -debug-pass output.
+const char *
+AMDILPeepholeOpt::getPassName() const
+{
+  return "AMDIL PeepHole Optimization Pass";
+}
+
+// containsPointerType - Return true if 'Ty' is a pointer type or transitively
+// contains one (through structs, arrays, or vectors).  Null types yield false.
+bool
+containsPointerType(Type *Ty)
+{
+  if (!Ty) {
+    return false;
+  }
+  switch(Ty->getTypeID()) {
+  default:
+    return false;
+  case Type::StructTyID: {
+    // The TypeID check above guarantees the cast; cast<> (not dyn_cast<>)
+    // documents that and avoids an unchecked-null dereference pattern.
+    const StructType *ST = cast<StructType>(Ty);
+    for (StructType::element_iterator stb = ST->element_begin(),
+         ste = ST->element_end(); stb != ste; ++stb) {
+      if (!containsPointerType(*stb)) {
+        continue;
+      }
+      return true;
+    }
+    break;
+  }
+  case Type::VectorTyID:
+  case Type::ArrayTyID:
+    // Vectors and arrays are SequentialTypes; recurse into the element type.
+    return containsPointerType(cast<SequentialType>(Ty)->getElementType());
+  case Type::PointerTyID:
+    return true;
+  }
+  return false;
+}
+
+// dumpAllIntoArena - Return true if any argument of 'F' is a pointer to a
+// struct that (transitively) contains a pointer.  In that case the caller
+// routes every pointer through the arena, because struct/union members may
+// alias each other and we cannot yet prove otherwise.
+bool
+AMDILPeepholeOpt::dumpAllIntoArena(Function &F)
+{
+  bool dumpAll = false;
+  for (Function::const_arg_iterator cab = F.arg_begin(),
+       cae = F.arg_end(); cab != cae; ++cab) {
+    const Argument *arg = cab;
+    const PointerType *PT = dyn_cast<PointerType>(arg->getType());
+    if (!PT) {
+      continue;
+    }
+    Type *DereferencedType = PT->getElementType();
+    // isa<> is the idiomatic form of the original "!dyn_cast<>" boolean test.
+    if (!isa<StructType>(DereferencedType)) {
+      continue;
+    }
+    if (!containsPointerType(DereferencedType)) {
+      continue;
+    }
+    // FIXME: Because a pointer inside of a struct/union may be aliased to
+    // another pointer we need to take the conservative approach and place all
+    // pointers into the arena until more advanced detection is implemented.
+    dumpAll = true;
+  }
+  return dumpAll;
+}
+// doIsConstCallConversionIfNeeded - Resolve every deferred
+// __amdil_is_constant call collected in isConstVec: replace the call with
+// i32 1 if its argument is a Constant, 0 otherwise, then erase the call.
+void
+AMDILPeepholeOpt::doIsConstCallConversionIfNeeded()
+{
+  if (isConstVec.empty()) {
+    return;
+  }
+  for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
+    CallInst *CI = isConstVec[x];
+    Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+    Type *aType = Type::getInt32Ty(*mCTX);
+    Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+                 : ConstantInt::get(aType, 0);
+    CI->replaceAllUsesWith(Val);
+    CI->eraseFromParent();
+  }
+  isConstVec.clear();
+}
+// doAtomicConversionIfNeeded - Apply the deferred atomic-call renames queued
+// in atomicFuncs (deferred so iterators were not invalidated mid-walk), and,
+// when not every atomic could be converted (mConvertAtomics is false), force
+// every pointer argument onto the arena/global UAV path so the unconverted
+// atomics remain correct.
+void
+AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
+{
+  // Don't do anything if we don't have any atomic operations.
+  if (atomicFuncs.empty()) {
+    return;
+  }
+  // Change the function name for the atomic if it is required
+  uint32_t size = atomicFuncs.size();
+  for (uint32_t x = 0; x < size; ++x) {
+    // The callee is the call's last operand; swap in the renamed function.
+    atomicFuncs[x].first->setOperand(
+      atomicFuncs[x].first->getNumOperands()-1,
+      atomicFuncs[x].second);
+
+  }
+  mChanged = true;
+  if (mConvertAtomics) {
+    return;
+  }
+  // If we did not convert all of the atomics, then we need to make sure that
+  // the atomics that were not converted have their base pointers set to use the
+  // arena path.
+  Function::arg_iterator argB = F.arg_begin();
+  Function::arg_iterator argE = F.arg_end();
+  AMDILKernelManager *KM = mSTM->getKernelManager();
+  AMDILMachineFunctionInfo *mMFI = getAnalysis<MachineFunctionAnalysis>().getMF()
+                                   .getInfo<AMDILMachineFunctionInfo>();
+  for (; argB != argE; ++argB) {
+    if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) {
+      KM->setUAVID(argB,mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
+      mMFI->uav_insert(mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
+    } else {
+      KM->setUAVID(argB,mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
+      mMFI->uav_insert(mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
+    }
+  }
+}
+
+// runOnFunction - Entry point.  Refreshes the cached per-function state,
+// walks the function's instructions *backwards* applying
+// instLevelOptimizations(), then flushes the deferred atomic and
+// __amdil_is_constant conversions.  Returns true if anything changed.
+bool
+AMDILPeepholeOpt::runOnFunction(Function &MF)
+{
+  mChanged = false;
+  mF = &MF;
+  mSTM = &TM.getSubtarget<AMDILSubtarget>();
+  if (mDebug) {
+    MF.dump();
+  }
+  mCTX = &MF.getType()->getContext();
+  mConvertAtomics = true;
+  if (dumpAllIntoArena(MF)) {
+    // Conservatively route every argument's base pointer through the
+    // global UAV (see dumpAllIntoArena for why).
+    for (Function::const_arg_iterator cab = MF.arg_begin(),
+         cae = MF.arg_end(); cab != cae; ++cab) {
+      const Argument *arg = cab;
+      AMDILKernelManager *KM = mSTM->getKernelManager();
+      KM->setUAVID(getBasePointerValue(arg),
+                   mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
+    }
+  }
+  mAMI = &(getAnalysis<MachineFunctionAnalysis>().getMF()
+           .getMMI().getObjFileInfo<AMDILModuleInfo>());
+  // NOTE(review): mRWGOpt is only assigned when kernel info is found; when
+  // the lookup fails it keeps whatever value it held before -- confirm the
+  // constructor (or a prior run) establishes a sane default.
+  const AMDILKernel *kernel = mAMI->getKernel(MF.getName());
+  if (kernel && kernel->mKernel && kernel->sgv) {
+    mRWGOpt = kernel->sgv->mHasRWG;
+  }
+  // Iterate from the last instruction toward the first; the order matters
+  // because instLevelOptimizations() may erase or replace instructions.
+  for (inst_iterator I = inst_end(MF), E = inst_begin(MF); I != E; --I) {
+    inst_iterator nextI = I;
+    Instruction *inst = &*(--nextI);
+    // If we don't optimize to a new instruction, decrement the
+    // iterator, otherwise test the new instruction for further
+    // optimizations.
+    if (instLevelOptimizations(inst)) {
+      // We have to check against inst_begin at each iteration of the loop
+      // as it can be invalidated and 'I' can point to the first instruction.
+      E = inst_begin(MF);
+      if (I == E) break;
+    }
+  }
+
+  doAtomicConversionIfNeeded(MF);
+  doIsConstCallConversionIfNeeded();
+
+  if (mDebug) {
+    MF.dump();
+  }
+  return mChanged;
+}
+
+// optimizeCallInst - Try each call-specific rewrite in turn on 'inst' (which
+// must be a CallInst).  Returns true when the call was replaced or erased so
+// the caller re-examines the surrounding code; false to continue scanning.
+bool
+AMDILPeepholeOpt::optimizeCallInst(Instruction *inst)
+{
+  CallInst *CI = dyn_cast<CallInst>(inst);
+
+  assert(CI && "optimizeCallInst() expects Call instruction");
+
+  if (isSigned24BitOps(CI)) {
+    expandSigned24BitOps(CI);
+    CI->eraseFromParent();
+    return true;
+  }
+  if (isRWGLocalOpt(CI)) {
+    expandRWGLocalOpt(CI);
+    return true;
+  }
+  if (propagateSamplerInst(CI)) {
+    return true;
+  }
+  if (expandBFI(CI) || expandBFM(CI)) {
+    CI->eraseFromParent();
+    return true;
+  }
+  if (convertAccurateDivide(CI)) {
+    expandAccurateDivide(CI);
+    CI->eraseFromParent();
+    return true;
+  }
+
+  // The callee is the call's final operand.
+  StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
+  if (calleeName.startswith("__amdil_is_constant")) {
+    // If we do not have optimizations, then this
+    // cannot be properly evaluated, so we add the
+    // call instruction to a vector and process
+    // them at the end of processing after the
+    // samplers have been correctly handled.
+    if (optLevel == CodeGenOpt::None) {
+      isConstVec.push_back(CI);
+      return false;
+    } else {
+      Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+      Type *aType = Type::getInt32Ty(*mCTX);
+      Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+                   : ConstantInt::get(aType, 0);
+      CI->replaceAllUsesWith(Val);
+      CI->eraseFromParent();
+      return true;
+    }
+  }
+
+  // Fold __amdil_is_asic_id_i32(mask) to (deviceFlag & mask), or 0 when the
+  // argument is not a compile-time constant.
+  if (calleeName.equals("__amdil_is_asic_id_i32")) {
+    ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
+    Type *aType = Type::getInt32Ty(*mCTX);
+    Value *Val = CV;
+    if (Val) {
+      Val = ConstantInt::get(aType,
+                             mSTM->device()->getDeviceFlag() & CV->getZExtValue());
+    } else {
+      Val = ConstantInt::get(aType, 0);
+    }
+    CI->replaceAllUsesWith(Val);
+    CI->eraseFromParent();
+    return true;
+  }
+  Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
+  if (!F) {
+    return false;
+  }
+  // Atomic builtins whose result is unused are retargeted to the _noret
+  // variant (same parameters, void return).
+  if (F->getName().startswith("__atom") && !CI->getNumUses()
+      && F->getName().find("_xchg") == StringRef::npos
+      && F->getName().find("_noret") == StringRef::npos) {
+    std::string buffer(F->getName().str() + "_noret");
+    std::vector<Type*> callTypes;
+    FunctionType *ptr = F->getFunctionType();
+    callTypes.insert(callTypes.begin(), ptr->param_begin(), ptr->param_end());
+    FunctionType *newFunc = FunctionType::get(Type::getVoidTy(F->getContext()),
+                            callTypes, false);
+    std::vector<Value*> args;
+    for (unsigned x = 0, y = CI->getNumArgOperands(); x < y; ++x) {
+      args.push_back(CI->getArgOperand(x));
+    }
+
+    Function *newF = dyn_cast<Function>(
+                       F->getParent()->getOrInsertFunction(buffer, newFunc));
+    CallInst *newCI = CallInst::Create(newF, args);
+    newCI->insertAfter(CI);
+    CI->eraseFromParent();
+    return true;
+  }
+
+  if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)
+      && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+    return false;
+  }
+  if (!mConvertAtomics) {
+    return false;
+  }
+  // Global-memory atomics: rename to a per-argument-ID variant when the base
+  // pointer traces back to a kernel argument; otherwise give up on converting
+  // atomics for this function (doAtomicConversionIfNeeded handles fallout).
+  StringRef name = F->getName();
+  if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
+    Value *ptr = CI->getOperand(0);
+    const Value *basePtr = getBasePointerValue(ptr);
+    const Argument *Arg = dyn_cast<Argument>(basePtr);
+    if (Arg) {
+      int32_t id = mAMI->getArgID(Arg);
+      if (id >= 0) {
+        // Builds "<name>_<id>" via the stream; the '\n' terminates the token
+        // for the >> extraction.  NOTE(review): name.data() assumes the
+        // StringRef is NUL-terminated -- true for function names here, but
+        // worth confirming.
+        std::stringstream ss;
+        ss << name.data() << "_" << id << '\n';
+        std::string val;
+        ss >> val;
+        F = dyn_cast<Function>(
+              F->getParent() ->getOrInsertFunction(val, F->getFunctionType()));
+        atomicFuncs.push_back(std::make_pair(CI, F));
+      } else {
+        mConvertAtomics = false;
+      }
+    } else {
+      mConvertAtomics = false;
+    }
+  }
+  return false;
+}
+
+// Decompose 'base' into the pieces of a bit-insert pattern: the source
+// instruction plus (optionally) a constant mask and/or a constant shift
+// amount, written through the reference out-parameters.  Returns true
+// when a usable combination was found.
+bool
+AMDILPeepholeOpt::setupBitInsert(Instruction *base,
+                                 Instruction *&src,
+                                 Constant *&mask,
+                                 Constant *&shift)
+{
+  if (!base) {
+    if (mDebug) {
+      dbgs() << "Null pointer passed into function.\n";
+    }
+    return false;
+  }
+  bool sawAnd;
+  switch (base->getOpcode()) {
+  case Instruction::Shl:
+    shift = dyn_cast<Constant>(base->getOperand(1));
+    sawAnd = false;
+    break;
+  case Instruction::And:
+    mask = dyn_cast<Constant>(base->getOperand(1));
+    sawAnd = true;
+    break;
+  default:
+    if (mDebug) {
+      dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
+    }
+    // Neither a Shl nor an And: this is not a pattern we handle.
+    return false;
+  }
+  src = dyn_cast<Instruction>(base->getOperand(0));
+  if (!src) {
+    if (mDebug) {
+      dbgs() << "Failed setup since the base operand is not an instruction!\n";
+    }
+    return false;
+  }
+  // An 'and' already tells us exactly which bits are live, so there is
+  // no need to look one level deeper into the operand chain.
+  if (sawAnd) {
+    return true;
+  }
+  unsigned srcOpc = src->getOpcode();
+  if (srcOpc == Instruction::Shl && !shift) {
+    shift = dyn_cast<Constant>(src->getOperand(1));
+    src = dyn_cast<Instruction>(src->getOperand(0));
+  } else if (srcOpc == Instruction::And && !mask) {
+    mask = dyn_cast<Constant>(src->getOperand(1));
+  }
+  if (!shift && !mask) {
+    if (mDebug) {
+      dbgs() << "Failed setup since both mask and shift are NULL!\n";
+    }
+    // Did not find a constant mask or a shift.
+    return false;
+  }
+  return true;
+}
+// Merge two calls to __amdil_class_f[32|64] on the same value, whose
+// results are combined with this 'or', into a single call whose class
+// mask (second argument) is the 'or' of the two original constant masks.
+// Returns true when the rewrite was performed.
+bool
+AMDILPeepholeOpt::optimizeClassInst(Instruction *inst)
+{
+  assert (inst && (inst->getOpcode() == Instruction::Or) &&
+          "optimizeClassInst() expects OR instruction");
+
+  if (optLevel == CodeGenOpt::None) {
+    return false;
+  }
+  // We want to optimize multiple __amdil_class_f[32|64] that are
+  // separated by 'or' instructions into a single call with the
+  // second argument or'd together.
+  CallInst *LHS = dyn_cast<CallInst>(inst->getOperand(0));
+  CallInst *RHS = dyn_cast<CallInst>(inst->getOperand(1));
+  if (!LHS || !RHS) {
+    return false;
+  }
+  // For a CallInst the called function is the last operand; operand 0
+  // is the value being classified and operand 1 is the constant mask.
+  Value *LHSFunc, *LHSConst, *LHSVar;
+  Value *RHSFunc, *RHSConst, *RHSVar;
+  LHSFunc = LHS->getOperand(LHS->getNumOperands() - 1);
+  LHSConst = dyn_cast<Constant>(LHS->getOperand(1));
+  LHSVar = LHS->getOperand(0);
+  RHSFunc = RHS->getOperand(RHS->getNumOperands() - 1);
+  RHSConst = dyn_cast<Constant>(RHS->getOperand(1));
+  RHSVar = RHS->getOperand(0);
+  // If the functions aren't the class intrinsic, then fail.
+  // If the names are not the same, then fail.
+  if ((!LHSFunc->getName().startswith("__amdil_class_f")
+       || !RHSFunc->getName().startswith("__amdil_class_f"))
+      || LHSFunc->getName() != RHSFunc->getName()) {
+    return false;
+  }
+  // We don't want to merge two class calls from different variables.
+  if (LHSVar != RHSVar) {
+    return false;
+  }
+  // If we don't have two constants, then fail.
+  if (!LHSConst || !RHSConst) {
+    return false;
+  }
+  Value *Operands[2] = {
+    LHSVar,
+    LHSConst
+  };
+  CallInst *newCall = CallInst::Create(dyn_cast<Function>(LHSFunc),
+                                       Operands, "new_class");
+  // Or the constants together, and then call the function all over again.
+  // The original 'or' instruction is repurposed to combine the two masks,
+  // and the new call's mask operand is then pointed at it — the order of
+  // these five statements is significant.
+  inst->setOperand(0, LHSConst);
+  inst->setOperand(1, RHSConst);
+  inst->replaceAllUsesWith(newCall);
+  newCall->insertAfter(inst);
+  newCall->setOperand(1, inst);
+
+  // We need to remove the functions if they only have a single
+  // use.
+  if (LHS->use_empty()) {
+    LHS->eraseFromParent();
+  }
+  if (RHS->use_empty()) {
+    RHS->eraseFromParent();
+  }
+  return true;
+}
+
+// Collapse an 'or' of masked and/or shifted values into a single call to
+// the __amdil_ubit_insert intrinsic when the two operands describe
+// non-overlapping bit ranges.  Returns true when the rewrite happened.
+//
+// Fixes relative to the original: the assert message named a nonexistent
+// function ("optimizeBitInserti"), and the constraint dump labelled the
+// RHS mask check "(2-LHS)".
+bool
+AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst)
+{
+  assert (inst && (inst->getOpcode() == Instruction::Or) &&
+          "optimizeBitInsert() expects OR instruction");
+
+  if (optLevel == CodeGenOpt::None) {
+    return false;
+  }
+  // We want to do an optimization on a sequence of ops that in the end equals a
+  // single ISA instruction.
+  // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
+  // Some simplified versions of this pattern are as follows:
+  // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
+  // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
+  // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
+  // (A & B) | (D << F) when (1 << F) >= B
+  // (A << C) | (D & E) when (1 << C) >= E
+  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    // The HD4XXX hardware doesn't support the ubit_insert instruction.
+    return false;
+  }
+  Type *aType = inst->getType();
+  bool isVector = aType->isVectorTy();
+  int numEle = 1;
+  // This optimization only works on 32bit integers.
+  if (aType->getScalarType()
+      != Type::getInt32Ty(inst->getContext())) {
+    return false;
+  }
+  if (isVector) {
+    const VectorType *VT = dyn_cast<VectorType>(aType);
+    numEle = VT->getNumElements();
+    // We currently cannot support more than 4 elements in a intrinsic and we
+    // cannot support Vec3 types.
+    if (numEle > 4 || numEle == 3) {
+      return false;
+    }
+  }
+  // TODO: Handle vectors.
+  if (isVector) {
+    if (mDebug) {
+      dbgs() << "!!! Vectors are not supported yet!\n";
+    }
+    return false;
+  }
+  // Pull apart each side of the 'or' into source / mask / shift triples.
+  Instruction *LHSSrc = NULL, *RHSSrc = NULL;
+  Constant *LHSMask = NULL, *RHSMask = NULL;
+  Constant *LHSShift = NULL, *RHSShift = NULL;
+  Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
+  Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
+  if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
+    if (mDebug) {
+      dbgs() << "Found an OR Operation that failed setup!\n";
+      inst->dump();
+      if (LHS) {
+        LHS->dump();
+      }
+      if (LHSSrc) {
+        LHSSrc->dump();
+      }
+      if (LHSMask) {
+        LHSMask->dump();
+      }
+      if (LHSShift) {
+        LHSShift->dump();
+      }
+    }
+    // There was an issue with the setup for BitInsert.
+    return false;
+  }
+  if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
+    if (mDebug) {
+      dbgs() << "Found an OR Operation that failed setup!\n";
+      inst->dump();
+      if (RHS) {
+        RHS->dump();
+      }
+      if (RHSSrc) {
+        RHSSrc->dump();
+      }
+      if (RHSMask) {
+        RHSMask->dump();
+      }
+      if (RHSShift) {
+        RHSShift->dump();
+      }
+    }
+    // There was an issue with the setup for BitInsert.
+    return false;
+  }
+  if (mDebug) {
+    dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
+    dbgs() << "Op:        ";
+    inst->dump();
+    dbgs() << "LHS:       ";
+    if (LHS) {
+      LHS->dump();
+    } else {
+      dbgs() << "(None)\n";
+    }
+    dbgs() << "LHS Src:   ";
+    if (LHSSrc) {
+      LHSSrc->dump();
+    } else {
+      dbgs() << "(None)\n";
+    }
+    dbgs() << "LHS Mask:  ";
+    if (LHSMask) {
+      LHSMask->dump();
+    } else {
+      dbgs() << "(None)\n";
+    }
+    dbgs() << "LHS Shift: ";
+    if (LHSShift) {
+      LHSShift->dump();
+    } else {
+      dbgs() << "(None)\n";
+    }
+    dbgs() << "RHS:       ";
+    if (RHS) {
+      RHS->dump();
+    } else {
+      dbgs() << "(None)\n";
+    }
+    dbgs() << "RHS Src:   ";
+    if (RHSSrc) {
+      RHSSrc->dump();
+    } else {
+      dbgs() << "(None)\n";
+    }
+    dbgs() << "RHS Mask:  ";
+    if (RHSMask) {
+      RHSMask->dump();
+    } else {
+      dbgs() << "(None)\n";
+    }
+    dbgs() << "RHS Shift: ";
+    if (RHSShift) {
+      RHSShift->dump();
+    } else {
+      dbgs() << "(None)\n";
+    }
+  }
+  // Derive the numeric mask/shift values; a missing mask means "all bits
+  // above the shift", a missing shift means zero.
+  Constant *offset = NULL;
+  Constant *width = NULL;
+  int32_t lhsMaskVal = 0, rhsMaskVal = 0;
+  int32_t lhsShiftVal = 0, rhsShiftVal = 0;
+  int32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
+  int32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
+  lhsMaskVal = (int32_t)(LHSMask
+                         ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
+  rhsMaskVal = (int32_t)(RHSMask
+                         ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
+  lhsShiftVal = (int32_t)(LHSShift
+                          ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
+  rhsShiftVal = (int32_t)(RHSShift
+                          ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
+  lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
+  rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
+  lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
+  rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
+  // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks).
+  if (mDebug) {
+    dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")");
+    dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ;
+    dbgs() << (RHSMask ? " & E)" : ")");
+    dbgs() << (RHSShift ? " << F)\'\n" : ")\'\n");
+    dbgs() << "A = LHSSrc\t\tD = RHSSrc \n";
+    dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n";
+    dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n";
+    dbgs() << "width(B) = " << lhsMaskWidth;
+    dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n";
+    dbgs() << "offset(B) = " << lhsMaskOffset;
+    dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n";
+    dbgs() << "Constraints: \n";
+    dbgs() << "\t(1) B ^ E == 0\n";
+    dbgs() << "\t(2-LHS) B is a mask\n";
+    dbgs() << "\t(2-RHS) E is a mask\n";
+    dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n";
+    dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n";
+  }
+  // Constraint 1: the two masks must cover disjoint (complementary) bits.
+  if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
+    if (mDebug) {
+      dbgs() << lhsMaskVal << " ^ " << rhsMaskVal;
+      dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n";
+      dbgs() << "Failed constraint 1!\n";
+    }
+    return false;
+  }
+  if (mDebug) {
+    dbgs() << "LHS = " << lhsMaskOffset << "";
+    dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = ";
+    dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset));
+    dbgs() << "\nRHS = " << rhsMaskOffset << "";
+    dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = ";
+    dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset));
+    dbgs() << "\n";
+  }
+  // Constraint 3: one side's bit range must sit entirely above the other;
+  // that side provides the inserted bits, the other the background value.
+  if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
+    offset = ConstantInt::get(aType, lhsMaskOffset, false);
+    width = ConstantInt::get(aType, lhsMaskWidth, false);
+    RHSSrc = RHS;
+    if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
+      if (mDebug) {
+        dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n";
+        dbgs() << "Failed constraint 2!\n";
+      }
+      return false;
+    }
+    // Normalize the inserted bits down to bit 0; ubit_insert re-shifts
+    // them to 'offset'.
+    if (!LHSShift) {
+      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+                                      "MaskShr", LHS);
+    } else if (lhsShiftVal != lhsMaskOffset) {
+      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+                                      "MaskShr", LHS);
+    }
+    if (mDebug) {
+      dbgs() << "Optimizing LHS!\n";
+    }
+  } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
+    offset = ConstantInt::get(aType, rhsMaskOffset, false);
+    width = ConstantInt::get(aType, rhsMaskWidth, false);
+    LHSSrc = RHSSrc;
+    RHSSrc = LHS;
+    if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
+      if (mDebug) {
+        dbgs() << "Non-Mask: " << rhsMaskVal << "\n";
+        dbgs() << "Failed constraint 2!\n";
+      }
+      return false;
+    }
+    if (!RHSShift) {
+      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+                                      "MaskShr", RHS);
+    } else if (rhsShiftVal != rhsMaskOffset) {
+      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+                                      "MaskShr", RHS);
+    }
+    if (mDebug) {
+      dbgs() << "Optimizing RHS!\n";
+    }
+  } else {
+    if (mDebug) {
+      dbgs() << "Failed constraint 3!\n";
+    }
+    return false;
+  }
+  if (mDebug) {
+    dbgs() << "Width:  ";
+    if (width) {
+      width->dump();
+    } else {
+      dbgs() << "(0)\n";
+    }
+    dbgs() << "Offset: ";
+    if (offset) {
+      offset->dump();
+    } else {
+      dbgs() << "(0)\n";
+    }
+    dbgs() << "LHSSrc: ";
+    if (LHSSrc) {
+      LHSSrc->dump();
+    } else {
+      dbgs() << "(0)\n";
+    }
+    dbgs() << "RHSSrc: ";
+    if (RHSSrc) {
+      RHSSrc->dump();
+    } else {
+      dbgs() << "(0)\n";
+    }
+  }
+  if (!offset || !width) {
+    if (mDebug) {
+      dbgs() << "Either width or offset are NULL, failed detection!\n";
+    }
+    return false;
+  }
+  // Lets create the function signature: ubit_insert(width, offset, src, dst).
+  std::vector<Type *> callTypes;
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+  std::string name = "__amdil_ubit_insert";
+  if (isVector) {
+    name += "_v" + itostr(numEle) + "u32";
+  } else {
+    name += "_u32";
+  }
+  Function *Func =
+    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+                       getOrInsertFunction(llvm::StringRef(name), funcType));
+  Value *Operands[4] = {
+    width,
+    offset,
+    LHSSrc,
+    RHSSrc
+  };
+  CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
+  if (mDebug) {
+    dbgs() << "Old Inst: ";
+    inst->dump();
+    dbgs() << "New Inst: ";
+    CI->dump();
+    dbgs() << "\n\n";
+  }
+  CI->insertBefore(inst);
+  inst->replaceAllUsesWith(CI);
+  inst->eraseFromParent();
+  return true;
+}
+
+// Recognize the inverted-mask pattern (((B ^ -1) | C) & A) ^ -1 and
+// rewrite it as a call to the __amdil_bfi intrinsic:
+// BFI(A, (B & (C ^ -1)), -1).  Returns true when the rewrite happened.
+//
+// Fixes relative to the original: the assert message named the wrong
+// function; the Bp swap test read 'Bp != NULL' (swapping whenever Bp was
+// valid and null-dereferencing when it was not) instead of 'Bp == NULL';
+// and 'C' was used (dump, Xor creation) without a null check.
+bool
+AMDILPeepholeOpt::optimizeBFI(Instruction *inst)
+{
+  assert (inst && (inst->getOpcode() == Instruction::Xor) &&
+          "optimizeBFI() expects Xor instruction");
+  if (mDebug) {
+    dbgs() << "\nInst: ";
+    inst->dump();
+  }
+  if (optLevel == CodeGenOpt::None) {
+    return false;
+  }
+  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    // The HD4XXX hardware doesn't support the ubit_insert instruction.
+    return false;
+  }
+  Type *aType = inst->getType();
+  // This optimization only works on 32bit integers.
+  if (aType->getScalarType()
+      != Type::getInt32Ty(inst->getContext())) {
+    return false;
+  }
+  int numEle = 1;
+  if (aType->isVectorTy()) {
+    numEle = dyn_cast<VectorType>(aType)->getNumElements();
+    if (numEle > 4 || numEle == 3) {
+      return false;
+    }
+  }
+  // The optimization we are doing is:
+  // B` = B ^ -1
+  // C` = B` | C
+  // A` = C` & A
+  // inst = A` ^ -1
+  // (((B` | C) & A) ^ -1)
+  // ==>
+  // BFI(A, (B & (C ^ -1)), -1)
+  Constant *Apneg1 = dyn_cast<Constant>(inst->getOperand(1));
+  Instruction *Ap = dyn_cast<Instruction>(inst->getOperand(0));
+  // Not a -1 or an 'AND' instruction, so can't proceed.
+  if (Apneg1 == NULL || (Ap != NULL && Ap->getOpcode() != Instruction::And)) {
+    // Inverted operands, swap them.
+    Apneg1 = dyn_cast<Constant>(inst->getOperand(0));
+    Ap = dyn_cast<Instruction>(inst->getOperand(1));
+
+  }
+  if (Apneg1 == NULL || Ap == NULL ||
+      Ap->getOpcode() != Instruction::And) {
+    return false;
+  }
+  if (mDebug) {
+    dbgs() << "Ap: ";
+    Ap->dump();
+    dbgs() << "Ap-1: ";
+    Apneg1->dump();
+  }
+  Instruction *Cp = dyn_cast<Instruction>(Ap->getOperand(0));
+  Instruction *A = dyn_cast<Instruction>(Ap->getOperand(1));
+  if (Cp == NULL || A == NULL) {
+    return false;
+  }
+  if (mDebug) {
+    dbgs() << "A: ";
+    A->dump();
+    dbgs() << "Cp: ";
+    Cp->dump();
+  }
+  if (Cp->getOpcode() != Instruction::Or
+      && A->getOpcode() == Instruction::Or) {
+    // Operands are inverted, lets swap them.
+    Cp = dyn_cast<Instruction>(Ap->getOperand(1));
+    A = dyn_cast<Instruction>(Ap->getOperand(0));
+  }
+  if (Cp->getOpcode() != Instruction::Or) {
+    // We don't have the right opcode.
+    return false;
+  }
+  Instruction *Bp = dyn_cast<Instruction>(Cp->getOperand(0));
+  Instruction *C = dyn_cast<Instruction>(Cp->getOperand(1));
+  if (Bp == NULL || Bp->getOpcode() != Instruction::Xor) {
+    // Operands are inverted, lets swap them.
+    Bp = dyn_cast<Instruction>(Cp->getOperand(1));
+    C = dyn_cast<Instruction>(Cp->getOperand(0));
+  }
+  if (Bp == NULL || Bp->getOpcode() != Instruction::Xor) {
+    return false;
+  }
+  // 'C' is used below both for dumping and as an operand of the new Xor,
+  // so it must be a real instruction as well.
+  if (C == NULL) {
+    return false;
+  }
+  if (mDebug) {
+    dbgs() << "C: ";
+    C->dump();
+    dbgs() << "Bp: ";
+    Bp->dump();
+  }
+  Constant *Bpneg1 = dyn_cast<Constant>(Bp->getOperand(1));
+  Instruction *B = dyn_cast<Instruction>(Bp->getOperand(0));
+  if (B == NULL || Bpneg1 == NULL) {
+    B = dyn_cast<Instruction>(Bp->getOperand(1));
+    Bpneg1 = dyn_cast<Constant>(Bp->getOperand(0));
+  }
+  if (B == NULL || Bpneg1 == NULL) {
+    return false;
+  }
+  if (mDebug) {
+    dbgs() << "B: ";
+    B->dump();
+    dbgs() << "Bp-1: ";
+    Bpneg1->dump();
+  }
+  // Both xor constants must be all-ones (-1) for the BFI identity to hold.
+  if (aType->isVectorTy()) {
+    ConstantDataVector *Bpneg1v = dyn_cast<ConstantDataVector>(Bpneg1);
+    ConstantDataVector *Apneg1v = dyn_cast<ConstantDataVector>(Apneg1);
+    if (Bpneg1v == NULL ||
+        Apneg1v == NULL) {
+      return false;
+    }
+
+    for (size_t x = 0, y = Bpneg1v->getNumElements(); x < y; ++x) {
+      ConstantInt *neg1 = dyn_cast<ConstantInt>(Bpneg1v->getElementAsConstant(x));
+      if (neg1 == NULL) {
+        return false;
+      }
+      uint32_t maskVal = (uint32_t)neg1->getZExtValue();
+      if (!isMask_32(maskVal)
+          || CountTrailingOnes_32(maskVal) != 32) {
+        return false;
+      }
+    }
+    for (size_t x = 0, y = Apneg1v->getNumElements(); x < y; ++x) {
+      ConstantInt *neg1 = dyn_cast<ConstantInt>(Apneg1v->getElementAsConstant(x));
+      if (neg1 == NULL) {
+        return false;
+      }
+      uint32_t maskVal = (uint32_t)neg1->getZExtValue();
+      if (!isMask_32(maskVal)
+          || CountTrailingOnes_32(maskVal) != 32) {
+        return false;
+      }
+    }
+  } else {
+    ConstantInt *Bpneg1i = dyn_cast<ConstantInt>(Bpneg1);
+    ConstantInt *Apneg1i = dyn_cast<ConstantInt>(Apneg1);
+    if (Bpneg1i == NULL
+        || Apneg1i == NULL) {
+      return false;
+    }
+    uint32_t maskVal = Bpneg1i->getZExtValue();
+    if (!isMask_32(maskVal)
+        || CountTrailingOnes_32(maskVal) != 32) {
+      return false;
+    }
+    maskVal = Apneg1i->getZExtValue();
+    if (!isMask_32(maskVal)
+        || CountTrailingOnes_32(maskVal) != 32) {
+      return false;
+    }
+  }
+  if (mDebug) {
+    dbgs() << "Creating pattern BFI(A, (B & (C ^ -1)), -1)\n";
+  }
+  // Now that we have verified everything, lets create our result.
+  std::vector<Type *> callTypes;
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+  std::string name = "__amdil_bfi";
+  if (aType->isVectorTy()) {
+    name += "_v" + itostr(numEle) + "u32";
+  } else {
+    name += "_u32";
+  }
+  Function *Func =
+    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+                       getOrInsertFunction(llvm::StringRef(name), funcType));
+  C = BinaryOperator::Create(Instruction::Xor, C, Bpneg1, "bfiXor", inst);
+  B = BinaryOperator::Create(Instruction::And, B, C, "bfiAnd", inst);
+  Value *Operands[3] = {
+    A,
+    B,
+    Bpneg1
+  };
+  CallInst *CI = CallInst::Create(Func, Operands, "BFI");
+  if (mDebug) {
+    dbgs() << "Old Inst: ";
+    inst->dump();
+    dbgs() << "New Inst: ";
+    CI->dump();
+    dbgs() << "\n\n";
+  }
+  CI->insertBefore(inst);
+  inst->replaceAllUsesWith(CI);
+  inst->eraseFromParent();
+  return true;
+}
+
+// Turn (A >> B) & C, where C is a constant contiguous mask, into a call
+// to the __amdil_ubit_extract intrinsic.  Returns true when the rewrite
+// happened.
+//
+// Fixes relative to the original: the results of dyn_cast on the mask and
+// shift constants (both vector and scalar paths) were dereferenced
+// without null checks, crashing on ConstantVector / ConstantExpr inputs.
+bool
+AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
+{
+  assert (inst && (inst->getOpcode() == Instruction::And) &&
+          "optimizeBitExtract() expects And instruction");
+
+  if (optLevel == CodeGenOpt::None) {
+    return false;
+  }
+  // We want to do some simple optimizations on Shift right/And patterns. The
+  // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
+  // value smaller than 32 and C is a mask. If C is a constant value, then the
+  // following transformation can occur. For signed integers, it turns into the
+  // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned
+  // integers, it turns into the function call dst =
+  // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
+  // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
+  // Evergreen hardware.
+  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+    // This does not work on HD4XXX hardware.
+    return false;
+  }
+  Type *aType = inst->getType();
+  bool isVector = aType->isVectorTy();
+  int numEle = 1;
+  // This only works on 32bit integers
+  if (aType->getScalarType()
+      != Type::getInt32Ty(inst->getContext())) {
+    return false;
+  }
+  if (isVector) {
+    const VectorType *VT = dyn_cast<VectorType>(aType);
+    numEle = VT->getNumElements();
+    // We currently cannot support more than 4 elements in a intrinsic and we
+    // cannot support Vec3 types.
+    if (numEle > 4 || numEle == 3) {
+      return false;
+    }
+  }
+  BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
+  // If the first operand is not a shift instruction, then we can return as it
+  // doesn't match this pattern.
+  if (!ShiftInst || !ShiftInst->isShift()) {
+    return false;
+  }
+  // If we are a shift left, then we need don't match this pattern.
+  if (ShiftInst->getOpcode() == Instruction::Shl) {
+    return false;
+  }
+  bool isSigned = ShiftInst->isArithmeticShift();
+  Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
+  Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
+  // Lets make sure that the shift value and the and mask are constant integers.
+  if (!AndMask || !ShrVal) {
+    return false;
+  }
+  Constant *newMaskConst;
+  Constant *shiftValConst;
+  if (isVector) {
+    // Handle the vector case
+    std::vector<Constant *> maskVals;
+    std::vector<Constant *> shiftVals;
+    ConstantDataVector *AndMaskVec = dyn_cast<ConstantDataVector>(AndMask);
+    ConstantDataVector *ShrValVec = dyn_cast<ConstantDataVector>(ShrVal);
+    // Constant vectors are not always ConstantDataVector (e.g. splats of
+    // zero or ConstantVector), so bail out rather than dereference NULL.
+    if (!AndMaskVec || !ShrValVec) {
+      return false;
+    }
+    Type *scalarType = AndMaskVec->getType()->getScalarType();
+    assert(AndMaskVec->getNumElements() ==
+           ShrValVec->getNumElements() && "cannot have a "
+           "combination where the number of elements to a "
+           "shift and an and are different!");
+    for (size_t x = 0, y = AndMaskVec->getNumElements(); x < y; ++x) {
+      ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getElementAsConstant(x));
+      ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getElementAsConstant(x));
+      if (!AndCI || !ShiftIC) {
+        return false;
+      }
+      uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
+      if (!isMask_32(maskVal)) {
+        return false;
+      }
+      maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+      uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
+      // If the mask or shiftval is greater than the bitcount, then break out.
+      if (maskVal >= 32 || shiftVal >= 32) {
+        return false;
+      }
+      // If the mask val is greater than the number of original bits left
+      // then this optimization is invalid.
+      if (maskVal > (32 - shiftVal)) {
+        return false;
+      }
+      maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
+      shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
+    }
+    newMaskConst = ConstantVector::get(maskVals);
+    shiftValConst = ConstantVector::get(shiftVals);
+  } else {
+    // Handle the scalar case
+    ConstantInt *AndMaskInt = dyn_cast<ConstantInt>(AndMask);
+    ConstantInt *ShrValInt = dyn_cast<ConstantInt>(ShrVal);
+    // A Constant here may also be a ConstantExpr; only plain integers are
+    // handled, so bail out rather than dereference NULL.
+    if (!AndMaskInt || !ShrValInt) {
+      return false;
+    }
+    uint32_t maskVal = (uint32_t)AndMaskInt->getZExtValue();
+    // This must be a mask value where all lower bits are set to 1 and then any
+    // bit higher is set to 0.
+    if (!isMask_32(maskVal)) {
+      return false;
+    }
+    maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+    // Count the number of bits set in the mask, this is the width of the
+    // resulting bit set that is extracted from the source value.
+    uint32_t shiftVal = (uint32_t)ShrValInt->getZExtValue();
+    // If the mask or shift val is greater than the bitcount, then break out.
+    if (maskVal >= 32 || shiftVal >= 32) {
+      return false;
+    }
+    // If the mask val is greater than the number of original bits left then
+    // this optimization is invalid.
+    if (maskVal > (32 - shiftVal)) {
+      return false;
+    }
+    newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
+    shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
+  }
+  // Lets create the function signature.
+  std::vector<Type *> callTypes;
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  callTypes.push_back(aType);
+  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+  // NOTE(review): the comment above mentions __amdil_ibit_extract for the
+  // signed case, but the name built here is always the unsigned variant —
+  // confirm whether that is intentional.
+  std::string name = "__amdil_ubit_extract";
+  if (isVector) {
+    name += "_v" + itostr(numEle) + "i32";
+  } else {
+    name += "_i32";
+  }
+  // Lets create the function.
+  Function *Func =
+    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+                       getOrInsertFunction(llvm::StringRef(name), funcType));
+  Value *Operands[3] = {
+    newMaskConst,
+    shiftValConst,
+    ShiftInst->getOperand(0)
+  };
+  // Lets create the Call with the operands
+  CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+  CI->insertBefore(inst);
+  inst->replaceAllUsesWith(CI);
+  inst->eraseFromParent();
+  return true;
+}
+
+// Match 'inst' as an extractelement from a vector of 'numElem' elements
+// whose element type has TypeID 'tid' and whose lane index is a constant.
+// On success 'vecval' receives the vector operand and 'whichelem' the
+// constant lane index.
+//
+// Fix relative to the original: the assert tested '!vt', which fired on
+// every valid input (the vector operand of an extractelement always has
+// vector type); it now asserts 'vt'.
+bool
+getVectorComponent(Instruction *inst, int tid, unsigned int numElem,
+                   Value*& vecval, unsigned& whichelem)
+{
+  ExtractElementInst *einst = dyn_cast<ExtractElementInst>(inst);
+  if (!einst) {
+    return false;
+  }
+
+  vecval = einst->getVectorOperand();
+  VectorType *vt = dyn_cast<VectorType>(vecval->getType());
+  assert (vt && "ExtractElementInst must have a vector type as its first argument");
+  Type       *et = vt->getElementType();
+  if ( (vt->getNumElements() != numElem) ||
+       (et->getTypeID() != tid) ) {
+    return false;
+  }
+  ConstantInt *cv = dyn_cast<ConstantInt>(einst->getIndexOperand());
+  if (!cv) {
+    return false;
+  }
+
+  whichelem = (unsigned)cv->getZExtValue();
+  return true;
+}
+
+// Analyze 'Inst' as a (possibly masked and/or shifted) view of a 32-bit
+// integer.  On success, 'Val' receives the underlying value and
+// [start_pos, start_pos + nbits) describes the selected bit-field
+// (nbits == 0 means no mask/shift constrained the value).  Recognized
+// shapes: x & mask, (x << amt) & mask, (x >> amt) & mask, and x >> amt.
+//
+// Fix relative to the original: 'intval' was initialized to NULL and only
+// assigned inside the And branch, so any non-And instruction reached
+// dyn_cast<Instruction>(NULL) (which asserts/UB) and the '!hasmask'
+// handling of the LShr case below was unreachable.  It now defaults to
+// 'Inst' itself.
+bool getIntValue(Instruction *Inst, Value *& Val,
+                 unsigned int &start_pos, unsigned int &nbits)
+{
+  Value *intval = Inst, *opnd1 = NULL;
+  bool hasmask = false;
+
+  if (!Inst->getType()->isIntegerTy(32)) {
+    return false;
+  }
+
+  start_pos = 0;
+  nbits = 0;
+  if (Inst->getOpcode() == Instruction::And) {
+    intval = Inst->getOperand(0);
+    opnd1 = Inst->getOperand(1);
+
+    // Exactly one side of the And must be the constant mask.
+    ConstantInt *CI0 = dyn_cast<ConstantInt>(intval);
+    ConstantInt *CI1 = dyn_cast<ConstantInt>(opnd1);
+    if ((!CI0 && !CI1) || (CI0 && CI1)) {
+      return false;
+    }
+
+    if (CI0) {
+      // Normalize so the non-constant operand ends up in 'intval'.
+      Value *tmp = intval;
+      intval = opnd1;
+      opnd1 = tmp;
+      CI1 = CI0;
+    }
+
+    unsigned int mask = CI1->getZExtValue();
+    hasmask = getMaskBitfield(mask, start_pos, nbits);
+    if (!hasmask) {
+      return false;
+    }
+  }
+
+  Instruction *tinst = dyn_cast<Instruction>(intval);
+  if (!tinst) {
+    return false;
+  }
+
+  if (tinst->getOpcode() == Instruction::Shl) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(tinst->getOperand(1));
+    if (hasmask && CI) {
+      // (x << amt) & mask selects bits of x starting at start_pos - amt.
+      unsigned int amt = CI->getZExtValue();
+      if (amt > start_pos) {
+        return false;
+      }
+      start_pos -= amt;
+    } else {
+      return false;
+    }
+    intval = tinst->getOperand(0);
+  } else if (tinst->getOpcode() == Instruction::LShr) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(tinst->getOperand(1))) {
+      unsigned int amt = CI->getZExtValue();
+      if (!hasmask) {
+        // A bare logical shift right selects the top 32 - amt bits.
+        start_pos = amt;
+        nbits = 32 - amt;
+      } else if ((amt + start_pos + nbits) > 31) {
+        return false;
+      } else {
+        start_pos += amt;
+      }
+    } else {
+      return false;
+    }
+    intval = tinst->getOperand(0);
+  }
+
+  Val = intval;
+  return true;
+}
+
+/*
+   format:     f_2_u4 dst, src
+   semantics:  dst.xyzw =
+                 (((uint32)src.x) & 0xFF) |
+                 ((((uint32)src.y) & 0xFF) << 8) |
+                 ((((uint32)src.z) & 0xFF) << 16) |
+                 ((((uint32)src.w) & 0xFF) << 24);
+
+   If this pattern is found, change the sequence of operations into an
+   intrinsic call to u32 __amdil_f_2_u4 (v4f32) (int_AMDIL_media_convert_f2v4u8).
+
+   TODO: if the sources are not from the same vector, create a new vector.
+*/
+// Recognize the four-way 'or' of byte-converted float vector lanes
+// described in the comment above and replace it with a single
+// __amdil_f_2_u4 intrinsic call.  Returns true when the rewrite happened.
+bool
+AMDILPeepholeOpt::genIntrF2U4(Instruction *inst)
+{
+  // Try to handle the pattern:
+  //   inst = or0 | or1 | or2 | or3
+  Instruction *or0, *or1, *or2, *or3;
+  or0 = dyn_cast<Instruction>(inst->getOperand(0));
+  or1 = dyn_cast<Instruction>(inst->getOperand(1));
+  if (!or0 || !or1) {
+    return false;
+  }
+
+  // The four leaves can be grouped under 'inst' in two shapes: a balanced
+  // tree ((a|b) | (c|d)) or a skewed chain (a | ((b|c) | d)); normalize
+  // both into or0..or3.
+  bool is_or0 = (or0->getOpcode() == Instruction::Or);
+  bool is_or1 = (or1->getOpcode() == Instruction::Or);
+  if (is_or0 && is_or1) {
+    Instruction *t0 = or0, *t1 = or1;
+    or0 = dyn_cast<Instruction>(t0->getOperand(0));
+    or1 = dyn_cast<Instruction>(t0->getOperand(1));
+    or2 = dyn_cast<Instruction>(t1->getOperand(0));
+    or3 = dyn_cast<Instruction>(t1->getOperand(1));
+  } else if (is_or0 || is_or1) {
+    if (is_or0) {
+      // swap or0 and or1
+      or2 = or0;
+      or0 = or1;
+      or1 = or2;
+    }
+    or2 = dyn_cast<Instruction>(or1->getOperand(0));
+    or1 = dyn_cast<Instruction>(or1->getOperand(1));
+    if (!or1 || !or2) {
+      return false;
+    } else {
+      bool b1 = (or1->getOpcode() == Instruction::Or);
+      bool b2 = (or2->getOpcode() == Instruction::Or);
+      if ((b1 && b2) || (!b1 && !b2)) {
+        return false;
+      } else {
+        if (b1) {
+          // swap or1 and or2
+          or3 = or1;
+          or1 = or2;
+          or2 = or3;
+        }
+        or3 = dyn_cast<Instruction>(or2->getOperand(0));
+        or2 = dyn_cast<Instruction>(or2->getOperand(1));
+      }
+    }
+  } else {
+    return false;
+  }
+
+  // Sanity check
+  if (!or0 || !or1 || !or2 || !or3) {
+    return false;
+  }
+
+  // Check to see if all or's are from the same vector (v4f32), and each
+  // one is converted to 8 bit integer...
+  unsigned int dst_start[4], dst_width[4], src_start[4];
+  Value *src[4];
+  Instruction *dst[4];
+
+  dst[0] = or0;
+  dst[1] = or1;
+  dst[2] = or2;
+  dst[3] = or3;
+
+  Value *v4f32val = NULL;
+  for (int i=0; i < 4; ++i) {
+    // NOTE(review): this calls a five-argument overload of getIntValue
+    // (separate destination start/width) that is not visible in this
+    // file chunk; the src_start checks below look partially redundant
+    // (src_start must be 0, yet is also tested against 24 and % 8) and
+    // may have been intended for dst_start — confirm against the
+    // overload's contract.
+    if (!getIntValue(dst[i], src[i], src_start[i],
+                     dst_start[i], dst_width[i]) ||
+        (dst_width[i] != 8) || (src_start[i] != 0) ||
+        (src_start[i] > 24) || ((src_start[i] % 8) != 0)) {
+      return false;
+    }
+
+    // Each byte must come from a float-to-unsigned conversion...
+    Instruction *tinst = dyn_cast<Instruction>(src[i]);
+    if (!tinst ||
+        (tinst->getOpcode() != Instruction::FPToUI)) {
+      return false;
+    }
+    src[i] = tinst->getOperand(0);
+    tinst = dyn_cast<Instruction>(src[i]);
+    if (!tinst) {
+      return false;
+    }
+
+    // ...of a lane extracted from one common v4f32 vector.
+    Value *vecval;
+    unsigned int which;
+    if (!getVectorComponent(tinst, Type::FloatTyID, 4,  vecval, which)) {
+      return false;
+    }
+
+    if (v4f32val == NULL) {
+      v4f32val = vecval;
+    } else if (v4f32val != vecval) {
+      return false;
+    }
+
+    // Lane k must land in destination byte k.
+    if (which != (dst_start[i]/8)) {
+      return false;
+    }
+  }
+
+  // Check and record the correct order in pos[]: each destination byte
+  // must be produced by exactly one of the four or-leaves.
+  int pos[4];
+  for (int i=0; i < 4; ++i) {
+    pos[i] = -1;
+  }
+  for (int i=0; i < 4; ++i) {
+    unsigned int ix = (dst_start[i] / 8);
+    if (pos[ix] != -1) {
+      return false;
+    }
+    pos[ix] = i;
+  }
+
+  // Generate the intrinsic
+  Type *rtype = inst->getType();
+  std::vector<Type *> argtypes;
+  argtypes.push_back(v4f32val->getType());
+  FunctionType *functype = FunctionType::get(rtype, argtypes, false);
+  Function *proto_f2u4 = dyn_cast<Function>(
+                           mF->getParent()->getOrInsertFunction("__amdil_f_2_u4", functype));
+
+  CallInst *call_f2u4 = CallInst::Create(proto_f2u4, v4f32val, "F_2_U4", inst);
+  inst->replaceAllUsesWith(call_f2u4);
+  inst->eraseFromParent();
+
+  return true;
+}
+
+// Expand a call to the __amdil_bfi (bitfield insert) intrinsic into plain
+// bitwise IR for targets on CAL SC versions <= 150 that lack a native BFI
+// instruction:  bfi(A, B, C) => (A & B) | (~A & C).
+// Works for both scalar i32 and vector-of-i32 operands (the all-ones
+// constant is splatted per lane for vectors).
+// Returns true if the call was expanded. The now-dead call instruction is
+// left in place for the caller / dead-code elimination to remove.
+bool
+AMDILPeepholeOpt::expandBFI(CallInst *CI)
+{
+  if (!CI || mSTM->calVersion() > CAL_VERSION_SC_150) {
+    return false;
+  }
+  // The callee is the last operand of a CallInst.
+  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+  if (!LHS->getName().startswith("__amdil_bfi")) {
+    return false;
+  }
+  Type* type = CI->getOperand(0)->getType();
+  // Build the ~0 (all ones) constant used for the NOT; splat it across
+  // the vector lanes when the operands are vectors.
+  Constant *negOneConst = NULL;
+  if (type->isVectorTy()) {
+    std::vector<Constant *> negOneVals;
+    negOneConst = ConstantInt::get(CI->getContext(),
+                                   APInt(32, StringRef("-1"), 10));
+    for (size_t x = 0,
+         y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+      negOneVals.push_back(negOneConst);
+    }
+    negOneConst = ConstantVector::get(negOneVals);
+  } else {
+    negOneConst = ConstantInt::get(CI->getContext(),
+                                   APInt(32, StringRef("-1"), 10));
+  }
+  // __amdil_bfi => (A & B) | (~A & C)
+  BinaryOperator *lhs =
+    BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+                           CI->getOperand(1), "bfi_and", CI);
+  // ~A is expressed as A xor -1.
+  BinaryOperator *rhs =
+    BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
+                           "bfi_not", CI);
+  rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
+                               "bfi_and", CI);
+  lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
+  CI->replaceAllUsesWith(lhs);
+  return true;
+}
+
+// Expand a call to the __amdil_bfm (bitfield mask) intrinsic into plain IR
+// for targets on CAL SC versions <= 150 that lack a native BFM instruction:
+//   bfm(src0, src1) => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1F)
+// Works for both scalar i32 and vector-of-i32 operands (the mask and shift
+// constants are splatted per lane for vectors).
+// Returns true if the call was expanded; the dead call itself is left for
+// the caller / dead-code elimination to remove.
+bool
+AMDILPeepholeOpt::expandBFM(CallInst *CI)
+{
+  if (!CI || mSTM->calVersion() > CAL_VERSION_SC_150) {
+    return false;
+  }
+  // The callee is the last operand of a CallInst.
+  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+  if (!LHS->getName().startswith("__amdil_bfm")) {
+    return false;
+  }
+  // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
+  Constant *newMaskConst = NULL;
+  Constant *newShiftConst = NULL;
+  Type* type = CI->getOperand(0)->getType();
+  if (type->isVectorTy()) {
+    // Splat the 0x1F mask and the constant 1 across all vector lanes.
+    std::vector<Constant*> newMaskVals, newShiftVals;
+    newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+    newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+    for (size_t x = 0,
+         y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+      newMaskVals.push_back(newMaskConst);
+      newShiftVals.push_back(newShiftConst);
+    }
+    newMaskConst = ConstantVector::get(newMaskVals);
+    newShiftConst = ConstantVector::get(newShiftVals);
+  } else {
+    newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+    newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+  }
+  // lhs = src0 & 0x1F
+  BinaryOperator *lhs =
+    BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+                           newMaskConst, "bfm_mask", CI);
+  // lhs = 1 << lhs
+  lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
+                               lhs, "bfm_shl", CI);
+  // lhs = lhs - 1
+  lhs = BinaryOperator::Create(Instruction::Sub, lhs,
+                               newShiftConst, "bfm_sub", CI);
+  // rhs = src1 & 0x1F
+  BinaryOperator *rhs =
+    BinaryOperator::Create(Instruction::And, CI->getOperand(1),
+                           newMaskConst, "bfm_mask", CI);
+  // result = lhs << rhs
+  lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
+  CI->replaceAllUsesWith(lhs);
+  return true;
+}
+
+// Driver for all instruction-level peephole optimizations. Dispatches on
+// the instruction's opcode to the individual optimization routines.
+// Returns true if the instruction was transformed or deleted; in that case
+// the caller must not touch 'inst' again, since it may have been erased.
+bool
+AMDILPeepholeOpt::instLevelOptimizations(Instruction* inst)
+{
+  assert (inst && "inst should not be NULL");
+
+  // At -O0 keep the IR close to the source: no DCE, no F2U4 fusion.
+  bool isDebug = (optLevel == CodeGenOpt::None);
+  bool isEGOrLater = (mSTM->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX);
+
+  // Remove dead inst (probably should do it in caller)
+  if (!isDebug && isInstructionTriviallyDead(inst)) {
+    inst->eraseFromParent();
+    return true;
+  }
+
+  const unsigned opc = inst->getOpcode();
+
+  // Fuse an Or-tree of float->u8 conversions into __amdil_f_2_u4;
+  // Evergreen (HD5XXX) and later only.
+  if ((opc == Instruction::Or) && !isDebug && isEGOrLater &&
+      genIntrF2U4(inst)) {
+    return true;
+  }
+  if ((opc == Instruction::Call) && optimizeCallInst(inst)) {
+    return true;
+  }
+  if ((opc == Instruction::And) && optimizeBitExtract(inst)) {
+    return true;
+  }
+  if ((opc == Instruction::Or) && optimizeBitInsert(inst)) {
+    return true;
+  }
+  if ((opc == Instruction::Xor) && optimizeBFI(inst)) {
+    return true;
+  }
+  if (((opc == Instruction::Load) || (opc == Instruction::Store)) &&
+      correctMisalignedMemOp(inst)) {
+    return true;
+  }
+  // If we are loading from a NULL pointer, replace the load with 0.
+  // Note: the load itself is left in place (now unused) for DCE.
+  if ((opc == Instruction::Load)) {
+    const Value *ptr = dyn_cast<LoadInst>(inst)->getPointerOperand();
+    if (ptr && dyn_cast<ConstantPointerNull>(ptr)) {
+      inst->replaceAllUsesWith(Constant::getNullValue(inst->getType()));
+      return true;
+    }
+  }
+  // If we are storing to a NULL pointer, then drop the store.
+  if (opc == Instruction::Store) {
+    const Value *ptr = dyn_cast<StoreInst>(inst)->getPointerOperand();
+    if (ptr && dyn_cast<ConstantPointerNull>(ptr)) {
+      inst->eraseFromParent();
+      return true;
+    }
+  }
+  if ((opc == Instruction::Or) && optimizeClassInst(inst)) {
+    return true;
+  }
+  return false;
+}
+
+// Work around misaligned struct-typed loads/stores: if a struct memory
+// operation claims an alignment smaller than the type's allocation size
+// and smaller than 4 bytes, reset the alignment to 0 so the backend falls
+// back to the type's natural (ABI) alignment.
+// Returns true if the instruction's alignment was modified.
+bool
+AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst)
+{
+  LoadInst *linst = dyn_cast<LoadInst>(inst);
+  StoreInst *sinst = dyn_cast<StoreInst>(inst);
+  unsigned alignment;
+  Type* Ty;
+  if (linst) {
+    alignment = linst->getAlignment();
+    Ty = inst->getType();
+  } else if (sinst) {
+    alignment = sinst->getAlignment();
+    // For a store the relevant type is that of the value being stored.
+    Ty = sinst->getValueOperand()->getType();
+  } else {
+    return false;
+  }
+  // Nothing to fix when the claimed alignment already covers the type.
+  unsigned size = TM.getTargetData()->getTypeAllocSize(Ty);
+  if (size <= alignment) {
+    return false;
+  }
+  // Only struct types are known to suffer from this problem.
+  if (!Ty->isStructTy()) {
+    return false;
+  }
+  if (alignment < 4) {
+    if (linst) {
+      linst->setAlignment(0);
+      return true;
+    } else if (sinst) {
+      sinst->setAlignment(0);
+      return true;
+    }
+  }
+  return false;
+}
+// Returns true if CI is a call to one of the signed 24-bit integer
+// intrinsics (__amdil_imad24, __amdil_imul24, __amdil_imul24_high) on a
+// device WITHOUT hardware signed-24-bit support, i.e. the call must be
+// expanded in software by expandSigned24BitOps().
+// NOTE(review): the original also compared a 14-character prefix against
+// the 20-character (and typo'd) name "__amdil__imul24_high"; that test
+// could never match and was dead -- imul24_high is already covered by the
+// "__amdil_imul24" prefix.
+bool
+AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI)
+{
+  if (!CI) {
+    return false;
+  }
+  // The callee is the last operand of a CallInst.
+  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+  StringRef name = LHS->getName();
+  // "__amdil_imul24_high" also matches the "__amdil_imul24" prefix.
+  if (!name.startswith("__amdil_imad24") &&
+      !name.startswith("__amdil_imul24")) {
+    return false;
+  }
+  // Devices with native signed 24-bit ops need no expansion.
+  if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+    return false;
+  }
+  return true;
+}
+
+// Expand a signed 24-bit intrinsic call into its 32-bit equivalent for
+// devices that lack hardware signed-24-bit support:
+//   imul24      -> plain 32-bit mul
+//   imad24      -> __amdil_imad (32-bit multiply-add) library call
+//   imul24_high -> __amdil_imul_high library call
+// The original (dead) call is left for the caller / DCE to remove.
+void
+AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
+{
+  assert(isSigned24BitOps(CI) && "Must be a "
+         "signed 24 bit operation to call this function!");
+  // The callee is the last operand of the call.
+  Value *LHS = CI->getOperand(CI->getNumOperands()-1);
+  // On 7XX and 8XX we do not have signed 24bit, so we need to
+  // expand it to the following:
+  // imul24 turns into 32bit imul
+  // imad24 turns into 32bit imad
+  // imul24_high turns into 32bit imulhigh
+  //
+  // BUGFIX: "__amdil_imul24_high" must be tested BEFORE "__amdil_imul24".
+  // The high name also matches the 14-character "__amdil_imul24" prefix,
+  // so the original ordering routed imul24_high calls into the plain
+  // multiply branch and made the imul24_high branch unreachable.
+  if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
+    Type *aType = CI->getOperand(0)->getType();
+    bool isVector = aType->isVectorTy();
+    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+    // Build the __amdil_imul_high_{v<N>i32|i32} prototype matching the
+    // operand types and insert/reuse it in the module.
+    std::vector<Type*> callTypes;
+    callTypes.push_back(CI->getOperand(0)->getType());
+    callTypes.push_back(CI->getOperand(1)->getType());
+    FunctionType *funcType =
+      FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+    std::string name = "__amdil_imul_high";
+    if (isVector) {
+      name += "_v" + itostr(numEle) + "i32";
+    } else {
+      name += "_i32";
+    }
+    Function *Func = dyn_cast<Function>(
+                       CI->getParent()->getParent()->getParent()->
+                       getOrInsertFunction(llvm::StringRef(name), funcType));
+    Value *Operands[2] = {
+      CI->getOperand(0),
+      CI->getOperand(1)
+    };
+    CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
+    nCI->insertBefore(CI);
+    CI->replaceAllUsesWith(nCI);
+  } else if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
+    Type *aType = CI->getOperand(0)->getType();
+    bool isVector = aType->isVectorTy();
+    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+    // Build the __amdil_imad_{v<N>i32|i32} prototype matching the operand
+    // types and insert/reuse it in the module.
+    std::vector<Type*> callTypes;
+    callTypes.push_back(CI->getOperand(0)->getType());
+    callTypes.push_back(CI->getOperand(1)->getType());
+    callTypes.push_back(CI->getOperand(2)->getType());
+    FunctionType *funcType =
+      FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+    std::string name = "__amdil_imad";
+    if (isVector) {
+      name += "_v" + itostr(numEle) + "i32";
+    } else {
+      name += "_i32";
+    }
+    Function *Func = dyn_cast<Function>(
+                       CI->getParent()->getParent()->getParent()->
+                       getOrInsertFunction(llvm::StringRef(name), funcType));
+    Value *Operands[3] = {
+      CI->getOperand(0),
+      CI->getOperand(1),
+      CI->getOperand(2)
+    };
+    CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
+    nCI->insertBefore(CI);
+    CI->replaceAllUsesWith(nCI);
+  } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
+    // Plain signed multiply is sufficient for imul24.
+    BinaryOperator *mulOp =
+      BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
+                             CI->getOperand(1), "imul24", CI);
+    CI->replaceAllUsesWith(mulOp);
+  }
+}
+
+// Returns true when CI is a call to __amdil_get_local_size_int inside a
+// kernel whose required work-group size is known (mRWGOpt was set), so
+// expandRWGLocalOpt() can fold the call to a compile-time constant.
+bool
+AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
+{
+  return (CI != NULL && mRWGOpt
+          && CI->getOperand(CI->getNumOperands() - 1)->getName()
+          == "__amdil_get_local_size_int"
+          // We have to check if we are a kernel currently
+          // because we inline everything and only kernels
+          // should be left. However, in some cases, other
+          // functions exist and we don't want to
+          // optimize them because we don't track that
+          // information.
+          && mAMI->getKernel(mF->getName()));
+}
+
+// Fold a __amdil_get_local_size_int call into a constant <4 x i32> built
+// from the kernel's required work-group size (sgv->reqGroupSize x/y/z),
+// padded with a trailing 0. Only valid when isRWGLocalOpt(CI) is true.
+// The dead call is left for the caller / DCE to remove.
+void
+AMDILPeepholeOpt::expandRWGLocalOpt(CallInst *CI)
+{
+  assert(isRWGLocalOpt(CI) &&
+         "This optmization only works when the call inst is get_local_size!");
+  std::vector<Constant *> consts;
+  const AMDILKernel *kernel = mAMI->getKernel(mF->getName());
+  for (uint32_t x = 0; x < 3; ++x) {
+    // We don't have to check if sgv is valid or not as we
+    // checked this case before we set mRWGOpt to true.
+    uint32_t val = kernel->sgv->reqGroupSize[x];
+    consts.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), val));
+  }
+  // Fourth lane is unused; pad with zero.
+  consts.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), 0));
+  Value *cVec = ConstantVector::get(consts);
+  CI->replaceAllUsesWith(cVec);
+  // Count the folded call for pass statistics.
+  ++LocalFuncs;
+  return;
+}
+
+// Returns true if CI is a call to __amdil_improved_div that may be
+// converted into a plain fdiv by expandAccurateDivide(). Cayman-class
+// HD6XXX devices (cayman, kauai, trinity) keep the library call instead.
+bool
+AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
+{
+  if (!CI) {
+    return false;
+  }
+  if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX
+      && (mSTM->getDeviceName() == "cayman"
+          || mSTM->getDeviceName() == "kauai"
+          || mSTM->getDeviceName() == "trinity")) {
+    return false;
+  }
+  // The callee is the last operand of the call.
+  return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
+         == "__amdil_improved_div";
+}
+
+// Replace a __amdil_improved_div call with a plain fdiv instruction.
+// Only valid when convertAccurateDivide(CI) is true; the dead call is
+// left for the caller / DCE to remove.
+void
+AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI)
+{
+  assert(convertAccurateDivide(CI)
+         && "expanding accurate divide can only happen if it is expandable!");
+  BinaryOperator *divOp =
+    BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
+                           CI->getOperand(1), "fdiv32", CI);
+  CI->replaceAllUsesWith(divOp);
+}
+
+// Constant-propagate the sampler argument of image read intrinsics at -O0.
+// When the sampler operand is a load from a private-address-space global
+// that has a known 32-bit integer initializer, replace all uses of that
+// load with the initializer so the sampler value is known at compile time.
+// Returns true if a sampler load was replaced.
+// (Cleanup: removed the dead stores `funcNameIdx = 0` and `samplerIdx = 2`
+// that were immediately overwritten in the original.)
+bool
+AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI)
+{
+  // Only needed at -O0; with optimizations enabled mem2reg already
+  // propagates the constant.
+  if (optLevel != CodeGenOpt::None) {
+    return false;
+  }
+
+  if (!CI) {
+    return false;
+  }
+
+  // The callee is the last operand of the call.
+  unsigned funcNameIdx = CI->getNumOperands() - 1;
+  StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
+  if (
+    calleeName != "__amdil_image1d_read_norm"
+    && calleeName != "__amdil_image1d_read_unnorm"
+    && calleeName != "__amdil_image1d_array_read_norm"
+    && calleeName != "__amdil_image1d_array_read_unnorm"
+    && calleeName != "__amdil_image1d_buffer_read_norm"
+    && calleeName != "__amdil_image1d_buffer_read_unnorm"
+    && calleeName != "__amdil_image2d_read_norm"
+    && calleeName != "__amdil_image2d_read_unnorm"
+    && calleeName != "__amdil_image2d_array_read_norm"
+    && calleeName != "__amdil_image2d_array_read_unnorm"
+    && calleeName != "__amdil_image3d_read_norm"
+    && calleeName != "__amdil_image3d_read_unnorm") {
+    return false;
+  }
+
+  // The sampler is operand #1 of all the image read intrinsics above.
+  const unsigned samplerIdx = 1;
+  Value *sampler = CI->getOperand(samplerIdx);
+  LoadInst *lInst = dyn_cast<LoadInst>(sampler);
+  if (!lInst) {
+    return false;
+  }
+
+  if (lInst->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) {
+    return false;
+  }
+
+  GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
+  // If we are loading from what is not a global value, then we
+  // fail and return.
+  if (!gv) {
+    return false;
+  }
+
+  // If we don't have an initializer or we have an initializer and
+  // the initializer is not a 32bit integer, we fail.
+  if (!gv->hasInitializer()
+      || !gv->getInitializer()->getType()->isIntegerTy(32)) {
+    return false;
+  }
+
+  // Now that we have the global variable initializer, lets replace
+  // all uses of the load instruction with the samplerVal and
+  // reparse the __amdil_is_constant() function.
+  Constant *samplerVal = gv->getInitializer();
+  lInst->replaceAllUsesWith(samplerVal);
+  return true;
+}
+
+// Module-level initialization hook of FunctionPass; this pass keeps no
+// module state, so the module is never modified here.
+bool
+AMDILPeepholeOpt::doInitialization(Module &M)
+{
+  return false;
+}
+
+// Module-level finalization hook of FunctionPass; nothing to clean up and
+// the module is never modified here.
+bool
+AMDILPeepholeOpt::doFinalization(Module &M)
+{
+  return false;
+}
+
+// Declare the analyses this pass depends on. It needs the machine
+// function analysis and preserves everything else.
+void
+AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  AU.addRequired<MachineFunctionAnalysis>();
+  FunctionPass::getAnalysisUsage(AU);
+  AU.setPreservesAll();
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,2541 @@
+//===-- AMDILPointerManager.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation for the AMDILPointerManager classes. See header file for
+// more documentation of class. TODO: This fails when function calls are enabled,
+// must always be inlined.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILPointerManager.h"
+#include "AMDILPointerManagerImpl.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILDeviceInfo.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include <iostream>
+#include <set>
+#include <map>
+#include <list>
+#include <queue>
+#include <cstdio>
+#define SAMPLER_INDEX 3
+#define SAMPLER_ARG_COUNT 5
+using namespace llvm;
+// Unique pass identifier used by the LLVM pass registry.
+char AMDILPointerManager::ID = 0;
+namespace llvm
+{
+// Factory: let the subtarget's device choose the pointer-manager
+// implementation appropriate for its hardware generation.
+FunctionPass*
+createAMDILPointerManager(TargetMachine &tm, CodeGenOpt::Level OL)
+{
+  return tm.getSubtarget<AMDILSubtarget>()
+         .device()->getPointerManager(tm, OL);
+}
+}
+
+// Construct the default (7XX) pointer manager and register the
+// MachineDominatorTree pass that this pass family depends on.
+AMDILPointerManager::AMDILPointerManager(
+  TargetMachine &tm,
+  CodeGenOpt::Level OL) :
+  MachineFunctionPass(ID),
+  TM(tm)
+{
+  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+}
+
+// Out-of-line destructor anchor; the pass owns no resources to release.
+AMDILPointerManager::~AMDILPointerManager()
+{
+}
+
+// Human-readable pass name shown by -debug-pass and pass timing output.
+const char*
+AMDILPointerManager::getPassName() const
+{
+  return "AMD IL Default Pointer Manager Pass";
+}
+
+// This pass only annotates instruction flags, so it preserves all
+// analyses; machine dominators are required for the derived (EG)
+// implementation's analysis.
+void
+AMDILPointerManager::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  AU.setPreservesAll();
+  AU.addRequiredID(MachineDominatorsID);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+// The default pointer manager just assigns the default ID's to
+// each load/store instruction and does nothing else. This is
+// the pointer manager for the 7XX series of cards.
+bool
+AMDILPointerManager::runOnMachineFunction(MachineFunction &MF)
+{
+  if (DEBUGME) {
+    dbgs() << getPassName() << "\n";
+    dbgs() << MF.getFunction()->getName() << "\n";
+    MF.dump();
+  }
+  // On the 7XX we don't have to do any special processing, so we
+  // can just allocate the default ID and be done with it.
+  AMDILPointerManagerImpl impl(MF, TM);
+  impl.allocateDefaultIDs(TM);
+  clearTempMIFlags(MF);
+  // Report no structural change to the machine function.
+  return false;
+}
+
+// Clear the temporary per-instruction bookkeeping bits (isImage,
+// ConflictPtr, ByteStore, PointerPath) that the pointer-manager analysis
+// encodes in the asm-printer flags, leaving only the final flag state.
+void
+AMDILPointerManager::clearTempMIFlags(MachineFunction &MF)
+{
+  for (MachineFunction::iterator mfBegin = MF.begin(),
+       mfEnd = MF.end(); mfBegin != mfEnd; ++mfBegin) {
+    MachineBasicBlock *MB = mfBegin;
+    for (MachineBasicBlock::instr_iterator mbb = MB->instr_begin(), mbe = MB->instr_end();
+         mbb != mbe; ++mbb) {
+      MachineInstr *MI = mbb;
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(MI, curRes);
+      // Clear temporary flags
+      curRes.bits.isImage = 0;
+      curRes.bits.ConflictPtr = 0;
+      curRes.bits.ByteStore = 0;
+      curRes.bits.PointerPath = 0;
+      setAsmPrinterFlags(MI, curRes);
+    }
+  }
+}
+
+
+// Construct the Evergreen-and-later pointer manager; all state lives in
+// the AMDILPointerManager base class.
+AMDILEGPointerManager::AMDILEGPointerManager(
+  TargetMachine &tm,
+  CodeGenOpt::Level OL) :
+  AMDILPointerManager(tm, OL)
+{
+}
+
+// Out-of-line destructor anchor; nothing to release beyond the base class.
+AMDILEGPointerManager::~AMDILEGPointerManager()
+{
+}
+
+// Evergreen+ entry point: run the full pointer-manager analysis
+// (AMDILPointerManagerImpl::perform) rather than just assigning default
+// IDs, then strip the temporary flags. Returns whether the function
+// was changed.
+bool
+AMDILEGPointerManager::runOnMachineFunction(MachineFunction &MF)
+{
+  if (DEBUGME) {
+    dbgs() << getPassName() << "\n";
+    dbgs() << MF.getFunction()->getName() << "\n";
+    MF.dump();
+  }
+
+  AMDILPointerManagerImpl impl(MF, TM);
+  bool changed = impl.perform();
+  clearTempMIFlags(MF);
+  return changed;
+}
+
+// Human-readable pass name shown by -debug-pass and pass timing output.
+const char*
+AMDILEGPointerManager::getPassName() const
+{
+  return "AMD IL EG Pointer Manager Pass";
+}
+
+// Cache the target machine, subtarget, kernel manager and the per-module /
+// per-function AMDIL info objects used throughout the analysis, and reset
+// the write-image counter.
+AMDILPointerManagerImpl::AMDILPointerManagerImpl(MachineFunction& mf,
+    TargetMachine& tm)
+  : MF(mf), TM(tm)
+{
+  ATM = reinterpret_cast<const AMDILTargetMachine*>(&TM);
+  STM = ATM->getSubtargetImpl();
+  KM = STM->getKernelManager();
+  mAMI = &(MF.getMMI().getObjFileInfo<AMDILModuleInfo>());
+  mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
+  numWriteImages = 0;
+}
+
+// Try to determine the name of the sampler used by an image-read machine
+// instruction MI (whose sampler operand is register SAMPLER_INDEX).
+// On success, returns the kernel-argument name of the sampler, or the
+// synthesized name "unknown_<imm>" when the sampler was an immediate
+// spilled through a frame index; 'val' receives that immediate, or ~0U
+// when the sampler is identified by argument name only.
+// The unhandled cases below assert: they represent sampler values this
+// tracking cannot resolve at compile time.
+std::string
+AMDILPointerManagerImpl::findSamplerName(TargetMachine &TM, MachineInstr* MI, unsigned &val)
+{
+  std::string sampler = "unknown";
+  assert(MI->getNumOperands() == SAMPLER_ARG_COUNT && "Only an "
+         "image read instruction with SAMPLER_ARG_COUNT arguments can "
+         "have a sampler.");
+  assert(MI->getOperand(SAMPLER_INDEX).isReg() &&
+         "Argument SAMPLER_INDEX must be a register to call this function");
+  val = ~0U;
+  unsigned reg = MI->getOperand(SAMPLER_INDEX).getReg();
+  // If this register points to an argument, then
+  // we can return the argument name.
+  if (dyn_cast_or_null<Argument>(lookupTable[reg].second.second)) {
+    return lookupTable[reg].second.second->getName();
+  }
+  // Otherwise the sampler is coming from memory somewhere.
+  // If the sampler memory location can be tracked, then
+  // we ascertain the sampler name that way.
+  // The most common case is when optimizations are disabled
+  // or mem2reg is not enabled, then the sampler when it is
+  // an argument is passed through the frame index.
+
+  // In the optimized case, the instruction that defined
+  // register from operand #3 is a private load.
+  MachineRegisterInfo &regInfo = MF.getRegInfo();
+  assert(!regInfo.def_empty(reg)
+         && "We don't have any defs of this register, but we aren't an argument!");
+
+  MachineOperand& defOp = regInfo.reg_begin(reg).getOperand();
+
+  MachineInstr *defMI = defOp.getParent();
+  if (isPrivateInst(TM, defMI) && isLoadInst(TM, defMI)) {
+    if (defMI->getOperand(1).isFI()) {
+      // Follow the frame-index slot back to what was stored into it.
+      RegValPair &fiRVP = FIToPtrMap[defMI->getOperand(1).getIndex()];
+      if (fiRVP.second.second && dyn_cast<Argument>(fiRVP.second.second)) {
+        return fiRVP.second.second->getName();
+      } else if (!fiRVP.second.second && fiRVP.first) {
+        // The slot holds a register; look at that register's def for an
+        // immediate sampler value.
+        defOp = regInfo.reg_begin(fiRVP.first).getOperand();
+        defMI = defOp.getParent();
+        if (defMI->getOperand(1).isImm()) {
+          val = defMI->getOperand(1).getImm();
+          char buffer[1024];
+          sprintf(buffer, "_%d", val);
+          return sampler + std::string(buffer);
+        } else {
+          // FIXME: Fix the case where a sampler is loaded from
+          // a frame index, but the source instruction was not
+          // created from the AMDdbgmove pass.
+          assert(!"Found a case of the AMDdbgmove pass that we don't handle!");
+        }
+      } else {
+        // FIXME: Fix the case where the value stored is not a kernel argument and not a situation which is modified by AMDdbgmove pass.
+        assert(!"Found a private load of a sampler where the value isn't an argument!");
+      }
+    } else {
+      // FIXME: Fix the case where someone dynamically loads a sampler value
+      // from private memory. This is problematic because we need to know the
+      // sampler value at compile time and if it is dynamically loaded, we won't
+      // know what sampler value to use.
+      assert(!"Found a private load of a sampler that isn't from a frame index!");
+    }
+  } else {
+    // FIXME: Handle the case where the def is neither a private instruction
+    // and not a load instruction. This shouldn't occur, but putting an assertion
+    // just to make sure that it doesn't.
+    assert(!"Found a case which we don't handle.");
+  }
+  return sampler;
+}
+
+
+// Helper function to determine if the current pointer is from the
+// local, region or private address spaces.
+// Returns true only when the corresponding address space is implemented
+// in hardware on this device (semaphore instructions always count).
+static bool
+isLRPInst(TargetMachine &TM,
+          MachineInstr *MI,
+          const AMDILTargetMachine *ATM)
+{
+  const AMDILSubtarget *STM
+  = ATM->getSubtargetImpl();
+  if (!MI) {
+    return false;
+  }
+  if ((isRegionInst(TM, MI)
+       && STM->device()->usesHardware(AMDILDeviceInfo::RegionMem))
+      || (isLocalInst(TM, MI)
+          && STM->device()->usesHardware(AMDILDeviceInfo::LocalMem))
+      || (isPrivateInst(TM, MI)
+          && STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem))
+      // FIXME: This is a hack since the frontend doesn't recognize semaphores yet.
+      || isSemaphoreInst(TM, MI)) {
+    return true;
+  }
+  return false;
+}
+
+/// Helper function to determine if the I/O instruction uses
+/// global device memory or not. An instruction counts as global when it
+/// is explicitly global, or when it targets a region/local/constant/
+/// private address space that this device emulates in global memory
+/// (i.e. has no hardware support for).
+static bool
+usesGlobal(
+  TargetMachine &TM,
+  const AMDILTargetMachine *ATM,
+  MachineInstr *MI)
+{
+  const AMDILSubtarget *STM = ATM->getSubtargetImpl();
+  return (isGlobalInst(TM, MI)
+          || (isRegionInst(TM, MI)
+              && !STM->device()->usesHardware(AMDILDeviceInfo::RegionMem))
+          || (isLocalInst(TM, MI)
+              && !STM->device()->usesHardware(AMDILDeviceInfo::LocalMem))
+          || (isConstantInst(TM, MI)
+              && !STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem))
+          || (isPrivateInst(TM, MI)
+              && !STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)));
+}
+
+// Helper function that allocates the default resource ID for the
+// respective I/O types.
+// Classifies MI by the memory space it touches (global/private/local/
+// region/constant/append) and writes the matching device resource ID
+// into curRes, patching the resource-ID immediate of atomic instructions
+// in place; the final flags are stored back onto MI.
+// 'addID' additionally records the global resource ID in the function's
+// UAV set.
+void
+AMDILPointerManagerImpl::allocateDefaultID(
+  TargetMachine &TM,
+  AMDILAS::InstrResEnc &curRes,
+  MachineInstr *MI,
+  bool addID)
+{
+  if (DEBUGME) {
+    dbgs() << "Assigning instruction to default ID. Inst:";
+    MI->dump();
+  }
+  // If we use global memory, lets set the Operand to
+  // the ARENA_UAV_ID.
+  if (usesGlobal(TM, ATM, MI)
+      || isGlobalAtomic(TM, MI) || is64BitGlobalAtomic(TM, MI)
+      || isArenaAtomic(TM, MI)) {
+    curRes.bits.ResourceID =
+      STM->device()->getResourceID(AMDILDevice::GLOBAL_ID);
+    // Atomics carry the resource ID as their last (immediate) operand.
+    if (isAtomicInst(TM, MI)) {
+      MI->getOperand(MI->getNumOperands()-1)
+      .setImm(curRes.bits.ResourceID);
+    }
+    // Resource ID 8 is the arena UAV; register it when arena segments
+    // are not supported.
+    if (curRes.bits.ResourceID == 8
+        && !STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+      KM->setUAVID(NULL, curRes.bits.ResourceID);
+      mMFI->uav_insert(curRes.bits.ResourceID);
+    }
+    if (addID) {
+      mMFI->uav_insert(curRes.bits.ResourceID);
+    }
+  } else if (isPrivateInst(TM, MI)) {
+    curRes.bits.ResourceID =
+      STM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+    mMFI->setUsesScratch();
+  } else if (isLocalInst(TM, MI)
+             || isLocalAtomic(TM, MI) || is64BitLocalAtomic(TM, MI)) {
+    curRes.bits.ResourceID =
+      STM->device()->getResourceID(AMDILDevice::LDS_ID);
+    mMFI->setUsesLDS();
+    if (isAtomicInst(TM, MI)) {
+      assert(curRes.bits.ResourceID && "Atomic resource ID "
+             "cannot be zero!");
+      MI->getOperand(MI->getNumOperands()-1)
+      .setImm(curRes.bits.ResourceID);
+    }
+    // NOTE(review): duplicate of the setUsesLDS() call above; redundant
+    // but harmless.
+    mMFI->setUsesLDS();
+  } else if (isRegionInst(TM, MI)
+             || isRegionAtomic(TM, MI) || is64BitRegionAtomic(TM, MI)) {
+    curRes.bits.ResourceID =
+      STM->device()->getResourceID(AMDILDevice::GDS_ID);
+    mMFI->setUsesGDS();
+    if (isAtomicInst(TM, MI)) {
+      assert(curRes.bits.ResourceID && "Atomic resource ID "
+             "cannot be zero!");
+      (MI)->getOperand((MI)->getNumOperands()-1)
+      .setImm(curRes.bits.ResourceID);
+    }
+    // NOTE(review): duplicate of the setUsesGDS() call above; redundant
+    // but harmless.
+    mMFI->setUsesGDS();
+  } else if (isConstantInst(TM, MI)) {
+    // If we are unknown constant instruction and the base pointer is known.
+    // Set the resource ID accordingly, otherwise use the default constant ID.
+    // FIXME: this should not require the base pointer to know what constant
+    // it is from.
+    if (mAMI->isKernel(MF.getFunction()->getName())) {
+      const AMDILKernel *krnl = mAMI->getKernel(MF.getFunction()->getName());
+      const Value *V = getBasePointerValue(MI);
+      if (V && !dyn_cast<AllocaInst>(V)) {
+        curRes.bits.ResourceID = mAMI->getConstPtrCB(krnl, V->getName());
+        curRes.bits.HardwareInst = 1;
+      } else if (V && dyn_cast<AllocaInst>(V)) {
+        // FIXME: Need a better way to fix this. Requires a rewrite of how
+        // we lower global addresses to various address spaces.
+        // So for now, lets assume that there is only a single
+        // constant buffer that can be accessed from a load instruction
+        // that is derived from an alloca instruction.
+        curRes.bits.ResourceID = 2;
+        curRes.bits.HardwareInst = 1;
+      } else {
+        if (isStoreInst(TM, MI)) {
+          if (DEBUGME) {
+            dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+            MI->dump();
+          }
+          curRes.bits.ByteStore = 1;
+        }
+        curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::CONSTANT_ID);
+      }
+      mMFI->setUsesConstant();
+    } else {
+      // Not a kernel: fall back to the global resource for constant
+      // accesses.
+      if (isStoreInst(TM, MI)) {
+        if (DEBUGME) {
+          dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+          MI->dump();
+        }
+        curRes.bits.ByteStore = 1;
+      }
+      curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::GLOBAL_ID);
+      KM->setUAVID(NULL, curRes.bits.ResourceID);
+      mMFI->uav_insert(curRes.bits.ResourceID);
+    }
+  } else if (isAppendInst(TM, MI)) {
+    // Append/consume counters: allocate counter 1 for alloc, 2 otherwise.
+    unsigned opcode = MI->getOpcode();
+    if (opcode == AMDIL::APPEND_ALLOC || opcode == AMDIL::APPEND64_ALLOC) {
+      curRes.bits.ResourceID = 1;
+    } else {
+      curRes.bits.ResourceID = 2;
+    }
+  }
+  setAsmPrinterFlags(MI, curRes);
+}
+
+// Function that parses the arguments and updates the lookupTable with the
+// pointer -> register mapping. This function also checks for cacheable
+// pointers and updates the CacheableSet with the arguments that
+// can be cached based on the readonlypointer annotation. The final
+// purpose of this function is to update the images and counters
+// with all pointers that are either images or atomic counters.
+uint32_t
+AMDILPointerManagerImpl::parseArguments()
+{
+  uint32_t writeOnlyImages = 0;
+  uint32_t readOnlyImages = 0;
+  std::string cachedKernelName = "llvm.readonlypointer.annotations.";
+  cachedKernelName.append(MF.getFunction()->getName());
+  GlobalVariable *GV = MF.getFunction()->getParent()
+                       ->getGlobalVariable(cachedKernelName);
+  unsigned cbNum = 0;
+  unsigned regNum = 0;
+  for (Function::const_arg_iterator I = MF.getFunction()->arg_begin(),
+       E = MF.getFunction()->arg_end(); I != E; ++I) {
+    const Argument *curArg = I;
+    if (DEBUGME) {
+      dbgs() << "Argument: ";
+      curArg->dump();
+    }
+    Type *curType = curArg->getType();
+    // We are either a scalar or vector type that
+    // is passed by value that is not a opaque/struct
+    // type. We just need to increment regNum
+    // the correct number of times to match the number
+    // of registers that it takes up.
+    if (curType->isFPOrFPVectorTy() ||
+        curType->isIntOrIntVectorTy()) {
+      // We are scalar, so increment once and
+      // move on
+      if (!curType->isVectorTy()) {
+        lookupTable[mMFI->getArgReg(regNum)] =
+          std::make_pair(~0U, createStrValPair(curArg));
+        ++regNum;
+        ++cbNum;
+        continue;
+      }
+      VectorType *VT = dyn_cast<VectorType>(curType);
+      // We are a vector type. If we are 64bit type, then
+      // we increment length / 2 times, otherwise we
+      // increment length / 4 times. The only corner case
+      // is with vec3 where the vector gets scalarized and
+      // therefor we need a loop count of 3.
+      size_t loopCount = VT->getNumElements();
+      if (loopCount != 3) {
+        if (VT->getScalarSizeInBits() == 64) {
+          loopCount = loopCount >> 1;
+        } else {
+          loopCount = (loopCount + 2) >> 2;
+        }
+        cbNum += loopCount;
+      } else {
+        cbNum++;
+      }
+      while (loopCount--) {
+        lookupTable[mMFI->getArgReg(regNum)] =
+          std::make_pair(~0U, createStrValPair(curArg));
+        ++regNum;
+      }
+    } else if (curType->isPointerTy()) {
+      Type *CT = dyn_cast<PointerType>(curType)->getElementType();
+      const StructType *ST = dyn_cast<StructType>(CT);
+      if (ST && ST->isOpaque()) {
+        StringRef name = ST->getName();
+        bool i1d_type  = name.startswith("struct._image1d_t");
+        bool i1da_type = name.startswith("struct._image1d_array_t");
+        bool i1db_type = name.startswith("struct._image1d_buffer_t");
+        bool i2d_type  = name.startswith("struct._image2d_t");
+        bool i2da_type = name.startswith("struct._image2d_array_t");
+        bool i3d_type  = name.startswith("struct._image3d_t");
+        bool c32_type  = name.startswith("struct._counter32_t");
+        bool c64_type  = name.startswith("struct._counter64_t");
+        bool sema_type = name.startswith("struct._sema_t");
+        if (i2d_type || i3d_type || i2da_type ||
+            i1d_type || i1db_type || i1da_type) {
+          images.insert(createStrValPair(I));
+          uint32_t imageNum = readOnlyImages + writeOnlyImages;
+          if (mAMI->isReadOnlyImage(MF.getFunction()->getName(), imageNum)) {
+            if (DEBUGME) {
+              dbgs() << "Pointer: '" << curArg->getName()
+                     << "' is a read only image # " << readOnlyImages << "!\n";
+            }
+            // We store the cbNum along with the image number so that we can
+            // correctly encode the 'info' intrinsics.
+            lookupTable[mMFI->getArgReg(regNum)] =
+              std::make_pair
+              ((cbNum << 16 | readOnlyImages++), createStrValPair(curArg));
+          } else if (mAMI->isWriteOnlyImage(MF.getFunction()->getName(), imageNum)) {
+            if (DEBUGME) {
+              dbgs() << "Pointer: '" << curArg->getName()
+                     << "' is a write only image # " << writeOnlyImages << "!\n";
+            }
+            // We store the cbNum along with the image number so that we can
+            // correctly encode the 'info' intrinsics.
+            lookupTable[mMFI->getArgReg(regNum)] =
+              std::make_pair
+              ((cbNum << 16 | writeOnlyImages++), createStrValPair(curArg));
+          } else {
+            assert(!"Read/Write images are not supported!");
+          }
+          ++regNum;
+          cbNum += 2;
+          continue;
+        } else if (c32_type || c64_type) {
+          if (DEBUGME) {
+            dbgs() << "Pointer: '" << curArg->getName()
+                   << "' is a " << (c32_type ? "32" : "64")
+                   << " bit atomic counter type!\n";
+          }
+          counters.push_back(createStrValPair(I));
+        } else if (sema_type) {
+          if (DEBUGME) {
+            dbgs() << "Pointer: '" << curArg->getName()
+                   << "' is a semaphore type!\n";
+          }
+          semaphores.push_back(createStrValPair(I));
+        }
+      }
+
+      if (STM->device()->isSupported(AMDILDeviceInfo::CachedMem)
+          && GV && GV->hasInitializer()) {
+        const ConstantArray *nameArray
+        = dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+        if (nameArray) {
+          for (unsigned x = 0, y = nameArray->getNumOperands(); x < y; ++x) {
+            const GlobalVariable *gV= dyn_cast_or_null<GlobalVariable>(
+                                        nameArray->getOperand(x)->getOperand(0));
+            const ConstantDataArray *argName =
+              dyn_cast_or_null<ConstantDataArray>(gV->getInitializer());
+            if (!argName) {
+              continue;
+            }
+            std::string argStr = argName->getAsString();
+            std::string curStr = curArg->getName().str();
+            if (!strcmp(argStr.data(), curStr.data())) {
+              if (DEBUGME) {
+                dbgs() << "Pointer: '" << curArg->getName()
+                       << "' is cacheable!\n";
+              }
+              cacheablePtrs.insert(createStrValPair(curArg));
+            }
+          }
+        }
+      }
+      uint32_t as = dyn_cast<PointerType>(curType)->getAddressSpace();
+      // Handle the case where the kernel argument is a pointer
+      if (DEBUGME) {
+        dbgs() << "Pointer: " << curArg->getName() << " is assigned ";
+        if (as == AMDILAS::GLOBAL_ADDRESS) {
+          dbgs() << "uav " << STM->device()
+                 ->getResourceID(AMDILDevice::GLOBAL_ID);
+        } else if (as == AMDILAS::PRIVATE_ADDRESS) {
+          dbgs() << "scratch " << STM->device()
+                 ->getResourceID(AMDILDevice::SCRATCH_ID);
+        } else if (as == AMDILAS::LOCAL_ADDRESS) {
+          dbgs() << "lds " << STM->device()
+                 ->getResourceID(AMDILDevice::LDS_ID);
+        } else if (as == AMDILAS::CONSTANT_ADDRESS) {
+          dbgs() << "cb " << STM->device()
+                 ->getResourceID(AMDILDevice::CONSTANT_ID);
+        } else if (as == AMDILAS::REGION_ADDRESS) {
+          dbgs() << "gds " << STM->device()
+                 ->getResourceID(AMDILDevice::GDS_ID);
+        } else {
+          assert(!"Found an address space that we don't support!");
+        }
+        dbgs() << " @ register " << mMFI->getArgReg(regNum) << ". Inst: ";
+        curArg->dump();
+      }
+      switch (as) {
+      default:
+        lookupTable[mMFI->getArgReg(regNum)] = std::make_pair
+                                               (STM->device()->getResourceID(AMDILDevice::GLOBAL_ID), createStrValPair(curArg));
+        break;
+      case AMDILAS::LOCAL_ADDRESS:
+        lookupTable[mMFI->getArgReg(regNum)] = std::make_pair
+                                               (STM->device()->getResourceID(AMDILDevice::LDS_ID), createStrValPair(curArg));
+        mMFI->setHasLDSArg();
+        break;
+      case AMDILAS::REGION_ADDRESS:
+        lookupTable[mMFI->getArgReg(regNum)] = std::make_pair
+                                               (STM->device()->getResourceID(AMDILDevice::GDS_ID), createStrValPair(curArg));
+        mMFI->setHasGDSArg();
+        break;
+      case AMDILAS::CONSTANT_ADDRESS:
+        lookupTable[mMFI->getArgReg(regNum)] = std::make_pair
+                                               (STM->device()->getResourceID(AMDILDevice::CONSTANT_ID), createStrValPair(curArg));
+        mMFI->setHasConstantArg();
+        break;
+      case AMDILAS::PRIVATE_ADDRESS:
+        lookupTable[mMFI->getArgReg(regNum)] = std::make_pair
+                                               (STM->device()->getResourceID(AMDILDevice::SCRATCH_ID), createStrValPair(curArg));
+        mMFI->setHasScratchArg();
+        break;
+      }
+      // In this case we need to increment it once.
+      ++regNum;
+      ++cbNum;
+    } else {
+      // Is anything missing that is legal in CL?
+      assert(0 && "Current type is not supported!");
+      lookupTable[mMFI->getArgReg(regNum)] = std::make_pair
+                                             (STM->device()->getResourceID(AMDILDevice::GLOBAL_ID), createStrValPair(curArg));
+      ++regNum;
+      ++cbNum;
+    }
+  }
+  return writeOnlyImages;
+}
+
+// The call stack is interesting in that even in SSA form, it assigns
+// registers to the same value's over and over again. So we need to
+// ignore the values that are assigned and just deal with the input
+// and return registers.
+// Scans backwards from the CALL for the copies that set up its argument
+// registers, then forwards for the copy out of the return register, and
+// links any pointer-carrying input to the call's result.
+void
+AMDILPointerManagerImpl::parseCall(
+  MachineBasicBlock::iterator &mBegin,
+  MachineBasicBlock::iterator mEnd)
+{
+  // Source registers of the argument-setup copies, gathered on the
+  // backward scan so they can be linked to the return value below.
+  SmallVector<unsigned, 8> inputRegs;
+  AMDILAS::InstrResEnc curRes;
+  if (DEBUGME) {
+    dbgs() << "Parsing Call Stack Start.\n";
+  }
+  MachineBasicBlock::iterator callInst = mBegin;
+  MachineInstr *CallMI = callInst;
+  getAsmPrinterFlags(CallMI, curRes);
+  MachineInstr *MI = --mBegin;
+  unsigned reg = AMDIL::R1;
+  // First we need to check the input registers.
+  do {
+    // We stop if we hit the beginning of the call stack
+    // adjustment.
+    if (MI->getOpcode() == AMDIL::ADJCALLSTACKDOWN
+        || MI->getOpcode() == AMDIL::ADJCALLSTACKUP
+        || MI->getNumOperands() != 2
+        || !MI->getOperand(0).isReg()) {
+      break;
+    }
+    reg = MI->getOperand(0).getReg();
+    if (MI->getOperand(1).isReg()) {
+      unsigned reg1 = MI->getOperand(1).getReg();
+      inputRegs.push_back(reg1);
+      // A pointer-tracked source means the call itself lies on a
+      // pointer path.
+      if (lookupTable[reg1].second.second) {
+        curRes.bits.PointerPath = 1;
+      }
+    }
+    // The argument register is reassigned at every call site, so any
+    // previous pointer association for it is stale.
+    lookupTable.erase(reg);
+    // Stop on a non-virtual register (top bit set when viewed as
+    // signed) or at the top of the block.
+    if ((signed)reg < 0
+        || mBegin == CallMI->getParent()->begin()) {
+      break;
+    }
+    MI = --mBegin;
+  } while (1);
+  mBegin = callInst;
+  MI = ++mBegin;
+  // If the next registers operand 1 is not a register or that register
+  // is not R1, then we don't have any return values.
+  if (MI->getNumOperands() == 2
+      && MI->getOperand(1).isReg()
+      && (MI->getOperand(1).getReg() == AMDIL::R1
+          || MI->getOperand(1).getReg() == AMDIL::Rx1
+          || MI->getOperand(1).getReg() == AMDIL::Ry1
+          || MI->getOperand(1).getReg() == AMDIL::Rz1
+          || MI->getOperand(1).getReg() == AMDIL::Rw1
+          || MI->getOperand(1).getReg() == AMDIL::Rxy1
+          || MI->getOperand(1).getReg() == AMDIL::Rzw1)) {
+    // Next we check the output register.
+    reg = MI->getOperand(0).getReg();
+    // Now we link the inputs to the output.
+    for (unsigned x = 0; x < inputRegs.size(); ++x) {
+      // Propagate the first pointer-carrying input to the call result.
+      if (lookupTable[inputRegs[x]].second.second) {
+        curRes.bits.PointerPath = 1;
+        lookupTable[reg] = lookupTable[inputRegs[x]];
+        InstToPtrMap[CallMI].insert(
+          lookupTable[reg].second);
+        break;
+      }
+    }
+    lookupTable.erase(MI->getOperand(1).getReg());
+  }
+  setAsmPrinterFlags(CallMI, curRes);
+  if (DEBUGME) {
+    dbgs() << "Parsing Call Stack End.\n";
+  }
+  return;
+}
+// Detect if the current instruction conflicts with another instruction
+// and add the instruction to the correct location accordingly.
+//   MI          - instruction being inspected
+//   curRes      - its asm-printer flag encoding, updated in place
+//   isLoadStore - true for loads/stores: suppresses transferring the
+//                 pointer association to dstReg and suppresses adding
+//                 dstReg's pointer to the conflict set
+//   reg         - source register already known to lookupTable
+//   dstReg      - destination register of MI
+void
+AMDILPointerManagerImpl::detectConflictInst(
+  MachineInstr *MI,
+  AMDILAS::InstrResEnc &curRes,
+  bool isLoadStore,
+  unsigned reg,
+  unsigned dstReg)
+{
+  // If the instruction does not have a pointer path flag
+  // associated with it, then we know that no other pointer
+  // hits this instruction.
+  if (!curRes.bits.PointerPath) {
+    if (dyn_cast<PointerType>(lookupTable[reg].second.second->getType())) {
+      curRes.bits.PointerPath = 1;
+    }
+    // We don't want to transfer to the register number
+    // between load/store because the load dest can be completely
+    // different pointer path and the store doesn't have a real
+    // destination register.
+    if (!isLoadStore) {
+      if (DEBUGME) {
+        if (dyn_cast<PointerType>(lookupTable[reg].second.second->getType())) {
+          dbgs() << "Pointer: " << lookupTable[reg].second.second->getName();
+          assert(dyn_cast<PointerType>(lookupTable[reg].second.second->getType())
+                 && "Must be a pointer type for an instruction!");
+          switch (dyn_cast<PointerType>(
+                    lookupTable[reg].second.second->getType())->getAddressSpace()) {
+          case AMDILAS::GLOBAL_ADDRESS:
+            dbgs() << " UAV: ";
+            break;
+          case AMDILAS::LOCAL_ADDRESS:
+            dbgs() << " LDS: ";
+            break;
+          case AMDILAS::REGION_ADDRESS:
+            dbgs() << " GDS: ";
+            break;
+          case AMDILAS::PRIVATE_ADDRESS:
+            dbgs() << " SCRATCH: ";
+            break;
+          case AMDILAS::CONSTANT_ADDRESS:
+            dbgs() << " CB: ";
+            break;
+
+          }
+          dbgs() << lookupTable[reg].first << " Reg: " << reg
+                 << " assigned to reg " << dstReg << ". Inst: ";
+          MI->dump();
+        }
+      }
+      // We don't want to do any copies if the register is not virtual
+      // as it is the result of a CALL. ParseCallInst handles the
+      // case where the input and output need to be linked up
+      // if it occurs. The easiest way to check for virtual
+      // is to check the top bit.
+      // NOTE(review): the virtual-register (top bit) check described
+      // above is not actually performed here -- the copy is done
+      // unconditionally. Confirm whether callers guarantee it.
+      lookupTable[dstReg] = lookupTable[reg];
+    }
+  } else {
+    if (dyn_cast<PointerType>(lookupTable[reg].second.second->getType())) {
+      // Otherwise we have a conflict between two pointers somehow.
+      curRes.bits.ConflictPtr = 1;
+      if (DEBUGME) {
+        dbgs() << "Pointer: " << lookupTable[reg].second.second->getName();
+        assert(dyn_cast<PointerType>(lookupTable[reg].second.second->getType())
+               && "Must be a pointer type for a conflict instruction!");
+        switch (dyn_cast<PointerType>(
+                  lookupTable[reg].second.second->getType())->getAddressSpace()) {
+        case AMDILAS::GLOBAL_ADDRESS:
+          dbgs() << " UAV: ";
+          break;
+        case AMDILAS::LOCAL_ADDRESS:
+          dbgs() << " LDS: ";
+          break;
+        case AMDILAS::REGION_ADDRESS:
+          dbgs() << " GDS: ";
+          break;
+        case AMDILAS::PRIVATE_ADDRESS:
+          dbgs() << " SCRATCH: ";
+          break;
+        case AMDILAS::CONSTANT_ADDRESS:
+          dbgs() << " CB: ";
+          break;
+
+        }
+        dbgs() << lookupTable[reg].first << " Reg: " << reg;
+        if (InstToPtrMap[MI].size() > 1) {
+          dbgs() << " conflicts with:\n ";
+          for (PtrSet::iterator psib = InstToPtrMap[MI].begin(),
+               psie = InstToPtrMap[MI].end(); psib != psie; ++psib) {
+            dbgs() << "\t\tPointer: " << psib->second->getName() << " ";
+            assert(dyn_cast<PointerType>(psib->second->getType())
+                   && "Must be a pointer type for a conflict instruction!");
+            psib->second->dump();
+          }
+        } else {
+          dbgs() << ".";
+        }
+        dbgs() << " Inst: ";
+        MI->dump();
+      }
+    }
+    // Add the conflicting values to the pointer set for the instruction
+    InstToPtrMap[MI].insert(lookupTable[reg].second);
+    // We don't want to add the destination register if
+    // we are a load or store.
+    if (!isLoadStore) {
+      InstToPtrMap[MI].insert(lookupTable[dstReg].second);
+    }
+  }
+  setAsmPrinterFlags(MI, curRes);
+}
+
+// In this case we want to handle a load instruction.
+// Maps the load to the pointer feeding its address operand; loads whose
+// address comes from a frame index or constant pool entry, and loads of
+// local/region/private pointers, are given a default resource ID.
+void
+AMDILPointerManagerImpl::parseLoadInst(TargetMachine &TM, MachineInstr *MI)
+{
+  assert(isLoadInst(TM, MI) && "Only a load instruction can be parsed by "
+         "the parseLoadInst function.");
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+  unsigned dstReg = MI->getOperand(0).getReg();
+  unsigned idx = 0;
+  const Value *basePtr = NULL;
+  if (MI->getOperand(1).isReg()) {
+    // Address comes from a register: look up the pointer it tracks.
+    idx = MI->getOperand(1).getReg();
+    basePtr = lookupTable[idx].second.second;
+    // If we don't know what value the register
+    // is assigned to, then we need to special case
+    // this instruction.
+  } else if (MI->getOperand(1).isFI()) {
+    if (DEBUGME) {
+      dbgs() << "Found an instruction with a frame index #"
+             << MI->getOperand(1).getIndex() << " with reg "
+             << dstReg << "!\n";
+    }
+    // Loading through a frame index: forward whatever pointer was
+    // stored at that slot (recorded by parseStoreInst) to dstReg.
+    idx = MI->getOperand(1).getIndex();
+    lookupTable[dstReg] = FIToPtrMap[idx];
+  } else if (MI->getOperand(1).isCPI()) {
+    if (DEBUGME) {
+      dbgs() << "Found an instruction with a CPI index #"
+             << MI->getOperand(1).getIndex() << " with reg "
+             << dstReg << "!\n";
+    }
+    cpool.insert(MI);
+  }
+  // If we are a hardware local, then we don't need to track as there
+  // is only one resource ID that we need to know about, so we
+  // map it using allocateDefaultID, which maps it to the default.
+  // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS.
+  if (isLRPInst(TM, MI, ATM) || !basePtr) {
+    allocateDefaultID(TM, curRes, MI, true);
+    return;
+  }
+  // We have a load instruction so we map this instruction
+  // to the pointer and insert it into the set of known
+  // load instructions.
+  InstToPtrMap[MI].insert(createStrValPair(basePtr));
+  PtrToInstMap[basePtr].push_back(MI);
+
+  if (isGlobalInst(TM, MI)) {
+    // Add to the cacheable set for the block. If there was a store earlier
+    // in the block, this call won't actually add it to the cacheable set.
+    bbCacheable[MI->getParent()].addPossiblyCacheableInst(MI);
+  }
+
+  if (DEBUGME) {
+    dbgs() << "Assigning instruction to pointer ";
+    dbgs() << basePtr->getName() << ". Inst: ";
+    MI->dump();
+  }
+  detectConflictInst(MI, curRes, true, idx, dstReg);
+}
+
+// In this case we want to handle a store instruction.
+// Maps the store to the pointer feeding its address operand, updates the
+// block's cacheability state, and flags byte-store pointers (sub-32-bit
+// stores to global memory) that need special handling.
+void
+AMDILPointerManagerImpl::parseStoreInst(TargetMachine &TM, MachineInstr *MI)
+{
+  assert(isStoreInst(TM, MI) && "Only a store instruction can be parsed by "
+         "the parseStoreInst function.");
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+  // dstReg identifies the *data* operand: a register number, or the
+  // frame index when the data comes from a stack slot.
+  unsigned dstReg;
+  if (MI->getOperand(0).isFI()) {
+    dstReg = MI->getOperand(0).getIndex();
+  } else {
+    dstReg = MI->getOperand(0).getReg();
+  }
+
+  // If the data part of the store instruction is known to
+  // be a pointer, then we need to mark this pointer as being
+  // a byte pointer. This is the conservative case that needs
+  // to be handled correctly.
+  if (lookupTable[dstReg].second.second && lookupTable[dstReg].first != ~0U) {
+    curRes.bits.ConflictPtr = 1;
+    if (DEBUGME) {
+      dbgs() << "Found a case where the pointer is being stored!\n";
+      MI->dump();
+      dbgs() << "Pointer is ";
+      lookupTable[dstReg].second.second->print(dbgs());
+      dbgs() << "\n";
+    }
+    if (lookupTable[dstReg].second.second->getType()->isPointerTy()) {
+      conflictPtrs.insert(lookupTable[dstReg].second);
+    }
+  }
+
+  // Before we go through the special cases, for the cacheable information
+  // all we care is if the store if global or not.
+  if (!isLRPInst(TM, MI, ATM)) {
+    bbCacheable[MI->getParent()].setReachesExit();
+  }
+
+  // If the address is not a register address,
+  // then we need to lower it as an unknown id.
+  if (!MI->getOperand(1).isReg()) {
+    if (MI->getOperand(1).isCPI()) {
+      if (DEBUGME) {
+        dbgs() << "Found an instruction with a CPI index #"
+               << MI->getOperand(1).getIndex() << " with ";
+        if (MI->getOperand(0).isReg()) {
+          dbgs() << "reg " << dstReg << "!\n";
+        } else if (MI->getOperand(0).isFI()) {
+          dbgs() << "frameindex " << dstReg << "!\n";
+        }
+      }
+      cpool.insert(MI);
+    } else if (MI->getOperand(1).isFI()) {
+      if (DEBUGME) {
+        dbgs() << "Found an instruction with a frame index #"
+               << MI->getOperand(1).getIndex() << " with reg ";
+        if (MI->getOperand(0).isReg()) {
+          dbgs() << "reg " << dstReg << "!\n";
+        } else if (MI->getOperand(0).isFI()) {
+          dbgs() << "frameindex " << dstReg << "!\n";
+        }
+      }
+      // If we are a frame index and we are storing a pointer there, lets
+      // go ahead and assign the pointer to the location within the frame
+      // index map so that we can get the value out later.
+      // NOTE(review): 'tmp' is a reference into lookupTable, so the
+      // FI-to-FI assignment below also overwrites lookupTable[dstReg]
+      // -- confirm this aliasing is intentional.
+      RegValPair &tmp = lookupTable[dstReg];
+      if (MI->getOperand(0).isFI()) {
+        tmp = FIToPtrMap[dstReg];
+      }
+      if (!tmp.second.second) {
+        // If we don't have a valid pointer, then renumber the
+        // register from 0 to the VREG/FI that we are
+        // storing the data of.
+        tmp.first = dstReg;
+      }
+      FIToPtrMap[MI->getOperand(1).getIndex()] = tmp;
+    }
+
+    allocateDefaultID(TM, curRes, MI, true);
+    return;
+  }
+  unsigned reg = MI->getOperand(1).getReg();
+  // If we don't know what value the register
+  // is assigned to, then we need to special case
+  // this instruction.
+  if (!lookupTable[reg].second.second) {
+    allocateDefaultID(TM, curRes, MI, true);
+    return;
+  }
+  // If we are a hardware local, then we don't need to track as there
+  // is only one resource ID that we need to know about, so we
+  // map it using allocateDefaultID, which maps it to the default.
+  // This is also the case for REGION_ADDRESS and PRIVATE_ADDRESS.
+  if (isLRPInst(TM, MI, ATM)) {
+    allocateDefaultID(TM, curRes, MI, true);
+    return;
+  }
+
+  // We have a store instruction so we map this instruction
+  // to the pointer and insert it into the set of known
+  // store instructions.
+  InstToPtrMap[MI].insert(lookupTable[reg].second);
+  PtrToInstMap[lookupTable[reg].second.second].push_back(MI);
+  // Sub-32-bit register classes imply a byte-granularity store; global
+  // byte stores force the pointer into the byte-pointer set.
+  uint16_t RegClass = MI->getDesc().OpInfo[0].RegClass;
+  switch (RegClass) {
+  default:
+    break;
+  case AMDIL::GPRI8RegClassID:
+  case AMDIL::GPRV2I8RegClassID:
+  case AMDIL::GPRI16RegClassID:
+    if (usesGlobal(TM, ATM, MI)) {
+      if (DEBUGME) {
+        dbgs() << "Annotating instruction as Byte Store. Inst: ";
+        MI->dump();
+      }
+      curRes.bits.ByteStore = 1;
+      setAsmPrinterFlags(MI, curRes);
+      const PointerType *PT = dyn_cast<PointerType>(
+                                lookupTable[reg].second.second->getType());
+      if (PT) {
+        bytePtrs.insert(lookupTable[reg].second);
+      }
+    }
+    break;
+  };
+  // If we are a truncating store, then we need to determine the
+  // size of the pointer that we are truncating to, and if we
+  // are less than 32 bits, we need to mark the pointer as a
+  // byte store pointer.
+  switch (MI->getOpcode()) {
+  case AMDIL::GLOBALTRUNCSTORE_i16i8:
+  case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
+  case AMDIL::GLOBALTRUNCSTORE_i32i8:
+  case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
+  case AMDIL::GLOBALTRUNCSTORE_i64i8:
+  case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
+  case AMDIL::GLOBALTRUNCSTORE_i32i16:
+  case AMDIL::GLOBALTRUNCSTORE_i64i16:
+  case AMDIL::GLOBALSTORE_i8:
+  case AMDIL::GLOBALSTORE_i16:
+    curRes.bits.ByteStore = 1;
+    setAsmPrinterFlags(MI, curRes);
+    bytePtrs.insert(lookupTable[reg].second);
+    break;
+  default:
+    break;
+  }
+
+  if (DEBUGME) {
+    dbgs() << "Assigning instruction to pointer ";
+    dbgs() << lookupTable[reg].second.second->getName() << ". Inst: ";
+    MI->dump();
+  }
+  detectConflictInst(MI, curRes, true, reg, dstReg);
+}
+
+// In this case we want to handle an atomic instruction.
+// Scans the operands for registers tracked to pointers and maps the
+// atomic to each such pointer; when none is found, the instruction gets
+// the default resource ID.
+void
+AMDILPointerManagerImpl::parseAtomicInst(TargetMachine &TM, MachineInstr *MI)
+{
+  assert(isAtomicInst(TM, MI) && "Only an atomic instruction can be parsed by "
+         "the parseAtomicInst function.");
+  AMDILAS::InstrResEnc curRes;
+  unsigned dstReg = MI->getOperand(0).getReg();
+  unsigned reg = 0;
+  getAsmPrinterFlags(MI, curRes);
+  unsigned numOps = MI->getNumOperands();
+  bool found = false;
+  // Walk the operands from last down to 1 (operand 0 is the result).
+  while (--numOps) {
+    MachineOperand &Op = MI->getOperand(numOps);
+    if (!Op.isReg()) {
+      continue;
+    }
+    reg = Op.getReg();
+    // If the register is not known to be owned by a pointer
+    // then we can ignore it
+    if (!lookupTable[reg].second.second) {
+      continue;
+    }
+    // if the pointer is known to be local, region or private, then we
+    // can ignore it.  Although there are no private atomics, we still
+    // do this check so we don't have to write a new function to check
+    // for only local and region.
+    // NOTE(review): isLRPInst only inspects MI, not the current operand,
+    // so this test yields the same answer on every iteration.
+    if (isLRPInst(TM, MI, ATM)) {
+      continue;
+    }
+    found = true;
+    InstToPtrMap[MI].insert(lookupTable[reg].second);
+    PtrToInstMap[lookupTable[reg].second.second].push_back(MI);
+
+    // We now know we have an atomic operation on global memory.
+    // This is a store so must update the cacheable information.
+    bbCacheable[MI->getParent()].setReachesExit();
+
+    // Only do if have SC with arena atomic bug fix (EPR 326883).
+    if (STM->calVersion() >= CAL_VERSION_SC_150) {
+      // Force pointers that are used by atomics to be in the arena.
+      // If they were allowed to be accessed as RAW they would cause
+      // all access to use the slow complete path.
+      if (DEBUGME) {
+        dbgs() << __LINE__ << ": Setting byte store bit on atomic instruction: ";
+        MI->dump();
+      }
+      curRes.bits.ByteStore = 1;
+      bytePtrs.insert(lookupTable[reg].second);
+    }
+
+    if (DEBUGME) {
+      dbgs() << "Assigning instruction to pointer ";
+      dbgs() << lookupTable[reg].second.second->getName()<< ". Inst: ";
+      MI->dump();
+    }
+    detectConflictInst(MI, curRes, true, reg, dstReg);
+  }
+  if (!found) {
+    allocateDefaultID(TM, curRes, MI, true);
+  }
+}
+// Handle an append/consume counter instruction: tie it to the pointer
+// tracked for its counter operand, or fall back to the default resource
+// ID when that operand's register is not associated with any pointer.
+void
+AMDILPointerManagerImpl::parseAppendInst(TargetMachine &TM, MachineInstr *MI)
+{
+  assert(isAppendInst(TM, MI) && "Only an atomic counter instruction can be "
+         "parsed by the parseAppendInst function.");
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+  unsigned destReg = MI->getOperand(0).getReg();
+  unsigned counterReg = MI->getOperand(1).getReg();
+  // No pointer known for the counter register: assign the default ID.
+  if (!lookupTable[counterReg].second.second) {
+    allocateDefaultID(TM, curRes, MI, true);
+    return;
+  }
+  RegValPair &entry = lookupTable[counterReg];
+  InstToPtrMap[MI].insert(entry.second);
+  PtrToInstMap[entry.second.second].push_back(MI);
+  if (DEBUGME) {
+    dbgs() << "Assigning instruction to pointer ";
+    dbgs() << entry.second.second->getName() << ". Inst: ";
+    MI->dump();
+  }
+  detectConflictInst(MI, curRes, true, counterReg, destReg);
+}
+/// Handle a semaphore instruction: associate it with the pointer that
+/// its destination-register operand is tracked to.
+void
+AMDILPointerManagerImpl::parseSemaInst(TargetMachine &TM, MachineInstr *MI)
+{
+  assert(isSemaphoreInst(TM, MI) && "Only an semaphore instruction can be "
+         "parsed by the parseSemaInst function.");
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+  unsigned semaReg = MI->getOperand(0).getReg();
+  RegValPair &entry = lookupTable[semaReg];
+  InstToPtrMap[MI].insert(entry.second);
+  PtrToInstMap[entry.second.second].push_back(MI);
+  if (DEBUGME) {
+    dbgs() << "Assigning instruction to pointer ";
+    dbgs() << entry.second.second->getName() << ". Inst: ";
+    MI->dump();
+  }
+}
+// In this case we want to handle an Image instruction.
+// Records the image pointer each image instruction touches, encodes the
+// image resource ID into the asm-printer flags, and resolves sampler
+// operands (register or literal) to sampler numbers for read insts.
+void
+AMDILPointerManagerImpl::parseImageInst(TargetMachine &TM, MachineInstr *MI)
+{
+  assert(isImageInst(TM, MI) && "Only an image instruction can be "
+         "parsed by the parseImageInst function.");
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+  if (isWriteImageInst(TM, MI)) {
+    // Image writes carry the image pointer in operand 0.
+    unsigned dstReg = MI->getOperand(0).getReg();
+    curRes.bits.ResourceID = lookupTable[dstReg].first & 0xFFFF;
+    curRes.bits.isImage = 1;
+    InstToPtrMap[MI].insert(lookupTable[dstReg].second);
+    PtrToInstMap[lookupTable[dstReg].second.second].push_back(MI);
+    if (DEBUGME) {
+      dbgs() << "Assigning instruction to image ";
+      dbgs() << lookupTable[dstReg].second.second->getName() << ". Inst: ";
+      MI->dump();
+    }
+  } else {
+    unsigned reg = MI->getOperand(1).getReg();
+
+    // If the register is not known to be owned by a pointer
+    // then we set it to the default
+    if (!lookupTable[reg].second.second) {
+      assert(!"This should not happen for images!");
+      allocateDefaultID(TM, curRes, MI, true);
+      return;
+    }
+    InstToPtrMap[MI].insert(lookupTable[reg].second);
+    PtrToInstMap[lookupTable[reg].second.second].push_back(MI);
+    if (DEBUGME) {
+      dbgs() << "Assigning instruction to image ";
+      dbgs() << lookupTable[reg].second.second->getName() << ". Inst: ";
+      MI->dump();
+    }
+    if (isImageTXLDInst(TM, MI)) {
+      curRes.bits.ResourceID = lookupTable[reg].first & 0xFFFF;
+    } else if (isReadImageInst(TM, MI)) {
+      curRes.bits.ResourceID = lookupTable[reg].first & 0xFFFF;
+      if (MI->getOperand(SAMPLER_INDEX).isReg()) {
+        // Our sampler is not a literal value.
+        // (Fix: renamed from 'reg' to stop shadowing the image register
+        // above, and removed an unused 256-byte scratch buffer.)
+        unsigned samplerReg = MI->getOperand(SAMPLER_INDEX).getReg();
+        std::string sampler_name;
+        if (lookupTable[samplerReg].second.second) {
+          sampler_name = lookupTable[samplerReg].second.second->getName();
+        }
+        uint32_t val = ~0U;
+        if (sampler_name.empty()) {
+          sampler_name = findSamplerName(TM, MI, val);
+        }
+        val = mMFI->addSampler(sampler_name, val);
+        if (DEBUGME) {
+          dbgs() << "Mapping kernel sampler " << sampler_name
+                 << " to sampler number " << val << " for Inst:\n";
+          MI->dump();
+        }
+        MI->getOperand(SAMPLER_INDEX).ChangeToImmediate(val);
+      } else {
+        // Our sampler is known at runtime as a literal, lets make sure
+        // that the metadata for it is known.
+        // (Fix: bounded snprintf instead of sprintf; "_" plus a 32-bit
+        // signed decimal needs at most 13 bytes including the NUL.)
+        char buffer[16];
+        ::snprintf(buffer, sizeof(buffer), "_%d",
+                   (int32_t)MI->getOperand(SAMPLER_INDEX).getImm());
+        std::string sampler_name = std::string("unknown") + std::string(buffer);
+        uint32_t val = mMFI->addSampler(sampler_name, MI->getOperand(SAMPLER_INDEX).getImm());
+        if (DEBUGME) {
+          dbgs() << "Mapping internal sampler " << sampler_name
+                 << " to sampler number " << val << " for Inst:\n";
+          MI->dump();
+        }
+        MI->getOperand(SAMPLER_INDEX).setImm(val);
+      }
+    } else if (isImageInfo0Inst(TM, MI)) {
+      curRes.bits.ResourceID = lookupTable[reg].first >> 16;
+    } else if (isImageInfo1Inst(TM, MI)) {
+      curRes.bits.ResourceID = (lookupTable[reg].first >> 16) + 1;
+    }
+    curRes.bits.isImage = 1;
+  }
+  setAsmPrinterFlags(MI, curRes);
+}
+
+// This case handles the rest of the instructions
+// (anything that is not a load/store/atomic/append/image/CALL):
+// propagates constant samplers into image reads and runs conflict
+// detection for any operand register tracked to a pointer.
+void
+AMDILPointerManagerImpl::parseInstruction(TargetMachine &TM, MachineInstr *MI)
+{
+  assert(!isAtomicInst(TM, MI) && !isStoreInst(TM, MI) && !isLoadInst(TM, MI) &&
+         !isAppendInst(TM, MI) && !isImageInst(TM, MI) &&
+         "Atomic/Load/Store/Append/Image insts should not be handled here!");
+  unsigned numOps = MI->getNumOperands();
+  // If we don't have any operands, we can skip this instruction
+  if (!numOps) {
+    return;
+  }
+  // if the dst operand is not a register, then we can skip
+  // this instruction. That is because we are probably a branch
+  // or jump instruction.
+  if (!MI->getOperand(0).isReg()) {
+    return;
+  }
+  // If we are a LOADCONST_i32, we might be a sampler, so we need
+  // to propogate the LOADCONST to IMAGE[1|2|3]D[A|B][64]_READ instructions.
+  if (MI->getOpcode() == AMDIL::LOADCONST_i32) {
+    uint32_t val = MI->getOperand(1).getImm();
+
+    // Rewrite every use of the constant's dest register that feeds an
+    // image read so the sampler operand becomes an immediate.
+    for(MachineRegisterInfo::reg_iterator
+        RI = MF.getRegInfo().reg_begin(MI->getOperand(0).getReg()),
+        RE = MF.getRegInfo().reg_end();
+        RI != RE; ++RI) {
+      if (isReadImageInst(TM, RI.getOperand().getParent())) {
+        if (DEBUGME) {
+          dbgs() << "Found a constant sampler for image read inst: ";
+          RI.getOperand().print(dbgs());
+        }
+        RI.getOperand().ChangeToImmediate(val);
+      }
+    }
+  }
+
+  AMDILAS::InstrResEnc curRes;
+  getAsmPrinterFlags(MI, curRes);
+  unsigned dstReg = MI->getOperand(0).getReg();
+  unsigned reg = 0;
+  // Walk operands from last down to 1 (operand 0 is the destination).
+  while (--numOps) {
+    MachineOperand &Op = MI->getOperand(numOps);
+    // if the operand is not a register, then we can ignore it
+    if (!Op.isReg()) {
+      if (Op.isCPI()) {
+        cpool.insert(MI);
+      }
+      continue;
+    }
+    reg = Op.getReg();
+    // If the register is not known to be owned by a pointer
+    // then we can ignore it
+    if (!lookupTable[reg].second.second) {
+      continue;
+    }
+    detectConflictInst(MI, curRes, false, reg, dstReg);
+
+  }
+}
+
+// Walks the instructions of one basic block and dispatches each to the
+// parser routine matching its category. CALL instructions receive the
+// iterator itself so parseCall can inspect the surrounding copies.
+void
+AMDILPointerManagerImpl::parseBasicBlock(TargetMachine &TM, MachineBasicBlock *MB)
+{
+  MachineBasicBlock::iterator cur = MB->begin();
+  MachineBasicBlock::iterator stop = MB->end();
+  for (; cur != stop; ++cur) {
+    MachineInstr *inst = cur;
+    if (inst->getOpcode() == AMDIL::CALL) {
+      parseCall(cur, stop);
+      continue;
+    }
+    if (isLoadInst(TM, inst)) {
+      parseLoadInst(TM, inst);
+    } else if (isStoreInst(TM, inst)) {
+      parseStoreInst(TM, inst);
+    } else if (isAtomicInst(TM, inst)) {
+      parseAtomicInst(TM, inst);
+    } else if (isAppendInst(TM, inst)) {
+      parseAppendInst(TM, inst);
+    } else if (isSemaphoreInst(TM, inst)) {
+      parseSemaInst(TM, inst);
+    } else if (isImageInst(TM, inst)) {
+      parseImageInst(TM, inst);
+    } else {
+      parseInstruction(TM, inst);
+    }
+  }
+}
+
+// Follows the Reverse Post Order Traversal of the basic blocks to
+// determine which order to parse basic blocks in.
+//
+// For each block, two things happen:
+//  1. Seed its cacheability info: if any predecessor already has a
+//     store reaching its exit, a store also reaches this block's top.
+//  2. Parse the block's instructions (parseBasicBlock).
+// A final worklist pass pushes "store reaches exit" flags into
+// successors until a fixed point, so stores carried around loop
+// back-edges are accounted for.
+void
+AMDILPointerManagerImpl::parseFunction(TargetMachine &TM)
+{
+  std::list<MachineBasicBlock*> prop_worklist;
+
+  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+  for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
+       curBlock = RPOT.begin(), endBlock = RPOT.end();
+       curBlock != endBlock; ++curBlock) {
+    MachineBasicBlock *MB = (*curBlock);
+    BlockCacheableInfo &bci = bbCacheable[MB];
+    // If any predecessor seen so far has a store reaching its exit,
+    // that store reaches the top of this block as well.
+    for (MachineBasicBlock::pred_iterator mbbit = MB->pred_begin(),
+         mbbitend = MB->pred_end();
+         mbbit != mbbitend;
+         mbbit++) {
+      MBBCacheableMap::const_iterator mbbcmit = bbCacheable.find(*mbbit);
+      if (mbbcmit != bbCacheable.end() &&
+          mbbcmit->second.storeReachesExit()) {
+        bci.setReachesTop();
+        break;
+      }
+    }
+
+    if (DEBUGME) {
+      dbgs() << "[BlockOrdering] Parsing CurrentBlock: "
+             << MB->getNumber() << "\n";
+    }
+    parseBasicBlock(TM, MB);
+
+    // Blocks whose stores reach their exit must propagate that fact
+    // to their successors below.
+    if (bci.storeReachesExit())
+      prop_worklist.push_back(MB);
+
+    if (DEBUGME) {
+      dbgs() << "BCI info: Top: " << bci.storeReachesTop() << " Exit: "
+             << bci.storeReachesExit() << "\n Instructions:\n";
+      for (CacheableInstrSet::const_iterator cibit = bci.cacheableBegin(),
+           cibitend = bci.cacheableEnd();
+           cibit != cibitend;
+           cibit++) {
+        (*cibit)->dump();
+      }
+    }
+  }
+
+  // This loop pushes any "storeReachesExit" flags into successor
+  // blocks until the flags have been fully propagated. This will
+  // ensure that blocks that have reachable stores due to loops
+  // are labeled appropriately.
+  while (!prop_worklist.empty()) {
+    MachineBasicBlock *wlb = prop_worklist.front();
+    prop_worklist.pop_front();
+    for (MachineBasicBlock::succ_iterator mbbit = wlb->succ_begin(),
+         mbbitend = wlb->succ_end();
+         mbbit != mbbitend;
+         mbbit++) {
+      BlockCacheableInfo &blockCache = bbCacheable[*mbbit];
+      // Only re-enqueue a successor the first time its top flag flips,
+      // guaranteeing termination.
+      if (!blockCache.storeReachesTop()) {
+        blockCache.setReachesTop();
+        prop_worklist.push_back(*mbbit);
+      }
+      if (DEBUGME) {
+        dbgs() << "BCI Prop info: " << (*mbbit)->getNumber() << " Top: "
+               << blockCache.storeReachesTop() << " Exit: "
+               << blockCache.storeReachesExit()
+               << "\n";
+      }
+    }
+  }
+}
+
+// Debug helper: print every pointer contained in the given append
+// set, preceded by the supplied label.
+void
+AMDILPointerManagerImpl::dumpPointers(AppendSet &Ptrs, const char *str)
+{
+  if (Ptrs.empty()) {
+    return;
+  }
+  dbgs() << "[Dump]" << str << " found: " << "\n";
+  AppendSet::iterator cur = Ptrs.begin();
+  AppendSet::iterator last = Ptrs.end();
+  while (cur != last) {
+    cur->second->dump();
+    ++cur;
+  }
+  dbgs() << "\n";
+}
+// Debug helper: print every pointer contained in the given pointer
+// set, preceded by the supplied label.
+void
+AMDILPointerManagerImpl::dumpPointers(PtrSet &Ptrs, const char *str)
+{
+  if (Ptrs.empty()) {
+    return;
+  }
+  dbgs() << "[Dump]" << str << " found: " << "\n";
+  for (PtrSet::iterator entry = Ptrs.begin(), entryEnd = Ptrs.end();
+       entry != entryEnd; ++entry) {
+    entry->second->dump();
+  }
+  dbgs() << "\n";
+}
+// Function that detects all the conflicting pointers and adds
+// the pointers that are detected to the conflict set, otherwise
+// they are added to the raw or byte set based on their usage.
+//
+// Phase 1 runs to a fixed point: any instruction that touches a known
+// byte pointer drags every other pointer reaching that instruction
+// into the byte set.  Phase 2 classifies the remaining pointers as
+// raw, and additionally as conflicting (several pointers reach the
+// same I/O instruction) or potentially aliased (pointer argument
+// without the noalias attribute on a device that cannot assume
+// no-alias).  Finally, if any byte pointers exist, aliased pointers
+// are moved from the raw set into the byte set.
+void
+AMDILPointerManagerImpl::detectConflictingPointers(TargetMachine &TM)
+{
+  if (InstToPtrMap.empty()) {
+    return;
+  }
+  // Phase 1: find all pointers that belong to bytePtrs.
+  std::set<const MachineInstr*> byteInsts;
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    for (InstPMap::iterator
+         mapIter = InstToPtrMap.begin(), iterEnd = InstToPtrMap.end();
+         mapIter != iterEnd; ++mapIter) {
+      MachineInstr* MI = mapIter->first;
+      if (byteInsts.count(MI)) {
+        // already detected as byte-inst
+        continue;
+      }
+      if (isLRPInst(TM, MI, ATM)) {
+        // We don't need to deal with pointers to local/region/private
+        // memory regions
+        continue;
+      }
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(MI, curRes);
+      // Image instructions are classified elsewhere.
+      if (curRes.bits.isImage) {
+        continue;
+      }
+      bool byte = false;
+      // We might have a case where more than 1 pointers is going to the same
+      // I/O instruction
+      for (PtrSet::iterator cfIter = mapIter->second.begin(),
+           cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) {
+        const Value *ptr = cfIter->second;
+        const PointerType *PT = dyn_cast<PointerType>(ptr->getType());
+        if (PT == NULL) {
+          continue;
+        }
+        if (bytePtrs.count(*cfIter)) {
+          if (DEBUGME) {
+            dbgs() << "Instruction: ";
+            (mapIter)->first->dump();
+            dbgs() << "Base Pointer[s]:\n";
+            cfIter->second->dump();
+            dbgs() << "Byte pointer found!\n";
+          }
+          byte = true;
+          break;
+        }
+      }
+      if (byte) {
+        // One byte pointer reaching this instruction forces every
+        // other pointer reaching it into the byte set too.
+        byteInsts.insert(MI);
+        for (PtrSet::iterator cfIter = mapIter->second.begin(),
+             cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) {
+          const Value *ptr = cfIter->second;
+          const PointerType *PT = dyn_cast<PointerType>(ptr->getType());
+          if (PT && !bytePtrs.count(*cfIter)) {
+            if (DEBUGME) {
+              dbgs() << "Adding pointer " << (ptr)->getName()
+                     << " to byte set!\n";
+            }
+            bytePtrs.insert(createStrValPair(ptr));
+            // New byte pointer: rerun the outer loop so instructions
+            // visited earlier get re-examined against it.
+            changed = true;
+          }
+        }
+      }
+    }
+  }
+  // Phase 2: classify the remaining (non-byte) instructions' pointers.
+  PtrSet aliasedPtrs;
+  for (InstPMap::iterator
+       mapIter = InstToPtrMap.begin(), iterEnd = InstToPtrMap.end();
+       mapIter != iterEnd; ++mapIter) {
+    if (DEBUGME) {
+      dbgs() << "Instruction: ";
+      (mapIter)->first->dump();
+    }
+    MachineInstr* MI = mapIter->first;
+    AMDILAS::InstrResEnc curRes;
+    getAsmPrinterFlags(MI, curRes);
+    if (curRes.bits.isImage) {
+      continue;
+    }
+    bool byte = byteInsts.count(MI);
+    if (!byte) {
+      // We might have a case where more than 1 pointers is going to the same
+      // I/O instruction
+      if (DEBUGME) {
+        dbgs() << "Base Pointer[s]:\n";
+      }
+      for (PtrSet::iterator cfIter = mapIter->second.begin(),
+           cfEnd = mapIter->second.end(); cfIter != cfEnd; ++cfIter) {
+        const Value *ptr = cfIter->second;
+        if (DEBUGME) {
+          cfIter->second->dump();
+        }
+        // bool aliased = false;
+        if (isLRPInst(TM, mapIter->first, ATM)) {
+          // We don't need to deal with pointers to local/region/private
+          // memory regions
+          continue;
+        }
+        // Only function arguments can carry the noalias attribute.
+        const Argument *arg = dyn_cast_or_null<Argument>(cfIter->second);
+        if (!arg) {
+          continue;
+        }
+        if (!STM->device()->isSupported(AMDILDeviceInfo::NoAlias)
+            && !arg->hasNoAliasAttr()) {
+          if (DEBUGME) {
+            dbgs() << "Possible aliased pointer found!\n";
+          }
+          aliasedPtrs.insert(createStrValPair(ptr));
+        }
+        // More than one pointer reaching a single instruction makes
+        // each of them a conflict pointer.
+        if (mapIter->second.size() > 1) {
+          const PointerType *PT = dyn_cast<PointerType>(ptr->getType());
+          if (PT) {
+            if (DEBUGME) {
+              dbgs() << "Adding pointer " << ptr->getName()
+                     << " to conflict set!\n";
+            }
+            conflictPtrs.insert(createStrValPair(ptr));
+          }
+        }
+        const PointerType *PT = dyn_cast<PointerType>(ptr->getType());
+        if (PT) {
+          if (DEBUGME) {
+            dbgs() << "Adding pointer " << ptr->getName()
+                   << " to raw set!\n";
+          }
+          rawPtrs.insert(createStrValPair(ptr));
+        }
+      }
+    }
+    if (DEBUGME) {
+      dbgs() << "\n";
+    }
+  }
+  // If we have any aliased pointers and byte pointers exist,
+  // then make sure that all of the aliased pointers are
+  // part of the byte pointer set.
+  if (!bytePtrs.empty()) {
+    for (PtrSet::iterator aIter = aliasedPtrs.begin(),
+         aEnd = aliasedPtrs.end(); aIter != aEnd; ++aIter) {
+      if (DEBUGME) {
+        dbgs() << "Moving " << aIter->second->getName()
+               << " from raw to byte.\n";
+      }
+      bytePtrs.insert(*aIter);
+      rawPtrs.erase(*aIter);
+    }
+  }
+}
+// Function that detects aliased constant pool operations.
+void
+AMDILPointerManagerImpl::detectAliasedCPoolOps()
+{
+  if (DEBUGME && !cpool.empty()) {
+    dbgs() << "Instructions w/ CPool Ops: \n";
+  }
+  // The algorithm for detecting aliased cpool is as follows.
+  // For each instruction that has a cpool argument
+  // follow def-use chain
+  //   if instruction is a load and load is a private load,
+  //      switch to constant pool load
+  for (CPoolSet::iterator cpb = cpool.begin(), cpe = cpool.end();
+       cpb != cpe; ++cpb) {
+    if (DEBUGME) {
+      (*cpb)->dump();
+    }
+    // Breadth-first walk of the instructions reachable through the
+    // cpool instruction's result register.
+    std::queue<MachineInstr*> queue;
+    std::set<MachineInstr*> visited;
+    queue.push(*cpb);
+    MachineInstr *cur;
+    while (!queue.empty()) {
+      cur = queue.front();
+      queue.pop();
+      if (visited.count(cur)) {
+        continue;
+      }
+      if (isLoadInst(TM, cur) && isPrivateInst(TM, cur)) {
+        // If we are a private load and the register is
+        // used in the address register, we need to
+        // switch from private to constant pool load.
+        if (DEBUGME) {
+          dbgs() << "Found an instruction that is a private load "
+                 << "but should be a constant pool load.\n";
+          cur->print(dbgs());
+          dbgs() << "\n";
+        }
+        AMDILAS::InstrResEnc curRes;
+        getAsmPrinterFlags(cur, curRes);
+        curRes.bits.ResourceID = STM->device()->getResourceID(AMDILDevice::GLOBAL_ID);
+        curRes.bits.ConflictPtr = 1;
+        setAsmPrinterFlags(cur, curRes);
+        // NOTE(review): this opcode translation relies on the PRIVATE*
+        // and CPOOL* load opcodes being enumerated in the same order —
+        // confirm against the generated opcode table.
+        cur->setDesc(TM.getInstrInfo()->get(
+                       (cur->getOpcode() - AMDIL::PRIVATEAEXTLOAD_f32)
+                       + AMDIL::CPOOLAEXTLOAD_f32));
+      } else {
+        if (cur->getOperand(0).isReg()) {
+          // NOTE(review): the loop condition stops the register walk at
+          // the first operand that is not a register def, rather than
+          // skipping it — verify this early-exit is intentional.
+          for(MachineRegisterInfo::reg_iterator
+              RI = MF.getRegInfo().reg_begin(cur->getOperand(0).getReg()),
+              RE = MF.getRegInfo().reg_end();
+              RI != RE && RI.getOperand().isDef() && RI.getOperand().isReg(); ++RI) {
+            queue.push(RI.getOperand().getParent());
+          }
+        }
+      }
+      visited.insert(cur);
+    }
+  }
+}
+// Function that detects fully cacheable pointers. Fully cacheable pointers
+// are pointers that have no writes to them and no-alias is specified.
+void
+AMDILPointerManagerImpl::detectFullyCacheablePointers(TargetMachine &TM)
+{
+  if (PtrToInstMap.empty()) {
+    return;
+  }
+  // 4XXX hardware doesn't support cached uav opcodes and we assume
+  // no aliasing for this to work. Also in debug mode we don't do
+  // any caching.
+  if (STM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX
+      || !STM->device()->isSupported(AMDILDeviceInfo::CachedMem)) {
+    return;
+  }
+  if (STM->device()->isSupported(AMDILDeviceInfo::NoAlias)) {
+    for (PtrIMap::iterator mapIter = PtrToInstMap.begin(),
+         iterEnd = PtrToInstMap.end(); mapIter != iterEnd; ++mapIter) {
+      if (DEBUGME) {
+        // (The label says "Instruction" but mapIter->first is the
+        // pointer value being examined.)
+        dbgs() << "Instruction: ";
+        mapIter->first->dump();
+      }
+      // Skip the pointer if we have already detected it.
+      if (cacheablePtrs.count(createStrValPair(mapIter->first))) {
+        continue;
+      }
+      // A pointer is only cacheable if no instruction that uses it
+      // can write through it or otherwise mutate memory.
+      bool cacheable = true;
+      for (std::vector<MachineInstr*>::iterator
+           miBegin = mapIter->second.begin(),
+           miEnd = mapIter->second.end(); miBegin != miEnd; ++miBegin) {
+        if (isStoreInst(TM, *miBegin)  ||
+            isImageInst(TM, *miBegin)  ||
+            isAtomicInst(TM, *miBegin) ||
+            isAppendInst(TM, *miBegin) ||
+            isSemaphoreInst(TM, *miBegin)) {
+          cacheable = false;
+          break;
+        }
+      }
+      // we aren't cacheable, so lets move on to the next instruction
+      if (!cacheable) {
+        continue;
+      }
+      // If we are in the conflict set, lets move to the next instruction
+      // FIXME: we need to check to see if the pointers that conflict with
+      // the current pointer are also cacheable. If they are, then add them
+      // to the cacheable list and not fail.
+      if (conflictPtrs.count(createStrValPair(mapIter->first))) {
+        continue;
+      }
+      // Otherwise if we have no stores and no conflicting pointers, we can
+      // be added to the cacheable set.
+      if (DEBUGME) {
+        dbgs() << "Adding pointer " << mapIter->first->getName();
+        dbgs() << " to cached set!\n";
+      }
+      const PointerType *PT = dyn_cast<PointerType>(mapIter->first->getType());
+      if (PT) {
+        cacheablePtrs.insert(createStrValPair(mapIter->first));
+      }
+    }
+  }
+}
+
+// Return true when at least one pointer in cacheSet also appears in
+// either the byte-pointer set or the cacheable-pointer set.
+bool
+AMDILPointerManagerImpl::ptrSetIntersectsByteOrCache(PtrSet &cacheSet)
+{
+  PtrSet::const_iterator cur = cacheSet.begin();
+  PtrSet::const_iterator last = cacheSet.end();
+  for (; cur != last; ++cur) {
+    if (bytePtrs.count(*cur) || cacheablePtrs.count(*cur)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Function that detects which instructions are cacheable even if
+// all instructions of the pointer are not cacheable. The resulting
+// set of instructions will not contain Ptrs that are in the cacheable
+// ptr set (under the assumption they will get marked cacheable already)
+// or pointers in the byte set, since they are not cacheable.
+void
+AMDILPointerManagerImpl::detectCacheableInstrs()
+{
+  MBBCacheableMap::const_iterator block = bbCacheable.begin();
+  MBBCacheableMap::const_iterator blockEnd = bbCacheable.end();
+  for (; block != blockEnd; ++block) {
+    // Keep only the per-block cacheable instructions whose pointer
+    // set avoids both the byte and cacheable pointer sets.
+    CacheableInstrSet::const_iterator inst = block->second.cacheableBegin();
+    CacheableInstrSet::const_iterator instEnd = block->second.cacheableEnd();
+    for (; inst != instEnd; ++inst) {
+      if (!ptrSetIntersectsByteOrCache(InstToPtrMap[*inst])) {
+        cacheableSet.insert(*inst);
+      }
+    }
+  }
+}
+// This function annotates the cacheable pointers with the
+// CacheableRead bit. The cacheable read bit is set
+// when the number of write images is not equal to the max
+// or if the default RAW_UAV_ID is equal to 11. The first
+// condition means that there is a raw uav between 0 and 7
+// that is available for cacheable reads and the second
+// condition means that UAV 11 is available for cacheable
+// reads.
+void
+AMDILPointerManagerImpl::annotateCacheablePtrs()
+{
+  for (PtrSet::iterator ptr = cacheablePtrs.begin(),
+       ptrEnd = cacheablePtrs.end(); ptr != ptrEnd; ++ptr) {
+    assert(!bytePtrs.count(*ptr) && "Found a cacheable pointer "
+           "that also exists as a byte pointer!");
+    // If we have any kind of conflict, don't add it as cacheable.
+    if (conflictPtrs.count(*ptr)) {
+      continue;
+    }
+    std::vector<MachineInstr*> &users = PtrToInstMap[ptr->second];
+    for (std::vector<MachineInstr*>::iterator use = users.begin(),
+         useEnd = users.end(); use != useEnd; ++use) {
+      if (DEBUGME) {
+        dbgs() << "Annotating pointer as cacheable. Inst: ";
+        (*use)->dump();
+      }
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*use, curRes);
+      assert(!curRes.bits.ByteStore && "No cacheable pointers should have the "
+             "byte Store flag set!");
+      // If UAV11 is enabled, then we can enable cached reads.
+      if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) {
+        curRes.bits.CacheableRead = 1;
+        curRes.bits.ResourceID = 11;
+        setAsmPrinterFlags(*use, curRes);
+        mMFI->uav_insert(curRes.bits.ResourceID);
+      }
+    }
+  }
+}
+// Map a global-memory atomic opcode onto its arena-UAV counterpart.
+// Asserts (and returns 0) for opcodes that have no arena form.
+static unsigned switchAtomicToArena(unsigned op)
+{
+#define ATOM_CASE(OP) \
+  case AMDIL::ATOM_G_##OP: return AMDIL::ATOM_A_##OP; \
+  case AMDIL::ATOM_G_##OP##_NORET: return AMDIL::ATOM_A_##OP##_NORET;
+  switch (op) {
+    ATOM_CASE(ADD);
+    ATOM_CASE(AND);
+    ATOM_CASE(CMPXCHG);
+    ATOM_CASE(DEC);
+    ATOM_CASE(INC);
+    ATOM_CASE(MAX);
+    ATOM_CASE(MIN);
+    ATOM_CASE(OR);
+    ATOM_CASE(RSUB);
+    ATOM_CASE(SUB);
+    ATOM_CASE(UMAX);
+    ATOM_CASE(UMIN);
+    ATOM_CASE(XOR);
+  // XCHG has no _NORET variant, so it is handled outside the macro.
+  case AMDIL::ATOM_G_XCHG:
+    return AMDIL::ATOM_A_XCHG;
+  default:
+    break;
+  }
+  assert(!"Unknown atomic opcode found!");
+  return 0;
+}
+// A byte pointer is a pointer that along the pointer path has a
+// byte store assigned to it.
+//
+// Each instruction reached by a byte pointer is routed to the right
+// resource: hardware constant/local/region/private memory when the
+// device supports it, otherwise an arena UAV.  Atomic instructions
+// additionally get their trailing resource-ID operand rewritten, and
+// arena atomics are switched to the arena opcode variants.
+void
+AMDILPointerManagerImpl::annotateBytePtrs()
+{
+  PtrSet::iterator siBegin, siEnd;
+  std::vector<MachineInstr*>::iterator miBegin, miEnd;
+  uint32_t arenaID = STM->device()
+                     ->getResourceID(AMDILDevice::ARENA_UAV_ID);
+  if (STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+    // With arena segments, each no-alias pointer is given its own
+    // UAV starting just past the reserved range (see arenaInc below).
+    arenaID = ARENA_SEGMENT_RESERVED_UAVS + 1;
+  }
+  for (siBegin = bytePtrs.begin(), siEnd = bytePtrs.end();
+       siBegin != siEnd; ++siBegin) {
+    const Value* val = siBegin->second;
+    const PointerType *PT = dyn_cast<PointerType>(val->getType());
+    if (!PT) {
+      continue;
+    }
+    const Argument *curArg = dyn_cast<Argument>(val);
+    assert(!rawPtrs.count(*siBegin) && "Found a byte pointer "
+           "that also exists as a raw pointer!");
+    bool arenaInc = false;
+    for (miBegin = PtrToInstMap[siBegin->second].begin(),
+         miEnd = PtrToInstMap[siBegin->second].end();
+         miBegin != miEnd; ++miBegin) {
+      if (DEBUGME) {
+        dbgs() << "Annotating pointer as arena. Inst: ";
+        (*miBegin)->dump();
+      }
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+      // Lazily pick up the machine function info from the first
+      // instruction we visit.
+      if (!mMFI) {
+        mMFI = (*miBegin)->getParent()->getParent()
+               ->getInfo<AMDILMachineFunctionInfo>();
+      }
+
+      if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)
+          && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
+        // If hardware constant mem is enabled, then we need to
+        // get the constant pointer CB number and use that to specify
+        // the resource ID.
+        const StringRef funcName = MF.getFunction()->getName();
+        if (mAMI->isKernel(funcName)) {
+          const AMDILKernel *krnl = mAMI->getKernel(funcName);
+          curRes.bits.ResourceID = mAMI->getConstPtrCB(krnl,
+                                   siBegin->second->getName());
+          curRes.bits.HardwareInst = 1;
+        } else {
+          curRes.bits.ResourceID = STM->device()
+                                   ->getResourceID(AMDILDevice::CONSTANT_ID);
+        }
+        mMFI->setUsesConstant();
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+                 && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
+        // If hardware local mem is enabled, get the local mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::LDS_ID);
+        if (isAtomicInst(TM, *miBegin)) {
+          // FIX: assert message previously said "cannot be non-zero",
+          // the inverse of what the condition enforces.
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+                 "cannot be zero!");
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        }
+        mMFI->setUsesLDS();
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)
+                 && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
+        // If hardware region mem is enabled, get the gds mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::GDS_ID);
+        if (isAtomicInst(TM, *miBegin)) {
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+                 "cannot be zero!");
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        }
+        mMFI->setUsesGDS();
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)
+                 && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::SCRATCH_ID);
+        mMFI->setUsesScratch();
+      } else {
+        // No hardware memory region applies: fall back to an arena
+        // UAV and mark the access as a byte store.
+        if (DEBUGME) {
+          dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+          (*miBegin)->print(dbgs());
+        }
+        curRes.bits.ByteStore = 1;
+        curRes.bits.ResourceID = (curArg
+                                  && (STM->device()->isSupported(AMDILDeviceInfo::NoAlias)
+                                      || curArg->hasNoAliasAttr())) ?
+                                 arenaID : STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID);
+        if (STM->device()->isSupported(AMDILDeviceInfo::ArenaSegment)) {
+          arenaInc = true;
+        }
+        if (isAtomicInst(TM, *miBegin) &&
+            STM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) {
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+          // If we are an arena instruction, we need to switch the atomic opcode
+          // from the global version to the arena version.
+          MachineInstr *MI = *miBegin;
+          MI->setDesc(TM.getInstrInfo()->get(
+                        switchAtomicToArena(MI->getOpcode())));
+        }
+        if (DEBUGME) {
+          dbgs() << "Annotating pointer as arena. Inst: ";
+          (*miBegin)->dump();
+        }
+      }
+      setAsmPrinterFlags(*miBegin, curRes);
+      KM->setUAVID(siBegin->second, curRes.bits.ResourceID);
+      mMFI->uav_insert(curRes.bits.ResourceID);
+    }
+    // Advance to a fresh arena UAV for the next pointer when arena
+    // segments are in use.
+    if (arenaInc) {
+      ++arenaID;
+    }
+  }
+}
+// A semaphore pointer is an opaque object that has semaphore
+// instructions in its path.  Each semaphore pointer is assigned a
+// sequential counter id, and every semaphore instruction that uses
+// the pointer has the id folded in as an immediate operand.
+void
+AMDILPointerManagerImpl::annotateSemaPtrs()
+{
+  unsigned currentSemaphore = 1;
+  for (SemaSet::iterator asBegin = semaphores.begin(),
+       asEnd = semaphores.end(); asBegin != asEnd; ++asBegin) {
+    const Value* curVal = asBegin->second;
+    if (DEBUGME) {
+      dbgs() << "Semaphore: " << curVal->getName()
+             << " assigned the counter " << currentSemaphore << "\n";
+    }
+    for (std::vector<MachineInstr*>::iterator
+         miBegin = PtrToInstMap[curVal].begin(),
+         miEnd = PtrToInstMap[curVal].end(); miBegin != miEnd; ++miBegin) {
+      MachineInstr *MI = *miBegin;
+      unsigned opcode = MI->getOpcode();
+      switch (opcode) {
+      default:
+        if (DEBUGME) {
+          dbgs() << "Skipping instruction: ";
+          MI->dump();
+        }
+        break;
+      case AMDIL::SEMAPHORE_INIT: {
+        // Fold the init value (which must come from a LOADCONST_i32)
+        // into operand 2 as an immediate; otherwise report an error.
+        // NOTE(review): defOp is dereferenced via reg_begin before the
+        // isReg()/operand-count checks run — confirm operand 2 is
+        // always a register with at least one reaching def here.
+        MachineRegisterInfo &regInfo = MI->getParent()->getParent()->getRegInfo();
+        MachineOperand &init_value = MI->getOperand(2);
+        MachineOperand& defOp = regInfo.reg_begin(init_value.getReg()).getOperand();
+        MachineInstr *defMI = defOp.getParent();
+        if (!defOp.isReg()
+            || defMI->getOpcode() != AMDIL::LOADCONST_i32
+            || MI->getNumOperands() != 3) {
+          mMFI->addErrorMsg(
+            amd::CompilerErrorMessage[INVALID_INIT_VALUE]);
+        } else {
+          MI->getOperand(2).ChangeToImmediate(defMI->getOperand(1).getImm());
+        }
+      }
+      // NOTE(review): no break above — SEMAPHORE_INIT appears to fall
+      // through intentionally so its operand 0 also receives the
+      // semaphore id below; confirm this is the intended behavior.
+      case AMDIL::SEMAPHORE_WAIT:
+      case AMDIL::SEMAPHORE_SIGNAL:
+        MI->getOperand(0).ChangeToImmediate(currentSemaphore);
+        mMFI->sema_insert(currentSemaphore);
+        if (DEBUGME) {
+          dbgs() << "Assigning semaphore " << currentSemaphore << " to Inst: ";
+          MI->dump();
+        }
+        break;
+      };
+    }
+    // Report exhaustion once the id counter passes the device limit.
+    if (currentSemaphore >= OPENCL_MAX_NUM_SEMAPHORES) {
+      mMFI->addErrorMsg(
+        amd::CompilerErrorMessage[INSUFFICIENT_SEMAPHORE_RESOURCES]);
+    }
+    ++currentSemaphore;
+  }
+}
+// An append pointer is an opaque object that has append instructions
+// in its path.
+// Assign a sequential counter id to each append pointer, rewrite the
+// counter operand of every append/consume instruction that uses it,
+// and report an error if one counter is both appended and consumed.
+void
+AMDILPointerManagerImpl::annotateAppendPtrs()
+{
+  unsigned counterID = 0;
+  for (AppendSet::iterator curPtr = counters.begin(),
+       lastPtr = counters.end(); curPtr != lastPtr; ++curPtr) {
+    bool writes = false;
+    bool reads = false;
+    const Value* val = curPtr->second;
+    if (DEBUGME) {
+      dbgs() << "Counter: " << val->getName()
+             << " assigned the counter " << counterID << "\n";
+    }
+    std::vector<MachineInstr*> &users = PtrToInstMap[val];
+    for (std::vector<MachineInstr*>::iterator use = users.begin(),
+         useEnd = users.end(); use != useEnd; ++use) {
+      MachineInstr *inst = *use;
+      switch (inst->getOpcode()) {
+      case AMDIL::APPEND_ALLOC:
+      case AMDIL::APPEND64_ALLOC:
+        // Allocation (append) writes to the counter.
+        writes = true;
+        inst->getOperand(1).ChangeToImmediate(counterID);
+        if (DEBUGME) {
+          dbgs() << "Assigning counter " << counterID << " to Inst: ";
+          inst->dump();
+        }
+        break;
+      case AMDIL::APPEND_CONSUME:
+      case AMDIL::APPEND64_CONSUME:
+        // Consumption reads from the counter.
+        reads = true;
+        inst->getOperand(1).ChangeToImmediate(counterID);
+        if (DEBUGME) {
+          dbgs() << "Assigning counter " << counterID << " to Inst: ";
+          inst->dump();
+        }
+        break;
+      default:
+        if (DEBUGME) {
+          dbgs() << "Skipping instruction: ";
+          inst->dump();
+        }
+        break;
+      }
+    }
+    // A single counter may legally be appended to or consumed from,
+    // but not both.
+    if (writes && reads) {
+      mMFI->addErrorMsg(amd::CompilerErrorMessage[INCORRECT_COUNTER_USAGE]);
+    }
+    ++counterID;
+  }
+}
+// A raw pointer is any pointer that does not have byte store in its path.
+//
+// Each instruction reached by a raw pointer is routed to the matching
+// hardware memory resource (constant/local/region/private) when the
+// device supports it; otherwise, on single-UAV devices, a raw UAV is
+// chosen based on the raw/arena UAV ids and the write-image count.
+void
+AMDILPointerManagerImpl::annotateRawPtrs()
+{
+  PtrSet::iterator siBegin, siEnd;
+  std::vector<MachineInstr*>::iterator miBegin, miEnd;
+
+  // Now all of the raw pointers will go to the raw uav.
+  for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end();
+       siBegin != siEnd; ++siBegin) {
+    const PointerType *PT = dyn_cast<PointerType>(siBegin->second->getType());
+    if (!PT) {
+      continue;
+    }
+    assert(!bytePtrs.count(*siBegin) && "Found a raw pointer "
+           " that also exists as a byte pointers!");
+    for (miBegin = PtrToInstMap[siBegin->second].begin(),
+         miEnd = PtrToInstMap[siBegin->second].end();
+         miBegin != miEnd; ++miBegin) {
+      if (DEBUGME) {
+        dbgs() << "Annotating pointer as raw. Inst: ";
+        (*miBegin)->dump();
+      }
+      // Lazily pick up the machine function info from the first
+      // instruction we visit.
+      if (!mMFI) {
+        mMFI = (*miBegin)->getParent()->getParent()
+               ->getInfo<AMDILMachineFunctionInfo>();
+      }
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+      if (!curRes.bits.ConflictPtr) {
+        assert(!curRes.bits.ByteStore
+               && "Found a instruction that is marked as "
+               "raw but has a byte store bit set!");
+      } else {
+        // A conflicting pointer may have picked up the byte-store bit
+        // along another path; raw handling clears it.
+        curRes.bits.ByteStore = 0;
+      }
+      if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)
+          && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
+        // If hardware constant mem is enabled, then we need to
+        // get the constant pointer CB number and use that to specify
+        // the resource ID.
+        const StringRef funcName = (*miBegin)->getParent()->getParent()
+                                   ->getFunction()->getName();
+        if (mAMI->isKernel(funcName)) {
+          const AMDILKernel *krnl = mAMI->getKernel(funcName);
+          curRes.bits.ResourceID = mAMI->getConstPtrCB(krnl,
+                                   siBegin->second->getName());
+          curRes.bits.HardwareInst = 1;
+        } else {
+          curRes.bits.ResourceID = STM->device()
+                                   ->getResourceID(AMDILDevice::CONSTANT_ID);
+        }
+        mMFI->setUsesConstant();
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+                 && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
+        // If hardware local mem is enabled, get the local mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::LDS_ID);
+        if (isAtomicInst(TM, *miBegin)) {
+          // FIX: assert message previously said "cannot be non-zero",
+          // the inverse of what the condition enforces.
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+                 "cannot be zero!");
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        }
+        mMFI->setUsesLDS();
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)
+                 && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
+        // If hardware region mem is enabled, get the gds mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::GDS_ID);
+        if (isAtomicInst(TM, *miBegin)) {
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+                 "cannot be zero!");
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        }
+        mMFI->setUsesGDS();
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)
+                 && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::SCRATCH_ID);
+        mMFI->setUsesScratch();
+      } else if (!STM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+        // Without MultiUAV every raw pointer shares one uav: prefer
+        // the device raw uav id, fall back to the first id past the
+        // write images, and as a last resort use the (byte-store)
+        // arena uav.
+        if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) >
+            STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+          curRes.bits.ResourceID = STM->device()
+                                   ->getResourceID(AMDILDevice::RAW_UAV_ID);
+        } else if (numWriteImages != OPENCL_MAX_WRITE_IMAGES) {
+          if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID)
+              < numWriteImages) {
+            curRes.bits.ResourceID = numWriteImages;
+          } else {
+            curRes.bits.ResourceID = STM->device()
+                                     ->getResourceID(AMDILDevice::RAW_UAV_ID);
+          }
+        } else {
+          if (DEBUGME) {
+            dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+            (*miBegin)->print(dbgs());
+          }
+          curRes.bits.ByteStore = 1;
+          curRes.bits.ResourceID = STM->device()
+                                   ->getResourceID(AMDILDevice::ARENA_UAV_ID);
+        }
+        if (isAtomicInst(TM, *miBegin)) {
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+          if (curRes.bits.ResourceID
+              == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+            assert(0 && "Found an atomic instruction that has "
+                   "an arena uav id!");
+          }
+        }
+        KM->setUAVID(siBegin->second, curRes.bits.ResourceID);
+        mMFI->uav_insert(curRes.bits.ResourceID);
+      }
+      if (DEBUGME) {
+        dbgs() << "Setting pointer to resource ID "
+               << curRes.bits.ResourceID << ": ";
+        siBegin->second->dump();
+      }
+      setAsmPrinterFlags(*miBegin, curRes);
+    }
+  }
+
+}
+
+// Walk the set of individually-cacheable load instructions (the ones
+// collected into cacheableSet) and mark each as a cached read routed to
+// UAV 11. The annotation is only applied when the device's raw UAV ID is
+// 11; otherwise the instructions are left with their existing flags.
+void
+AMDILPointerManagerImpl::annotateCacheableInstrs()
+{
+  CacheableInstrSet::iterator miBegin, miEnd;
+
+  for (miBegin = cacheableSet.begin(),
+       miEnd = cacheableSet.end();
+       miBegin != miEnd; ++miBegin) {
+    if (DEBUGME) {
+      dbgs() << "Annotating instr as cacheable. Inst: ";
+      (*miBegin)->dump();
+    }
+    AMDILAS::InstrResEnc curRes;
+    getAsmPrinterFlags(*miBegin, curRes);
+    // If UAV11 is enabled, then we can enable cached reads.
+    if (STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID) == 11) {
+      curRes.bits.CacheableRead = 1;
+      curRes.bits.ResourceID = 11;
+      setAsmPrinterFlags(*miBegin, curRes);
+    }
+  }
+}
+
+// Annotate the instructions along various pointer paths. The paths that
+// are handled are the raw, byte and cacheable pointer paths.
+void
+AMDILPointerManagerImpl::annotatePtrPath()
+{
+  // Nothing to annotate if no pointer has any load/store/atomic users.
+  if (PtrToInstMap.empty()) {
+    return;
+  }
+  // First we can check the cacheable pointers
+  annotateCacheablePtrs();
+
+  // Next we annotate the byte pointers
+  annotateBytePtrs();
+
+  // Next we annotate the raw pointers
+  annotateRawPtrs();
+}
+
+// Allocate MultiUAV pointer ID's for the raw/conflict pointers.
+// Assign UAV IDs to the raw pointers first (each non-conflicting global
+// raw pointer may get its own UAV while IDs below the write-image limit
+// remain), then assign all conflict pointers to whatever UAV the counter
+// ends on. Atomic instructions additionally get the resource ID written
+// into their last operand, and must never land on the arena UAV.
+void
+AMDILPointerManagerImpl::allocateMultiUAVPointers(TargetMachine &TM)
+{
+  if (PtrToInstMap.empty()) {
+    return;
+  }
+  uint32_t curUAV = numWriteImages;
+  bool increment = true;
+  // If the RAW_UAV_ID is a value that is larger than the max number of write
+  // images, then we use that UAV ID.
+  if (numWriteImages >= OPENCL_MAX_WRITE_IMAGES) {
+    curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+    increment = false;
+  }
+  PtrSet::iterator siBegin, siEnd;
+  std::vector<MachineInstr*>::iterator miBegin, miEnd;
+  // First lets handle the raw pointers.
+  for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end();
+       siBegin != siEnd; ++siBegin) {
+    assert(siBegin->second->getType()->isPointerTy() && "We must be a pointer type "
+           "to be processed at this point!");
+    const PointerType *PT = dyn_cast<PointerType>(siBegin->second->getType());
+    if (conflictPtrs.count(*siBegin) || !PT) {
+      continue;
+    }
+    // We only want to process global address space pointers
+    if (PT->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) {
+      if ((PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS
+           && STM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
+          || (PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS
+              && STM->device()->usesSoftware(AMDILDeviceInfo::ConstantMem))
+          || (PT->getAddressSpace() == AMDILAS::REGION_ADDRESS
+              && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))) {
+        // If we are using software emulated hardware features, then
+        // we need to specify that they use the raw uav and not
+        // zero-copy uav. The easiest way to do this is to assume they
+        // conflict with another pointer. Any pointer that conflicts
+        // with another pointer is assigned to the raw uav or the
+        // arena uav if no raw uav exists.
+        // NOTE(review): this PT shadows the outer PT with the same value
+        // (same dyn_cast on the same type) — the redeclaration is redundant.
+        const PointerType *PT = dyn_cast<PointerType>(siBegin->second->getType());
+        if (PT) {
+          conflictPtrs.insert(*siBegin);
+        }
+      }
+      if (PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        if (STM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem)) {
+          // Software private memory is handled like a conflict pointer too.
+          const PointerType *PT = dyn_cast<PointerType>(siBegin->second->getType());
+          if (PT) {
+            conflictPtrs.insert(*siBegin);
+          }
+        } else {
+          // Hardware private memory: route every instruction on this
+          // pointer to the scratch resource.
+          if (DEBUGME) {
+            dbgs() << "Scratch Pointer '" << siBegin->second->getName()
+                   << "' being assigned uav "<<
+                   STM->device()->getResourceID(AMDILDevice::SCRATCH_ID) << "\n";
+          }
+          for (miBegin = PtrToInstMap[siBegin->second].begin(),
+               miEnd = PtrToInstMap[siBegin->second].end();
+               miBegin != miEnd; ++miBegin) {
+            AMDILAS::InstrResEnc curRes;
+            getAsmPrinterFlags(*miBegin, curRes);
+            curRes.bits.ResourceID = STM->device()
+                                     ->getResourceID(AMDILDevice::SCRATCH_ID);
+            if (DEBUGME) {
+              dbgs() << "Updated instruction to bitmask ";
+              dbgs().write_hex(curRes.u16all);
+              dbgs() << " with ResID " << curRes.bits.ResourceID;
+              dbgs() << ". Inst: ";
+              (*miBegin)->dump();
+            }
+            setAsmPrinterFlags((*miBegin), curRes);
+            KM->setUAVID(siBegin->second, curRes.bits.ResourceID);
+            mMFI->uav_insert(curRes.bits.ResourceID);
+          }
+          mMFI->setUsesScratch();
+        }
+      }
+      continue;
+    }
+    // If more than just UAV 11 is cacheable, then we can remove
+    // this check.
+    if (cacheablePtrs.count(*siBegin)) {
+      if (DEBUGME) {
+        dbgs() << "Raw Pointer '" << siBegin->second->getName()
+               << "' is cacheable, not allocating a multi-uav for it!\n";
+      }
+      continue;
+    }
+    if (DEBUGME) {
+      dbgs() << "Raw Pointer '" << siBegin->second->getName()
+             << "' being assigned uav " << curUAV << "\n";
+    }
+    // Even a pointer with no annotated instructions must still reserve
+    // its UAV so the kernel metadata is complete.
+    if (PtrToInstMap[siBegin->second].empty()) {
+      KM->setUAVID(siBegin->second, curUAV);
+      mMFI->uav_insert(curUAV);
+    }
+    // For all instructions here, we are going to set the new UAV to the curUAV
+    // number and not the value that it currently is set to.
+    for (miBegin = PtrToInstMap[siBegin->second].begin(),
+         miEnd = PtrToInstMap[siBegin->second].end();
+         miBegin != miEnd; ++miBegin) {
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+      curRes.bits.ResourceID = curUAV;
+      if (isAtomicInst(TM, *miBegin)) {
+        (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+        .setImm(curRes.bits.ResourceID);
+        if (curRes.bits.ResourceID
+            == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+          assert(0 && "Found an atomic instruction that has "
+                 "an arena uav id!");
+        }
+      }
+      // Arena accesses must go down the byte-store path and cannot be
+      // cached reads.
+      if (curUAV == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+        if (DEBUGME) {
+          dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+          (*miBegin)->print(dbgs());
+        }
+        curRes.bits.ByteStore = 1;
+        curRes.bits.CacheableRead = 0;
+      }
+      if (DEBUGME) {
+        dbgs() << "Updated instruction to bitmask ";
+        dbgs().write_hex(curRes.u16all);
+        dbgs() << " with ResID " << curRes.bits.ResourceID;
+        dbgs() << ". Inst: ";
+        (*miBegin)->dump();
+      }
+      setAsmPrinterFlags(*miBegin, curRes);
+      KM->setUAVID(siBegin->second, curRes.bits.ResourceID);
+      mMFI->uav_insert(curRes.bits.ResourceID);
+    }
+    // If we make it here, we can increment the uav counter if we are less
+    // than the max write image count. Otherwise we set it to the default
+    // UAV and leave it.
+    if (increment && curUAV < (OPENCL_MAX_WRITE_IMAGES - 1)) {
+      ++curUAV;
+    } else {
+      curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+      increment = false;
+    }
+  }
+  // NOTE(review): the literal 8 here presumably mirrors
+  // OPENCL_MAX_WRITE_IMAGES — confirm and use the constant if so.
+  if (numWriteImages == 8) {
+    curUAV = STM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
+  }
+  // Now lets handle the conflict pointers
+  for (siBegin = conflictPtrs.begin(), siEnd = conflictPtrs.end();
+       siBegin != siEnd; ++siBegin) {
+    assert(siBegin->second->getType()->isPointerTy() && "We must be a pointer type "
+           "to be processed at this point!");
+    const PointerType *PT = dyn_cast<PointerType>(siBegin->second->getType());
+    // We only want to process global address space pointers
+    if (!PT || PT->getAddressSpace() != AMDILAS::GLOBAL_ADDRESS) {
+      continue;
+    }
+    if (DEBUGME) {
+      dbgs() << "Conflict Pointer '" << siBegin->second->getName()
+             << "' being assigned uav " << curUAV << "\n";
+    }
+    if (PtrToInstMap[siBegin->second].empty()) {
+      KM->setUAVID(siBegin->second, curUAV);
+      mMFI->uav_insert(curUAV);
+    }
+    // All conflict pointers share the single UAV that curUAV ended on.
+    for (miBegin = PtrToInstMap[siBegin->second].begin(),
+         miEnd = PtrToInstMap[siBegin->second].end();
+         miBegin != miEnd; ++miBegin) {
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+      curRes.bits.ResourceID = curUAV;
+      if (isAtomicInst(TM, *miBegin)) {
+        (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+        .setImm(curRes.bits.ResourceID);
+        if (curRes.bits.ResourceID
+            == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+          assert(0 && "Found an atomic instruction that has "
+                 "an arena uav id!");
+        }
+      }
+      if (curUAV == STM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID)) {
+        if (DEBUGME) {
+          dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+          (*miBegin)->print(dbgs());
+        }
+        curRes.bits.ByteStore = 1;
+      }
+      if (DEBUGME) {
+        dbgs() << "Updated instruction to bitmask ";
+        dbgs().write_hex(curRes.u16all);
+        dbgs() << " with ResID " << curRes.bits.ResourceID;
+        dbgs() << ". Inst: ";
+        (*miBegin)->dump();
+      }
+      setAsmPrinterFlags(*miBegin, curRes);
+      KM->setUAVID(siBegin->second, curRes.bits.ResourceID);
+      mMFI->uav_insert(curRes.bits.ResourceID);
+    }
+  }
+}
+// The first thing we should do is to allocate the default
+// ID for each load/store/atomic instruction so that
+// it is correctly allocated. Everything else after this
+// is just an optimization to more efficiently allocate
+// resource ID's.
+void
+AMDILPointerManagerImpl::allocateDefaultIDs(TargetMachine &TM)
+{
+  // Register any sampler annotations recorded for this kernel with the
+  // machine function info (~0U presumably marks "no slot assigned yet" —
+  // confirm against AMDILMachineFunctionInfo::addSampler).
+  std::string longName = std::string("llvm.sampler.annotations.") +
+                         std::string(MF.getFunction()->getName());
+  llvm::StringRef funcName = longName;
+  std::set<std::string> *samplerNames = mAMI->getSamplerForKernel(funcName);
+  if (samplerNames) {
+    for (std::set<std::string>::iterator b = samplerNames->begin(),
+         e = samplerNames->end(); b != e; ++b) {
+      mMFI->addSampler((*b), ~0U);
+    }
+  }
+  // Give every load, store and atomic instruction in the function its
+  // default resource ID before any of the later optimizations run.
+  for (MachineFunction::iterator mfBegin = MF.begin(),
+       mfEnd = MF.end(); mfBegin != mfEnd; ++mfBegin) {
+    MachineBasicBlock *MB = mfBegin;
+    for (MachineBasicBlock::iterator mbb = MB->begin(), mbe = MB->end();
+         mbb != mbe; ++mbb) {
+      MachineInstr *MI = mbb;
+      if (isLoadInst(TM, MI)
+          || isStoreInst(TM, MI)
+          || isAtomicInst(TM, MI)) {
+        AMDILAS::InstrResEnc curRes;
+        getAsmPrinterFlags(MI, curRes);
+        allocateDefaultID(TM, curRes, MI, false);
+      }
+    }
+  }
+}
+
+// Main driver: allocates default IDs, then (for kernels only) parses
+// arguments and instructions, classifies pointers (byte/raw/cacheable/
+// conflict), and annotates every pointer path. Returns true when the
+// full analysis ran, false for non-kernel functions.
+bool
+AMDILPointerManagerImpl::perform()
+{
+  // Start out by allocating the default ID's to all instructions in the
+  // function.
+  allocateDefaultIDs(TM);
+
+  if (!mMFI->isKernel()) {
+    // We don't need to parse non-kernel functions as they
+    // aren't supported yet. Just setting the default
+    // ID's and exiting is good enough.
+    // FIXME: Support functions.
+    return false;
+  }
+
+  // First we need to go through all of the arguments and assign the
+  // live in registers to the lookup table and the pointer mapping.
+  numWriteImages = parseArguments();
+
+  // Lets do some error checking on the results of the parsing.
+  if (counters.size() > OPENCL_MAX_NUM_ATOMIC_COUNTERS) {
+    mMFI->addErrorMsg(
+      amd::CompilerErrorMessage[INSUFFICIENT_COUNTER_RESOURCES]);
+  }
+  if (semaphores.size() > OPENCL_MAX_NUM_SEMAPHORES) {
+    mMFI->addErrorMsg(
+      amd::CompilerErrorMessage[INSUFFICIENT_SEMAPHORE_RESOURCES]);
+  }
+  if (numWriteImages > OPENCL_MAX_WRITE_IMAGES
+      || (images.size() - numWriteImages > OPENCL_MAX_READ_IMAGES)) {
+    mMFI->addErrorMsg(
+      amd::CompilerErrorMessage[INSUFFICIENT_IMAGE_RESOURCES]);
+  }
+
+  // Now lets parse all of the instructions and update our
+  // lookup tables.
+  parseFunction(TM);
+
+  // We need to go over our pointer map and find all the conflicting
+  // pointers that have byte stores and put them in the bytePtr map.
+  // All conflicting pointers that don't have byte stores go into
+  // the rawPtr map.
+  detectConflictingPointers(TM);
+
+  // The next step is to detect whether the pointer should be added to
+  // the fully cacheable set or not. A pointer is marked as cacheable if
+  // no store instruction exists.
+  detectFullyCacheablePointers(TM);
+
+  // Disable partially cacheable for now when multiUAV is on.
+  // SC versions before SC139 have a bug that generates incorrect
+  // addressing for some cached accesses.
+  if (!STM->device()->isSupported(AMDILDeviceInfo::MultiUAV) &&
+      STM->calVersion() >= CAL_VERSION_SC_139) {
+    // Now we take the set of loads that have no reachable stores and
+    // create a list of additional instructions (those that aren't already
+    // in a cacheablePtr set) that are safe to mark as cacheable.
+    detectCacheableInstrs();
+
+    // Annotate the additional instructions computed above as cacheable.
+    // Note that this should not touch any instructions annotated in
+    // annotatePtrPath.
+    annotateCacheableInstrs();
+  }
+
+  // Now that we have detected everything we need to detect, lets go through an
+  // annotate the instructions along the pointer path for each of the
+  // various pointer types.
+  annotatePtrPath();
+
+  // Annotate the atomic counter path if any exists.
+  annotateAppendPtrs();
+
+  // Annotate the semaphore path if any exists.
+  annotateSemaPtrs();
+
+  // If we support MultiUAV, then we need to determine how
+  // many write images exist so that way we know how many UAV are
+  // left to allocate to buffers.
+  if (STM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) {
+    // We now have (OPENCL_MAX_WRITE_IMAGES - numPtrs) buffers open for
+    // multi-uav allocation.
+    allocateMultiUAVPointers(TM);
+  }
+
+  // The last step is to detect if we have any alias constant pool operations.
+  // This is not likely, but does happen on occasion with double precision
+  // operations.
+  detectAliasedCPoolOps();
+
+  // Add all of the fully read-only pointers to the machine function information
+  // structure so that we can emit it in the metadata.
+  // FIXME: this assumes NoAlias, need to also detect cases where NoAlias
+  // is not set, but there are exclusively only reads or writes to the pointer.
+  for (CacheableSet::iterator csBegin = cacheablePtrs.begin(),
+       csEnd = cacheablePtrs.end(); csBegin != csEnd; ++csBegin) {
+    mMFI->add_read_ptr((*csBegin).second);
+  }
+  if (DEBUGME) {
+    dumpPointers(bytePtrs, "Byte Store Ptrs");
+    dumpPointers(rawPtrs, "Raw Ptrs");
+    dumpPointers(cacheablePtrs, "Cache Load Ptrs");
+    dumpPointers(counters, "Atomic Counters");
+    dumpPointers(semaphores, "Semaphores");
+    dumpPointers(images, "Images");
+  }
+
+  return true;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,79 @@
+//===-- AMDILPointerManager.h ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDIL Pointer Manager is a class that does all the checking for
+// different pointer characteristics. Pointers have attributes that need to be attached
+// to them in order to correctly codegen them efficiently. This class will
+// analyze the pointers of a function and then traverse the uses of the pointers and
+// determine if a pointer can be cached, should belong in the arena, and what UAV it
+// should belong to. There are separate classes for each unique generation of
+// devices. This pass only works in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_POINTER_MANAGER_H_
+#define _AMDIL_POINTER_MANAGER_H_
+
+#undef DEBUG_TYPE
+#undef DEBUGME
+#define DEBUG_TYPE "PointerManager"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm
+{
+class MachineFunction;
+class AnalysisUsage;
+// The default pointer manager. This handles pointer
+// resource allocation for default ID's only.
+// There is no special processing.
+class AMDILPointerManager : public MachineFunctionPass
+{
+public:
+  AMDILPointerManager(
+    TargetMachine &tm,
+    CodeGenOpt::Level OL);
+  virtual ~AMDILPointerManager();
+  virtual const char*
+  getPassName() const;
+  virtual bool
+  runOnMachineFunction(MachineFunction &F);
+  virtual void
+  getAnalysisUsage(AnalysisUsage &AU) const;
+  static char ID;  // Pass identification, replacement for typeid.
+protected:
+  TargetMachine& TM;
+  // Clear the temporary per-instruction flags set during analysis.
+  virtual void clearTempMIFlags(MachineFunction &F);
+private:
+}; // class AMDILPointerManager
+
+// The pointer manager for Evergreen and Northern Island
+// devices. This pointer manager allocates and tracks
+// cached memory, arena resources, raw resources and
+// whether multi-uav is utilized or not.
+// Specialized pass for Evergreen/Northern-Island devices; overrides only
+// the name and the per-function driver.
+class AMDILEGPointerManager : public AMDILPointerManager
+{
+public:
+  AMDILEGPointerManager(
+    TargetMachine &tm,
+    CodeGenOpt::Level OL);
+  virtual ~AMDILEGPointerManager();
+  virtual const char*
+  getPassName() const;
+  virtual bool
+  runOnMachineFunction(MachineFunction &F);
+}; // class AMDILEGPointerManager
+} // end llvm namespace
+#endif // _AMDIL_POINTER_MANAGER_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManagerImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManagerImpl.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManagerImpl.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManagerImpl.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,407 @@
+//===-- AMDILPointerManagerImpl.h -----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDIL Pointer Manager is a class that does all the checking for
+// different pointer characteristics. Pointers have attributes that need to be attached
+// to them in order to correctly codegen them efficiently. This class will
+// analyze the pointers of a function and then traverse the uses of the pointers and
+// determine if a pointer can be cached, should belong in the arena, and what UAV it
+// should belong to. There are separate classes for each unique generation of
+// devices. This pass only works in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_POINTER_MANAGER_IMPL_H_
+#define _AMDIL_POINTER_MANAGER_IMPL_H_
+
+#include "AMDIL.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include <set>
+#include <map>
+#include <list>
+#include <queue>
+
+namespace llvm
+{
+class Value;
+class MachineBasicBlock;
+class AMDILModuleInfo;
+
+// The default pointer manager implementation.
+// This pointer manager implementation allocates and tracks
+// cached memory, arena resources, raw resources and
+// whether multi-uav is utilized or not.
+class AMDILPointerManagerImpl
+{
+protected:
+  // A pair that maps a pointer's name to the pointer value itself.
+  typedef std::pair<StringRef, const Value*> StrValPair;
+
+  // Typedefing the multiple different set types so that it is
+  // easier to read what each set is supposed to handle. This
+  // also makes it easier to track which set goes to which
+  // argument in a function call.
+  typedef std::set<StrValPair> PtrSet;
+
+  // A Byte set is the set of all base pointers that must
+  // be allocated to the arena path.
+  typedef PtrSet ByteSet;
+
+  // A Raw set is the set of all base pointers that can be
+  // allocated to the raw path.
+  typedef PtrSet RawSet;
+
+  // A cacheable set is the set of all base pointers that
+  // are deemed cacheable based on annotations or
+  // compiler options.
+  typedef PtrSet CacheableSet;
+
+  // A conflict set is a set of all base pointers whose
+  // use/def chains conflict with another base pointer.
+  typedef PtrSet ConflictSet;
+
+  // An image set is a set of all read/write only image pointers.
+  typedef PtrSet ImageSet;
+
+  // An append set is a set of atomic counter base pointers
+  typedef std::vector<StrValPair> AppendSet;
+
+  // A sema set is a set of semaphore base pointers
+  typedef std::vector<StrValPair> SemaSet;
+
+  // A ConstantSet is a set of constant pool instructions
+  typedef std::set<MachineInstr*> CPoolSet;
+
+  // A CacheableInstSet set is a set of instructions that are cacheable
+  // even if the pointer is not generally cacheable.
+  typedef std::set<MachineInstr*> CacheableInstrSet;
+
+  // A pair that maps a virtual register to the equivalent base
+  // pointer value that it was derived from.
+  typedef std::pair<unsigned, StrValPair> RegValPair;
+
+  // A map that maps between the base pointer value and an array
+  // of instructions that are part of the pointer chain. A pointer
+  // chain is a recursive def/use chain of all instructions that don't
+  // store data to memory unless the pointer is the data being stored.
+  typedef std::map<const Value*, std::vector<MachineInstr*> > PtrIMap;
+
+  // A map that holds a set of all base pointers that are used in a machine
+  // instruction. This helps to detect when conflict pointers are found
+  // such as when pointer subtraction occurs.
+  typedef std::map<MachineInstr*, PtrSet> InstPMap;
+
+  // A map that holds the frame index to RegValPair so that writes of
+  // pointers to the stack can be tracked.
+  typedef std::map<unsigned, RegValPair > FIPMap;
+
+  // A small vector impl that holds all of the register to base pointer
+  // mappings for a given function.
+  typedef std::map<unsigned, RegValPair> RVPVec;
+
+  // Information related to the cacheability of instructions in a basic block.
+  // This is used during the parse phase of the pointer algorithm to track
+  // the reachability of stores within a basic block.
+  class BlockCacheableInfo
+  {
+  public:
+    BlockCacheableInfo() :
+      mStoreReachesTop(false),
+      mStoreReachesExit(false),
+      mCacheableSet()
+    {};
+
+    bool storeReachesTop() const  {
+      return mStoreReachesTop;
+    }
+    bool storeReachesExit() const {
+      return mStoreReachesExit;
+    }
+    CacheableInstrSet::const_iterator
+    cacheableBegin() const {
+      return mCacheableSet.begin();
+    }
+    CacheableInstrSet::const_iterator
+    cacheableEnd()   const {
+      return mCacheableSet.end();
+    }
+
+    // mark the block as having a global store that reaches it. This
+    // will also set the store reaches exit flag, and clear the list
+    // of loads (since they are now reachable by a store.)
+    // Returns true if the exit flag changed as a result.
+    bool setReachesTop() {
+      bool changedExit = !mStoreReachesExit;
+
+      if (!mStoreReachesTop)
+        mCacheableSet.clear();
+
+      mStoreReachesTop = true;
+      mStoreReachesExit = true;
+      return changedExit;
+    }
+
+    // Mark the block as having a store that reaches the exit of the
+    // block.
+    void setReachesExit() {
+      mStoreReachesExit = true;
+    }
+
+    // If the top or the exit of the block are not marked as reachable
+    // by a store, add the load to the list of cacheable loads.
+    void addPossiblyCacheableInst(MachineInstr *load) {
+      // By definition, if store reaches top, then store reaches exit.
+      // So, we only test for exit here.
+      // If we have a volatile load we cannot cache it.
+      if (mStoreReachesExit || isVolatileInst(load)) {
+        return;
+      }
+
+      mCacheableSet.insert(load);
+    }
+
+  private:
+    bool mStoreReachesTop; // Does a global store reach the top of this block?
+    bool mStoreReachesExit;// Does a global store reach the exit of this block?
+    CacheableInstrSet mCacheableSet; // The set of loads in the block not
+    // reachable by a global store.
+  };
+
+  // Map from MachineBasicBlock to it's cacheable load info.
+  typedef std::map<MachineBasicBlock*, BlockCacheableInfo> MBBCacheableMap;
+
+protected:
+  MachineFunction& MF;        // The function being analyzed.
+  TargetMachine &TM;
+  AMDILMachineFunctionInfo *mMFI;
+  const AMDILTargetMachine *ATM;
+  const AMDILSubtarget *STM;  // Subtarget/device queried for resource IDs.
+  AMDILKernelManager *KM;     // Receives the pointer -> UAV assignments.
+  AMDILModuleInfo* mAMI;
+
+  // A set of all pointers are tracked in this map and
+  // if multiple pointers are detected, they go to the same
+  // set.
+  PtrIMap PtrToInstMap;
+
+  // All of the instructions that are loads, stores or pointer
+  // conflicts are tracked in the map with a set of all values
+  // that reference the instruction stored.
+  InstPMap InstToPtrMap;
+
+  // In order to track across stack entries, we need a map between a
+  // frame index and a pointer. That way when we load from a frame
+  // index, we know what pointer was stored to the frame index.
+  FIPMap FIToPtrMap;
+
+  // Set of all the pointers that are byte pointers. Byte pointers
+  // are required to have their instructions go to the arena.
+  ByteSet bytePtrs;
+
+  // Set of all the pointers that are cacheable. All of the cache pointers
+  // are required to go to a raw uav and cannot go to arena.
+  CacheableSet cacheablePtrs;
+
+  // Set of all the pointers that go into a raw buffer. A pointer can
+  // exist in either rawPtrs or bytePtrs but not both.
+  RawSet rawPtrs;
+
+  // Set of all the pointers that end up having a conflicting instruction
+  // somewhere in the pointer path.
+  ConflictSet conflictPtrs;
+
+  // Set of all pointers that are images
+  ImageSet images;
+
+  // Set of all pointers that are counters
+  AppendSet counters;
+
+  // Set of all pointers that are semaphores
+  SemaSet semaphores;
+
+  // Set of all pointers that load from a constant pool
+  CPoolSet cpool;
+
+  // Mapping from BB to information about the cacheability of the
+  // global load instructions in it.
+  MBBCacheableMap bbCacheable;
+
+  // A set of load instructions that are cacheable
+  // even if all the load instructions of the ptr are not.
+  CacheableInstrSet cacheableSet;
+
+  // The lookup table holds all of the registers that
+  // are used as we assign pointers values to them.
+  // If two pointers collide on the lookup table, then
+  // we assign them to the same UAV. If one of the
+  // pointers is byte addressable, then we assign
+  // them to arena, otherwise we assign them to raw.
+  RVPVec lookupTable;
+
+  // Number of write images found by parseArguments().
+  uint32_t numWriteImages;
+
+protected:
+  // Build a (name, value) pair for a base pointer; tolerates null.
+  StrValPair createStrValPair(const Value* ptr) {
+    return std::make_pair(ptr ? ptr->getName() : StringRef(""), ptr);
+  }
+
+  std::string findSamplerName(TargetMachine &TM, MachineInstr* MI, unsigned &val);
+
+  // Helper function that allocates the default resource ID for the
+  // respective I/O types.
+  void allocateDefaultID(TargetMachine &TM,
+                         AMDILAS::InstrResEnc &curRes,
+                         MachineInstr *MI,
+                         bool addID);
+
+  // Function that parses the arguments and updates the lookupTable with the
+  // pointer -> register mapping. This function also checks for cacheable
+  // pointers and updates the CacheableSet with the arguments that
+  // can be cached based on the readonlypointer annotation. The final
+  // purpose of this function is to update the imageSet and counterSet
+  // with all pointers that are either images or atomic counters.
+  uint32_t parseArguments();
+
+  void parseLocalArrays();
+
+  // The call stack is interesting in that even in SSA form, it assigns
+  // registers to the same value's over and over again. So we need to
+  // ignore the values that are assigned and just deal with the input
+  // and return registers.
+  void parseCall(MachineBasicBlock::iterator &mBegin,
+                 MachineBasicBlock::iterator mEnd);
+
+  // Detect if the current instruction conflicts with another instruction
+  // and add the instruction to the correct location accordingly.
+  void detectConflictInst(MachineInstr *MI,
+                          AMDILAS::InstrResEnc &curRes,
+                          bool isLoadStore,
+                          unsigned reg,
+                          unsigned dstReg);
+
+  // In this case we want to handle a load instruction.
+  void parseLoadInst(TargetMachine &TM, MachineInstr *MI);
+
+  // In this case we want to handle a store instruction.
+  void parseStoreInst(TargetMachine &TM, MachineInstr *MI);
+
+  // In this case we want to handle an atomic instruction.
+  void parseAtomicInst(TargetMachine &TM, MachineInstr *MI);
+
+  // In this case we want to handle a counter instruction.
+  void parseAppendInst(TargetMachine &TM, MachineInstr *MI);
+
+  /// In this case we want to handle a semaphore instruction.
+  void parseSemaInst(TargetMachine &TM, MachineInstr *MI);
+
+  // In this case we want to handle an Image instruction.
+  void parseImageInst(TargetMachine &TM, MachineInstr *MI);
+
+  // if addri's address is a local array, map addri's dest reg to
+  // the local array
+  void parseAddriInst(MachineInstr *MI);
+
+  // This case handles the rest of the instructions
+  void parseInstruction(TargetMachine &TM, MachineInstr *MI);
+
+  // This function parses the basic block and based on the instruction type,
+  // calls the function to finish parsing the instruction.
+  void parseBasicBlock(TargetMachine &TM, MachineBasicBlock *MB);
+
+  // Follows the Reverse Post Order Traversal of the basic blocks to
+  // determine which order to parse basic blocks in.
+  void parseFunction(TargetMachine &TM);
+
+  // Helper function that dumps to dbgs() information about
+  // a pointer set.
+  void dumpPointers(AppendSet &Ptrs, const char *str);
+
+  // Helper function that dumps to dbgs() information about
+  // a pointer set.
+  void dumpPointers(PtrSet &Ptrs, const char *str);
+
+  // Function that detects all the conflicting pointers and adds
+  // the pointers that are detected to the conflict set, otherwise
+  // they are added to the raw or byte set based on their usage.
+  void detectConflictingPointers(TargetMachine &TM);
+
+  // Function that detects aliased constant pool operations.
+  void detectAliasedCPoolOps();
+
+  // Function that detects fully cacheable pointers. Fully cacheable pointers
+  // are pointers that have no writes to them and no-alias is specified.
+  void detectFullyCacheablePointers(TargetMachine &TM);
+
+  // Are any of the pointers in PtrSet also in the BytePtrs or the CachePtrs?
+  bool ptrSetIntersectsByteOrCache(PtrSet &cacheSet);
+
+  // Function that detects which instructions are cacheable even if
+  // all instructions of the pointer are not cacheable. The resulting
+  // set of instructions will not contain Ptrs that are in the cacheable
+  // ptr set (under the assumption they will get marked cacheable already)
+  // or pointers in the byte set, since they are not cacheable.
+  void detectCacheableInstrs();
+
+  // This function annotates the cacheable pointers with the
+  // CacheableRead bit. The cacheable read bit is set
+  // when the number of write images is not equal to the max
+  // or if the default RAW_UAV_ID is equal to 11. The first
+  // condition means that there is a raw uav between 0 and 7
+  // that is available for cacheable reads and the second
+  // condition means that UAV 11 is available for cacheable
+  // reads.
+  virtual void annotateCacheablePtrs();
+
+  // A byte pointer is a pointer that along the pointer path has a
+  // byte store assigned to it.
+  virtual void annotateBytePtrs();
+
+  // A semaphore pointer is a opaque object that has semaphore instructions
+  // in its path.
+  virtual void annotateSemaPtrs();
+
+  /// An append pointer is a opaque object that has append instructions
+  // in its path.
+  virtual void annotateAppendPtrs();
+
+  // A raw pointer is any pointer that does not have byte store in its path.
+  virtual void annotateRawPtrs();
+
+  virtual void annotateCacheableInstrs();
+
+  // Annotate the instructions along various pointer paths. The paths that
+  // are handled are the raw, byte, cacheable and local pointer paths.
+  virtual void annotatePtrPath();
+
+  // Allocate MultiUAV pointer ID's for the raw/conflict pointers.
+  void allocateMultiUAVPointers(TargetMachine &TM);
+
+public:
+  AMDILPointerManagerImpl(MachineFunction& mf, TargetMachine& tm);
+  virtual ~AMDILPointerManagerImpl() {}
+
+  // The first thing we should do is to allocate the default
+  // ID for each load/store/atomic instruction so that
+  // it is correctly allocated. Everything else after this
+  // is just an optimization to more efficiently allocate
+  // resource ID's.
+  virtual void allocateDefaultIDs(TargetMachine &TM);
+
+  // the main driver function
+  bool perform();
+}; // class AMDILPointerManagerImpl
+} // end llvm namespace
+#endif // _AMDIL_POINTER_MANAGER_IMPL_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPrintfConvert.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPrintfConvert.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPrintfConvert.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPrintfConvert.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,346 @@
+//===-- AMDILPrintfConvert.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "PrintfConvert"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILAlgorithms.tpp"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "AMDILTargetMachine.h"
+#include "llvm/Instructions.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include <cstdio>
+#include <string>
+using namespace llvm;
+namespace
+{
+// Function pass that rewrites calls to printf into calls to the AMDIL
+// runtime builtins ___dumpStringID / ___dumpBytes_* so the host side can
+// reconstruct the formatted output.
+class LLVM_LIBRARY_VISIBILITY AMDILPrintfConvert : public FunctionPass
+{
+public:
+  TargetMachine &TM;  // Target machine the pass was created for.
+  static char ID;     // Pass identification, replacement for typeid.
+  AMDILPrintfConvert(TargetMachine &tm, CodeGenOpt::Level OL);
+  ~AMDILPrintfConvert();
+  const char* getPassName() const;
+  bool runOnFunction(Function &F);
+  bool doInitialization(Module &M);
+  bool doFinalization(Module &M);
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+  // Scans 'fmt' and returns a new[]-allocated array (caller must
+  // delete[]) holding the conversion character consumed by each of the
+  // printf arguments, or NULL when the string has no conversions.
+  static const char* getConversionSpecifiers(const std::string& fmt,size_t num_ops);
+private:
+  // Expands the printf call at *bbb; advances the iterator past the call
+  // when it rewrites it.
+  bool expandPrintf(BasicBlock::iterator *bbb);
+  AMDILMachineFunctionInfo *mMFI;  // Receives printf string/operand metadata.
+  AMDILKernelManager *mKM;         // Fetched per function in runOnFunction.
+  bool mChanged;                   // Whether the current function was modified.
+  // Cleared at the start of each function; no other use is visible in
+  // this file.
+  SmallVector<int64_t, DEFAULT_VEC_SLOTS> bVecMap;
+};
+char AMDILPrintfConvert::ID = 0;
+} // anonymous namespace
+
+namespace llvm
+{
+// Public factory entry point: constructs the printf-conversion pass.
+FunctionPass*
+createAMDILPrintfConvert(TargetMachine &tm, CodeGenOpt::Level OL)
+{
+  return new AMDILPrintfConvert(tm, OL);
+}
+} // llvm namespace
+// Construct the pass.  OL is accepted but not referenced by this pass.
+AMDILPrintfConvert::AMDILPrintfConvert(TargetMachine &tm, CodeGenOpt::Level OL)
+  : FunctionPass(ID), TM(tm)
+{
+}
+AMDILPrintfConvert::~AMDILPrintfConvert()
+{
+}
+bool
+AMDILPrintfConvert::expandPrintf(BasicBlock::iterator *bbb)
+{
+  // Expand one call to printf into a ___dumpStringID(id) call for the
+  // format string followed by one ___dumpBytes_* call per vararg.  On a
+  // rewrite, *bbb is advanced past the call and the original CallInst is
+  // erased.  Returns true iff the function was modified.
+  Instruction *inst = (*bbb);
+  CallInst *CI = dyn_cast<CallInst>(inst);
+  if (!CI) {
+    return false;
+  }
+  int num_ops = CI->getNumOperands();
+  if (!num_ops) {
+    return false;
+  }
+  // For a CallInst the callee is the last operand; only expand printf.
+  if (CI->getOperand(num_ops - 1)->getName() != "printf") {
+    return false;
+  }
+
+  const char* opConvSpecifiers = NULL;
+  Function *mF = inst->getParent()->getParent();
+  uint64_t bytes = 0;
+  mChanged = true;
+  if (num_ops == 1) {
+    // Degenerate call with no format string: fold the result to 0.
+    ++(*bbb);
+    Constant *newConst = ConstantInt::getSigned(CI->getType(), bytes);
+    CI->replaceAllUsesWith(newConst);
+    CI->eraseFromParent();
+    return mChanged;
+  }
+  // Deal with the format string here.
+  Value *op = CI->getOperand(0);
+  ConstantExpr *GEPinst = dyn_cast<ConstantExpr>(op);
+  if (GEPinst) {
+    GlobalVariable *GVar
+    = dyn_cast<GlobalVariable>(GEPinst->getOperand(0));
+    std::string str = "unknown";
+    if (GVar && GVar->hasInitializer()) {
+      ConstantDataArray *CA
+      = dyn_cast<ConstantDataArray>(GVar->getInitializer());
+      // The initializer is not guaranteed to be a ConstantDataArray
+      // (e.g. a zeroinitializer); the previous code dereferenced the
+      // cast result unconditionally and could crash here.
+      if (CA && CA->isString()) {
+        str = CA->getAsString();
+      }
+      opConvSpecifiers = getConversionSpecifiers(str, num_ops - 2);
+    }
+    // Register the format string and emit ___dumpStringID(id) in front
+    // of the call so the runtime can recover which string was printed.
+    uint64_t id = (uint64_t)mMFI->addPrintfString(str,
+                  getAnalysis<MachineFunctionAnalysis>().getMF()
+                  .getMMI().getObjFileInfo<AMDILModuleInfo>().get_printf_offset());
+    std::string name = "___dumpStringID";
+    Function *nF = NULL;
+    std::vector<Type*> types;
+    types.push_back(Type::getInt32Ty(mF->getContext()));
+    nF = mF->getParent()->getFunction(name);
+    if (!nF) {
+      nF = Function::Create(
+             FunctionType::get(
+               Type::getVoidTy(mF->getContext()), types, false),
+             GlobalValue::ExternalLinkage,
+             name, mF->getParent());
+    }
+    Constant *C = ConstantInt::get(
+                    Type::getInt32Ty(mF->getContext()), id, false);
+    CallInst *nCI = CallInst::Create(nF, C);
+    nCI->insertBefore(CI);
+    bytes = strlen(str.data());
+    // Record the bit size of every vararg in the function metadata.
+    for (uint32_t x = 1, y = num_ops - 1; x < y; ++x) {
+      op = CI->getOperand(x);
+      Type *oType = op->getType();
+      uint32_t eleCount = getNumElements(oType);
+      uint32_t eleSize = (uint32_t)GET_SCALAR_SIZE(oType);
+      if (!eleSize) {
+        // Default size is 32bits.
+        eleSize = 32;
+      }
+      if (!eleCount) {
+        // Default num elements is 1.
+        eleCount = 1;
+      }
+      uint32_t totalSize = eleCount * eleSize;
+      mMFI->addPrintfOperand(str, (x - 1),
+                             (uint32_t)totalSize);
+    }
+  }
+  // Lower every vararg into a ___dumpBytes_* call of a matching width.
+  for (uint32_t x = 1, y = num_ops - 1; x < y; ++x) {
+    op = CI->getOperand(x);
+    Type *oType = op->getType();
+    if (oType->isFPOrFPVectorTy()
+        && (oType->getTypeID() != Type::VectorTyID)) {
+      // Scalar float/double: reinterpret as the same-width integer.
+      Type *iType = NULL;
+      if (oType->isFloatTy()) {
+        iType = dyn_cast<Type>(
+                  Type::getInt32Ty(oType->getContext()));
+      } else {
+        iType = dyn_cast<Type>(
+                  Type::getInt64Ty(oType->getContext()));
+      }
+      op = new BitCastInst(op, iType, "printfBitCast", CI);
+    } else if (oType->getTypeID() == Type::VectorTyID) {
+      // Vectors are repacked into i16/i32/i64 scalars or vectors so a
+      // fixed set of dump builtins can cover all element types.
+      Type *iType = NULL;
+      uint32_t eleCount = getNumElements(oType);
+      uint32_t eleSize = (uint32_t)GET_SCALAR_SIZE(oType);
+      uint32_t totalSize = eleCount * eleSize;
+      if (eleCount == 3) {
+        // Pad v3 out to v4 by duplicating element 2.
+        IntegerType *int32ty = Type::getInt32Ty(oType->getContext());
+        Constant* indices[4] = {
+          ConstantInt::get(int32ty, 0),
+          ConstantInt::get(int32ty, 1),
+          ConstantInt::get(int32ty, 2),
+          ConstantInt::get(int32ty, 2)
+        };
+        Constant* mask = ConstantVector::get(indices);
+        ShuffleVectorInst* shuffle = new ShuffleVectorInst(op, op, mask);
+        shuffle->insertBefore(CI);
+        op = shuffle;
+        oType = op->getType();
+        totalSize += eleSize;
+      }
+      switch (eleSize) {
+      default:
+        eleCount = totalSize / 64;
+        iType = dyn_cast<Type>(
+                  Type::getInt64Ty(oType->getContext()));
+        break;
+      case 8:
+        if (eleCount >= 8) {
+          eleCount = totalSize / 64;
+          iType = dyn_cast<Type>(
+                    Type::getInt64Ty(oType->getContext()));
+        } else if (eleCount >= 3) {
+          eleCount = 1;
+          iType = dyn_cast<Type>(
+                    Type::getInt32Ty(oType->getContext()));
+        } else {
+          eleCount = 1;
+          iType = dyn_cast<Type>(
+                    Type::getInt16Ty(oType->getContext()));
+        }
+        break;
+      case 16:
+        if (eleCount >= 3) {
+          eleCount = totalSize / 64;
+          iType = dyn_cast<Type>(
+                    Type::getInt64Ty(oType->getContext()));
+        } else {
+          eleCount = 1;
+          iType = dyn_cast<Type>(
+                    Type::getInt32Ty(oType->getContext()));
+        }
+        break;
+      }
+      if (eleCount > 1) {
+        iType = dyn_cast<Type>(
+                  VectorType::get(iType, eleCount));
+      }
+      op = new BitCastInst(op, iType, "printfBitCast", CI);
+    }
+    char buffer[256];
+    uint32_t size = (uint32_t)GET_SCALAR_SIZE(oType);
+    if (size) {
+      sprintf(buffer, "___dumpBytes_v%db%u",
+              1,
+              (uint32_t)getNumElements(oType) * (uint32_t)size);
+    } else {
+      // Non-scalar operand: expected to be a pointer.  Check the cast
+      // result before use; the previous code dereferenced PT unchecked.
+      const PointerType *PT = dyn_cast<PointerType>(oType);
+      if (PT && opConvSpecifiers && opConvSpecifiers[x-1] == 's' &&
+          PT->getAddressSpace() == 2 &&
+          GET_SCALAR_SIZE(PT->getContainedType(0)) == 8 &&
+          getNumElements(PT->getContainedType(0)) == 1) {
+        // %s on a constant i8*: dump the pointed-to string.
+        op = new BitCastInst(op,
+                             Type::getInt8PtrTy(oType->getContext(),
+                                                AMDILAS::CONSTANT_ADDRESS),
+                             "printfPtrCast", CI);
+
+        sprintf(buffer, "___dumpBytes_v%dbs", 1);
+      } else {
+        // Any other pointer is dumped as a 32-bit address.
+        op = new PtrToIntInst(op,
+                              Type::getInt32Ty(oType->getContext()),
+                              "printfPtrCast", CI);
+        sprintf(buffer, "___dumpBytes_v1b32");
+      }
+    }
+    std::vector<Type*> types;
+    types.push_back(op->getType());
+    std::string name = buffer;
+    Function *nF = NULL;
+    nF = mF->getParent()->getFunction(name);
+    if (!nF) {
+      nF = Function::Create(
+             FunctionType::get(
+               Type::getVoidTy(mF->getContext()), types, false),
+             GlobalValue::ExternalLinkage,
+             name, mF->getParent());
+    }
+    CallInst *nCI = CallInst::Create(nF, op);
+    nCI->insertBefore(CI);
+    // NOTE(review): 'size' is in bits and is 0 for pointers, so this
+    // unsigned subtraction can wrap; presumably 'bytes' only feeds the
+    // folded return value of printf below -- confirm the intent.
+    bytes += (size - 4);
+  }
+  if (opConvSpecifiers != NULL) {
+    delete[] opConvSpecifiers;
+  }
+  ++(*bbb);
+  // Replace the printf call with its (approximate) byte count.
+  Constant *newConst = ConstantInt::getSigned(CI->getType(), bytes);
+  CI->replaceAllUsesWith(newConst);
+  CI->eraseFromParent();
+  return mChanged;
+}
+bool
+AMDILPrintfConvert::runOnFunction(Function &MF)
+{
+  // Visit every instruction of every basic block and expand printf
+  // calls via expandPrintf.  Returns true if anything was rewritten.
+  mChanged = false;
+  mKM = TM.getSubtarget<AMDILSubtarget>().getKernelManager();
+  mMFI = getAnalysis<MachineFunctionAnalysis>().getMF()
+         .getInfo<AMDILMachineFunctionInfo>();
+  bVecMap.clear();
+  // NOTE(review): MF.begin()->begin() assumes the function has at least
+  // one basic block -- confirm declarations never reach this pass.
+  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+                    std::bind1st(
+                      std::mem_fun(
+                        &AMDILPrintfConvert::expandPrintf), this));
+  return mChanged;
+}
+
+// Human-readable pass name reported by the pass manager (-debug-pass).
+const char* AMDILPrintfConvert::getPassName() const {
+  return "AMDIL Printf Conversion Pass";
+}
+// Module-level setup hook: this pass keeps no module state, so it
+// reports that nothing was modified.
+bool AMDILPrintfConvert::doInitialization(Module &M) { return false; }
+
+// Module-level teardown hook: nothing to clean up, nothing modified.
+bool AMDILPrintfConvert::doFinalization(Module &M) { return false; }
+
+void
+AMDILPrintfConvert::getAnalysisUsage(AnalysisUsage &AU) const
+{
+  // Requires MachineFunctionAnalysis so runOnFunction can reach the
+  // AMDILMachineFunctionInfo attached to the machine function.
+  AU.addRequired<MachineFunctionAnalysis>();
+  FunctionPass::getAnalysisUsage(AU);
+  // NOTE(review): the pass does rewrite IR (erases calls), so claiming
+  // that all analyses are preserved looks optimistic -- confirm.
+  AU.setPreservesAll();
+}
+const char*
+AMDILPrintfConvert::getConversionSpecifiers(const std::string& fmt,size_t num_ops)
+{
+  // Scan a printf format string and record, for each of at most num_ops
+  // arguments, the conversion character ('d', 'f', 's', ...) that
+  // consumes it.  A candidate character only counts as a conversion when
+  // it is preceded by an odd-length run of '%' (an even run is escaped
+  // "%%" pairs).  Returns a new[]-allocated array of num_ops chars the
+  // caller must delete[], or NULL when no conversion was found.
+  static const char* convSpecifiers = "cdieEfgGaosuxXp";
+  size_t curFmtSpecifierIdx = 0;
+  size_t prevFmtSpecifierIdx = 0;
+  size_t opIdx = 0;
+  // Value-initialize so unmatched slots read as 0 instead of garbage
+  // when the caller indexes past the last recorded specifier.
+  char* opConvSpecifiers = new char[num_ops]();
+  // Bound the scan by num_ops: previously a format string containing
+  // more conversions than arguments overflowed this heap buffer.
+  while (opIdx < num_ops &&
+         (curFmtSpecifierIdx = fmt.find_first_of(convSpecifiers,curFmtSpecifierIdx)) != std::string::npos) {
+    bool argDump = false;
+    const std::string curFmt = fmt.substr(prevFmtSpecifierIdx,curFmtSpecifierIdx - prevFmtSpecifierIdx);
+    size_t pTag = curFmt.find_last_of("%");
+    if (pTag != std::string::npos) {
+      argDump = true;
+      // Walk back over the run of '%' characters; each extra '%' toggles
+      // between "conversion" and "escaped percent".
+      while (pTag && curFmt[--pTag] == '%') {
+        argDump = !argDump;
+      }
+    }
+    if (argDump) {
+      opConvSpecifiers[opIdx++] = fmt[curFmtSpecifierIdx];
+    }
+    prevFmtSpecifierIdx = ++curFmtSpecifierIdx;
+  }
+  if (opIdx == 0) {
+    delete[] opConvSpecifiers;
+    return  NULL;
+  }
+  return opConvSpecifiers;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILProfiles.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILProfiles.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILProfiles.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILProfiles.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,166 @@
+//===-- AMDILProfiles.td --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These are used for custom selection dag type profiles.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic arithmetic profiles: result and all operands share one type.
+def SDTIL_GenUnaryOp : SDTypeProfile<1, 1, [
+    SDTCisSameAs<0, 1>
+    ]>;
+def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
+    SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+    ]>;
+def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
+    SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
+    ]>;
+// Conditional move: operand 1 is an integer predicate; operands 2 and 3
+// are the values selected between and match the result type.
+def SDTIL_GenCMovLog : SDTypeProfile<1, 3, [
+    SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisInt<1>
+    ]>;
+// Build a vector whose element type is operand 1's scalar type.
+def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
+    SDTCisEltOfVec<1, 0>
+    ]>;
+
+// Extract: scalar result is an element of vector operand 1; operand 2 is
+// the i32 index.
+def SDTIL_GenVecExtract : SDTypeProfile<1, 2, [
+    SDTCisEltOfVec<0, 1>, SDTCisVT<2, i32>
+    ]>;
+
+// Insert element (operand 2) into vector operand 1; operands 3 and 4 are
+// i32 immediates.
+def SDTIL_GenVecInsert : SDTypeProfile<1, 4, [
+    SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>,
+    SDTCisVT<3, i32>, SDTCisVT<4, i32>
+    ]>;
+
+// Shuffle of vector operand 1 driven by an i32 selector (operand 2).
+def SDTIL_GenVecShuffle : SDTypeProfile <1, 2, [
+    SDTCisSameAs<0, 1>, SDTCisVT<2, i32>
+    ]>;
+
+// Two equally-typed vector operands; the result type is unconstrained.
+def SDTIL_GenVecConcat : SDTypeProfile <1, 2, [
+    SDTCisSameAs<1, 2>
+    ]>;
+//===----------------------------------------------------------------------===//
+// Conversion Profile Types
+//===----------------------------------------------------------------------===//
+def SDTIL_DPToFPOp : SDTypeProfile<1, 1, [
+    SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>
+    ]>; // d2f
+
+def SDTIL_AnyToInt : SDTypeProfile<1, 1, [
+    SDTCisInt<0>
+    ]>;
+def SDTIL_IntToAny : SDTypeProfile<1, 1, [
+    SDTCisInt<1>
+    ]>;
+def SDTIL_GenBitConv : SDTypeProfile<1, 1, []>;
+//===----------------------------------------------------------------------===//
+// Scalar Profile Types
+//===----------------------------------------------------------------------===//
+
+// Add instruction pattern to handle offsets of memory operationns
+def SDTIL_AddAddrri: SDTypeProfile<1, 2, [
+    SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisSameAs<0, 2>
+    ]>;
+def SDTIL_AddAddrir : SDTypeProfile<1, 2, [
+    SDTCisInt<0>, SDTCisPtrTy<2>, SDTCisSameAs<0, 1>
+    ]>;
+
+def SDTIL_LCreate : SDTypeProfile<1, 2, [
+    SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>
+    ]>;
+def SDTIL_LCreate2 : SDTypeProfile<1, 2, [
+    SDTCisVT<0, v2i64>, SDTCisVT<1, v2i32>, SDTCisSameAs<1, 2>
+    ]>;
+def SDTIL_LComp : SDTypeProfile<1, 1, [
+    SDTCisVT<0, i32>, SDTCisVT<1, i64>
+    ]>;
+def SDTIL_LComp2 : SDTypeProfile<1, 1, [
+    SDTCisVT<0, v2i32>, SDTCisVT<1, v2i64>
+    ]>;
+def SDTIL_DCreate : SDTypeProfile<1, 2, [
+    SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>
+    ]>;
+def SDTIL_DComp : SDTypeProfile<1, 1, [
+    SDTCisVT<0, i32>, SDTCisVT<1, f64>
+    ]>;
+def SDTIL_DCreate2 : SDTypeProfile<1, 2, [
+    SDTCisVT<0, v2f64>, SDTCisVT<1, v2i32>, SDTCisSameAs<1, 2>
+    ]>;
+def SDTIL_DComp2 : SDTypeProfile<1, 1, [
+    SDTCisVT<0, v2i32>, SDTCisVT<1, v2f64>
+    ]>;
+//===----------------------------------------------------------------------===//
+// Flow Control Profile Types
+//===----------------------------------------------------------------------===//
+// Profile for Normal Call
+def SDTIL_Call : SDTypeProfile<0, 1, [
+    SDTCisVT<0, i32>
+    ]>;
+// Branch instruction where second and third are basic blocks
+def SDTIL_BRCond : SDTypeProfile<0, 2, [
+    SDTCisVT<0, OtherVT>
+    ]>;
+// Comparison instruction
+def SDTIL_Cmp  : SDTypeProfile<1, 3, [
+    SDTCisSameAs<0, 2>, SDTCisSameAs<2,3>, SDTCisVT<1, i32>
+    ]>;
+
+
+//===----------------------------------------------------------------------===//
+// Call Sequence Profiles
+//===----------------------------------------------------------------------===//
+def SDTIL_CallSeqStart  : SDCallSeqStart< [
+    SDTCisVT<0, i32>
+    ]>;
+def SDTIL_CallSeqEnd    : SDCallSeqEnd< [
+    SDTCisVT<0, i32>, SDTCisVT<1, i32>
+    ]>;
+
+//===----------------------------------------------------------------------===//
+// Image Operation Profiles
+//===----------------------------------------------------------------------===//
+def SDTIL_ImageRead  : SDTypeProfile<1, 3, 
+    [SDTCisVT<0, v4i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, v4f32>]>;
+def SDTIL_ImageWrite : SDTypeProfile<0, 3,
+    [SDTCisPtrTy<0>, SDTCisVT<1, v2i32>, SDTCisVT<2, v4i32>]>;
+def SDTIL_ImageWrite3D : SDTypeProfile<0, 3,
+    [SDTCisPtrTy<0>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>]>;
+def SDTIL_ImageInfo  : SDTypeProfile<1, 1,
+    [SDTCisVT<0, v4i32>, SDTCisPtrTy<1>]>;
+//===----------------------------------------------------------------------===//
+// Atomic Operation Profiles
+//===----------------------------------------------------------------------===//
+def SDTIL_UniAtomNoRet : SDTypeProfile<0, 2, [
+    SDTCisPtrTy<0>, SDTCisInt<1>
+    ]>;
+def SDTIL_BinAtomNoRet : SDTypeProfile<0, 3, [
+    SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i32>
+    ]>;
+def SDTIL_TriAtomNoRet : SDTypeProfile<0, 4, [
+    SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisVT<3, i32>
+    ]>;
+def SDTIL_UniAtom : SDTypeProfile<1, 2, [
+    SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>
+    ]>;
+def SDTIL_BinAtom : SDTypeProfile<1, 3, [
+    SDTCisSameAs<0, 2>, SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisVT<3, i32>
+    ]>;
+def SDTIL_TriAtom : SDTypeProfile<1, 4, [
+    SDTCisSameAs<0, 2>, SDTCisPtrTy<1>, SDTCisInt<2>,
+    SDTCisSameAs<3, 2>, SDTCisVT<4, i32>
+    ]>;
+
+def SDTIL_BinAtomFloat : SDTypeProfile<1, 3, [
+    SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, f32>, SDTCisVT<3, f32>
+    ]>;
+def SDTIL_BinAtomNoRetFloat : SDTypeProfile<0, 3, [
+    SDTCisPtrTy<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>
+    ]>;
+
+def SDTIL_Append : SDTypeProfile<1, 1, [
+    SDTCisVT<0, i32>, SDTCisPtrTy<1>
+    ]>;

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsScalar.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsScalar.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsScalar.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsScalar.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,829 @@
+//===-- AMDILRegisterDefsScalar.td ----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+def Rx1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
+def Ry1 : AMDILReg<1, "r1">, DwarfRegAlias<Rx1>;
+def Rz1 : AMDILReg<1, "r1">, DwarfRegAlias<Rx1>;
+def Rw1 : AMDILReg<1, "r1">, DwarfRegAlias<Rx1>;
+def Rx2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
+def Ry2 : AMDILReg<2, "r2">, DwarfRegAlias<Rx2>;
+def Rz2 : AMDILReg<2, "r2">, DwarfRegAlias<Rx2>;
+def Rw2 : AMDILReg<2, "r2">, DwarfRegAlias<Rx2>;
+def Rx3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
+def Ry3 : AMDILReg<3, "r3">, DwarfRegAlias<Rx3>;
+def Rz3 : AMDILReg<3, "r3">, DwarfRegAlias<Rx3>;
+def Rw3 : AMDILReg<3, "r3">, DwarfRegAlias<Rx3>;
+def Rx4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
+def Ry4 : AMDILReg<4, "r4">, DwarfRegAlias<Rx4>;
+def Rz4 : AMDILReg<4, "r4">, DwarfRegAlias<Rx4>;
+def Rw4 : AMDILReg<4, "r4">, DwarfRegAlias<Rx4>;
+def Rx5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
+def Ry5 : AMDILReg<5, "r5">, DwarfRegAlias<Rx5>;
+def Rz5 : AMDILReg<5, "r5">, DwarfRegAlias<Rx5>;
+def Rw5 : AMDILReg<5, "r5">, DwarfRegAlias<Rx5>;
+def Rx6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
+def Ry6 : AMDILReg<6, "r6">, DwarfRegAlias<Rx6>;
+def Rz6 : AMDILReg<6, "r6">, DwarfRegAlias<Rx6>;
+def Rw6 : AMDILReg<6, "r6">, DwarfRegAlias<Rx6>;
+def Rx7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
+def Ry7 : AMDILReg<7, "r7">, DwarfRegAlias<Rx7>;
+def Rz7 : AMDILReg<7, "r7">, DwarfRegAlias<Rx7>;
+def Rw7 : AMDILReg<7, "r7">, DwarfRegAlias<Rx7>;
+def Rx8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
+def Ry8 : AMDILReg<8, "r8">, DwarfRegAlias<Rx8>;
+def Rz8 : AMDILReg<8, "r8">, DwarfRegAlias<Rx8>;
+def Rw8 : AMDILReg<8, "r8">, DwarfRegAlias<Rx8>;
+def Rx9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
+def Ry9 : AMDILReg<9, "r9">, DwarfRegAlias<Rx9>;
+def Rz9 : AMDILReg<9, "r9">, DwarfRegAlias<Rx9>;
+def Rw9 : AMDILReg<9, "r9">, DwarfRegAlias<Rx9>;
+def Rx10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
+def Ry10 : AMDILReg<10, "r10">, DwarfRegAlias<Rx10>;
+def Rz10 : AMDILReg<10, "r10">, DwarfRegAlias<Rx10>;
+def Rw10 : AMDILReg<10, "r10">, DwarfRegAlias<Rx10>;
+def Rx11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
+def Ry11 : AMDILReg<11, "r11">, DwarfRegAlias<Rx11>;
+def Rz11 : AMDILReg<11, "r11">, DwarfRegAlias<Rx11>;
+def Rw11 : AMDILReg<11, "r11">, DwarfRegAlias<Rx11>;
+def Rx12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
+def Ry12 : AMDILReg<12, "r12">, DwarfRegAlias<Rx12>;
+def Rz12 : AMDILReg<12, "r12">, DwarfRegAlias<Rx12>;
+def Rw12 : AMDILReg<12, "r12">, DwarfRegAlias<Rx12>;
+def Rx13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
+def Ry13 : AMDILReg<13, "r13">, DwarfRegAlias<Rx13>;
+def Rz13 : AMDILReg<13, "r13">, DwarfRegAlias<Rx13>;
+def Rw13 : AMDILReg<13, "r13">, DwarfRegAlias<Rx13>;
+def Rx14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
+def Ry14 : AMDILReg<14, "r14">, DwarfRegAlias<Rx14>;
+def Rz14 : AMDILReg<14, "r14">, DwarfRegAlias<Rx14>;
+def Rw14 : AMDILReg<14, "r14">, DwarfRegAlias<Rx14>;
+def Rx15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
+def Ry15 : AMDILReg<15, "r15">, DwarfRegAlias<Rx15>;
+def Rz15 : AMDILReg<15, "r15">, DwarfRegAlias<Rx15>;
+def Rw15 : AMDILReg<15, "r15">, DwarfRegAlias<Rx15>;
+def Rx16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
+def Ry16 : AMDILReg<16, "r16">, DwarfRegAlias<Rx16>;
+def Rz16 : AMDILReg<16, "r16">, DwarfRegAlias<Rx16>;
+def Rw16 : AMDILReg<16, "r16">, DwarfRegAlias<Rx16>;
+def Rx17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
+def Ry17 : AMDILReg<17, "r17">, DwarfRegAlias<Rx17>;
+def Rz17 : AMDILReg<17, "r17">, DwarfRegAlias<Rx17>;
+def Rw17 : AMDILReg<17, "r17">, DwarfRegAlias<Rx17>;
+def Rx18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
+def Ry18 : AMDILReg<18, "r18">, DwarfRegAlias<Rx18>;
+def Rz18 : AMDILReg<18, "r18">, DwarfRegAlias<Rx18>;
+def Rw18 : AMDILReg<18, "r18">, DwarfRegAlias<Rx18>;
+def Rx19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
+def Ry19 : AMDILReg<19, "r19">, DwarfRegAlias<Rx19>;
+def Rz19 : AMDILReg<19, "r19">, DwarfRegAlias<Rx19>;
+def Rw19 : AMDILReg<19, "r19">, DwarfRegAlias<Rx19>;
+def Rx20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
+def Ry20 : AMDILReg<20, "r20">, DwarfRegAlias<Rx20>;
+def Rz20 : AMDILReg<20, "r20">, DwarfRegAlias<Rx20>;
+def Rw20 : AMDILReg<20, "r20">, DwarfRegAlias<Rx20>;
+def Rx21 : AMDILReg<21, "r21">, DwarfRegNum<[21]>;
+def Ry21 : AMDILReg<21, "r21">, DwarfRegAlias<Rx21>;
+def Rz21 : AMDILReg<21, "r21">, DwarfRegAlias<Rx21>;
+def Rw21 : AMDILReg<21, "r21">, DwarfRegAlias<Rx21>;
+def Rx22 : AMDILReg<22, "r22">, DwarfRegNum<[22]>;
+def Ry22 : AMDILReg<22, "r22">, DwarfRegAlias<Rx22>;
+def Rz22 : AMDILReg<22, "r22">, DwarfRegAlias<Rx22>;
+def Rw22 : AMDILReg<22, "r22">, DwarfRegAlias<Rx22>;
+def Rx23 : AMDILReg<23, "r23">, DwarfRegNum<[23]>;
+def Ry23 : AMDILReg<23, "r23">, DwarfRegAlias<Rx23>;
+def Rz23 : AMDILReg<23, "r23">, DwarfRegAlias<Rx23>;
+def Rw23 : AMDILReg<23, "r23">, DwarfRegAlias<Rx23>;
+def Rx24 : AMDILReg<24, "r24">, DwarfRegNum<[24]>;
+def Ry24 : AMDILReg<24, "r24">, DwarfRegAlias<Rx24>;
+def Rz24 : AMDILReg<24, "r24">, DwarfRegAlias<Rx24>;
+def Rw24 : AMDILReg<24, "r24">, DwarfRegAlias<Rx24>;
+def Rx25 : AMDILReg<25, "r25">, DwarfRegNum<[25]>;
+def Ry25 : AMDILReg<25, "r25">, DwarfRegAlias<Rx25>;
+def Rz25 : AMDILReg<25, "r25">, DwarfRegAlias<Rx25>;
+def Rw25 : AMDILReg<25, "r25">, DwarfRegAlias<Rx25>;
+def Rx26 : AMDILReg<26, "r26">, DwarfRegNum<[26]>;
+def Ry26 : AMDILReg<26, "r26">, DwarfRegAlias<Rx26>;
+def Rz26 : AMDILReg<26, "r26">, DwarfRegAlias<Rx26>;
+def Rw26 : AMDILReg<26, "r26">, DwarfRegAlias<Rx26>;
+def Rx27 : AMDILReg<27, "r27">, DwarfRegNum<[27]>;
+def Ry27 : AMDILReg<27, "r27">, DwarfRegAlias<Rx27>;
+def Rz27 : AMDILReg<27, "r27">, DwarfRegAlias<Rx27>;
+def Rw27 : AMDILReg<27, "r27">, DwarfRegAlias<Rx27>;
+def Rx28 : AMDILReg<28, "r28">, DwarfRegNum<[28]>;
+def Ry28 : AMDILReg<28, "r28">, DwarfRegAlias<Rx28>;
+def Rz28 : AMDILReg<28, "r28">, DwarfRegAlias<Rx28>;
+def Rw28 : AMDILReg<28, "r28">, DwarfRegAlias<Rx28>;
+def Rx29 : AMDILReg<29, "r29">, DwarfRegNum<[29]>;
+def Ry29 : AMDILReg<29, "r29">, DwarfRegAlias<Rx29>;
+def Rz29 : AMDILReg<29, "r29">, DwarfRegAlias<Rx29>;
+def Rw29 : AMDILReg<29, "r29">, DwarfRegAlias<Rx29>;
+def Rx30 : AMDILReg<30, "r30">, DwarfRegNum<[30]>;
+def Ry30 : AMDILReg<30, "r30">, DwarfRegAlias<Rx30>;
+def Rz30 : AMDILReg<30, "r30">, DwarfRegAlias<Rx30>;
+def Rw30 : AMDILReg<30, "r30">, DwarfRegAlias<Rx30>;
+def Rx31 : AMDILReg<31, "r31">, DwarfRegNum<[31]>;
+def Ry31 : AMDILReg<31, "r31">, DwarfRegAlias<Rx31>;
+def Rz31 : AMDILReg<31, "r31">, DwarfRegAlias<Rx31>;
+def Rw31 : AMDILReg<31, "r31">, DwarfRegAlias<Rx31>;
+def Rx32 : AMDILReg<32, "r32">, DwarfRegNum<[32]>;
+def Ry32 : AMDILReg<32, "r32">, DwarfRegAlias<Rx32>;
+def Rz32 : AMDILReg<32, "r32">, DwarfRegAlias<Rx32>;
+def Rw32 : AMDILReg<32, "r32">, DwarfRegAlias<Rx32>;
+def Rx33 : AMDILReg<33, "r33">, DwarfRegNum<[33]>;
+def Ry33 : AMDILReg<33, "r33">, DwarfRegAlias<Rx33>;
+def Rz33 : AMDILReg<33, "r33">, DwarfRegAlias<Rx33>;
+def Rw33 : AMDILReg<33, "r33">, DwarfRegAlias<Rx33>;
+def Rx34 : AMDILReg<34, "r34">, DwarfRegNum<[34]>;
+def Ry34 : AMDILReg<34, "r34">, DwarfRegAlias<Rx34>;
+def Rz34 : AMDILReg<34, "r34">, DwarfRegAlias<Rx34>;
+def Rw34 : AMDILReg<34, "r34">, DwarfRegAlias<Rx34>;
+def Rx35 : AMDILReg<35, "r35">, DwarfRegNum<[35]>;
+def Ry35 : AMDILReg<35, "r35">, DwarfRegAlias<Rx35>;
+def Rz35 : AMDILReg<35, "r35">, DwarfRegAlias<Rx35>;
+def Rw35 : AMDILReg<35, "r35">, DwarfRegAlias<Rx35>;
+def Rx36 : AMDILReg<36, "r36">, DwarfRegNum<[36]>;
+def Ry36 : AMDILReg<36, "r36">, DwarfRegAlias<Rx36>;
+def Rz36 : AMDILReg<36, "r36">, DwarfRegAlias<Rx36>;
+def Rw36 : AMDILReg<36, "r36">, DwarfRegAlias<Rx36>;
+def Rx37 : AMDILReg<37, "r37">, DwarfRegNum<[37]>;
+def Ry37 : AMDILReg<37, "r37">, DwarfRegAlias<Rx37>;
+def Rz37 : AMDILReg<37, "r37">, DwarfRegAlias<Rx37>;
+def Rw37 : AMDILReg<37, "r37">, DwarfRegAlias<Rx37>;
+def Rx38 : AMDILReg<38, "r38">, DwarfRegNum<[38]>;
+def Ry38 : AMDILReg<38, "r38">, DwarfRegAlias<Rx38>;
+def Rz38 : AMDILReg<38, "r38">, DwarfRegAlias<Rx38>;
+def Rw38 : AMDILReg<38, "r38">, DwarfRegAlias<Rx38>;
+def Rx39 : AMDILReg<39, "r39">, DwarfRegNum<[39]>;
+def Ry39 : AMDILReg<39, "r39">, DwarfRegAlias<Rx39>;
+def Rz39 : AMDILReg<39, "r39">, DwarfRegAlias<Rx39>;
+def Rw39 : AMDILReg<39, "r39">, DwarfRegAlias<Rx39>;
+def Rx40 : AMDILReg<40, "r40">, DwarfRegNum<[40]>;
+def Ry40 : AMDILReg<40, "r40">, DwarfRegAlias<Rx40>;
+def Rz40 : AMDILReg<40, "r40">, DwarfRegAlias<Rx40>;
+def Rw40 : AMDILReg<40, "r40">, DwarfRegAlias<Rx40>;
+def Rx41 : AMDILReg<41, "r41">, DwarfRegNum<[41]>;
+def Ry41 : AMDILReg<41, "r41">, DwarfRegAlias<Rx41>;
+def Rz41 : AMDILReg<41, "r41">, DwarfRegAlias<Rx41>;
+def Rw41 : AMDILReg<41, "r41">, DwarfRegAlias<Rx41>;
+def Rx42 : AMDILReg<42, "r42">, DwarfRegNum<[42]>;
+def Ry42 : AMDILReg<42, "r42">, DwarfRegAlias<Rx42>;
+def Rz42 : AMDILReg<42, "r42">, DwarfRegAlias<Rx42>;
+def Rw42 : AMDILReg<42, "r42">, DwarfRegAlias<Rx42>;
+def Rx43 : AMDILReg<43, "r43">, DwarfRegNum<[43]>;
+def Ry43 : AMDILReg<43, "r43">, DwarfRegAlias<Rx43>;
+def Rz43 : AMDILReg<43, "r43">, DwarfRegAlias<Rx43>;
+def Rw43 : AMDILReg<43, "r43">, DwarfRegAlias<Rx43>;
+def Rx44 : AMDILReg<44, "r44">, DwarfRegNum<[44]>;
+def Ry44 : AMDILReg<44, "r44">, DwarfRegAlias<Rx44>;
+def Rz44 : AMDILReg<44, "r44">, DwarfRegAlias<Rx44>;
+def Rw44 : AMDILReg<44, "r44">, DwarfRegAlias<Rx44>;
+def Rx45 : AMDILReg<45, "r45">, DwarfRegNum<[45]>;
+def Ry45 : AMDILReg<45, "r45">, DwarfRegAlias<Rx45>;
+def Rz45 : AMDILReg<45, "r45">, DwarfRegAlias<Rx45>;
+def Rw45 : AMDILReg<45, "r45">, DwarfRegAlias<Rx45>;
+def Rx46 : AMDILReg<46, "r46">, DwarfRegNum<[46]>;
+def Ry46 : AMDILReg<46, "r46">, DwarfRegAlias<Rx46>;
+def Rz46 : AMDILReg<46, "r46">, DwarfRegAlias<Rx46>;
+def Rw46 : AMDILReg<46, "r46">, DwarfRegAlias<Rx46>;
+def Rx47 : AMDILReg<47, "r47">, DwarfRegNum<[47]>;
+def Ry47 : AMDILReg<47, "r47">, DwarfRegAlias<Rx47>;
+def Rz47 : AMDILReg<47, "r47">, DwarfRegAlias<Rx47>;
+def Rw47 : AMDILReg<47, "r47">, DwarfRegAlias<Rx47>;
+def Rx48 : AMDILReg<48, "r48">, DwarfRegNum<[48]>;
+def Ry48 : AMDILReg<48, "r48">, DwarfRegAlias<Rx48>;
+def Rz48 : AMDILReg<48, "r48">, DwarfRegAlias<Rx48>;
+def Rw48 : AMDILReg<48, "r48">, DwarfRegAlias<Rx48>;
+def Rx49 : AMDILReg<49, "r49">, DwarfRegNum<[49]>;
+def Ry49 : AMDILReg<49, "r49">, DwarfRegAlias<Rx49>;
+def Rz49 : AMDILReg<49, "r49">, DwarfRegAlias<Rx49>;
+def Rw49 : AMDILReg<49, "r49">, DwarfRegAlias<Rx49>;
+def Rx50 : AMDILReg<50, "r50">, DwarfRegNum<[50]>;
+def Ry50 : AMDILReg<50, "r50">, DwarfRegAlias<Rx50>;
+def Rz50 : AMDILReg<50, "r50">, DwarfRegAlias<Rx50>;
+def Rw50 : AMDILReg<50, "r50">, DwarfRegAlias<Rx50>;
+def Rx51 : AMDILReg<51, "r51">, DwarfRegNum<[51]>;
+def Ry51 : AMDILReg<51, "r51">, DwarfRegAlias<Rx51>;
+def Rz51 : AMDILReg<51, "r51">, DwarfRegAlias<Rx51>;
+def Rw51 : AMDILReg<51, "r51">, DwarfRegAlias<Rx51>;
+def Rx52 : AMDILReg<52, "r52">, DwarfRegNum<[52]>;
+def Ry52 : AMDILReg<52, "r52">, DwarfRegAlias<Rx52>;
+def Rz52 : AMDILReg<52, "r52">, DwarfRegAlias<Rx52>;
+def Rw52 : AMDILReg<52, "r52">, DwarfRegAlias<Rx52>;
+def Rx53 : AMDILReg<53, "r53">, DwarfRegNum<[53]>;
+def Ry53 : AMDILReg<53, "r53">, DwarfRegAlias<Rx53>;
+def Rz53 : AMDILReg<53, "r53">, DwarfRegAlias<Rx53>;
+def Rw53 : AMDILReg<53, "r53">, DwarfRegAlias<Rx53>;
+def Rx54 : AMDILReg<54, "r54">, DwarfRegNum<[54]>;
+def Ry54 : AMDILReg<54, "r54">, DwarfRegAlias<Rx54>;
+def Rz54 : AMDILReg<54, "r54">, DwarfRegAlias<Rx54>;
+def Rw54 : AMDILReg<54, "r54">, DwarfRegAlias<Rx54>;
+def Rx55 : AMDILReg<55, "r55">, DwarfRegNum<[55]>;
+def Ry55 : AMDILReg<55, "r55">, DwarfRegAlias<Rx55>;
+def Rz55 : AMDILReg<55, "r55">, DwarfRegAlias<Rx55>;
+def Rw55 : AMDILReg<55, "r55">, DwarfRegAlias<Rx55>;
+def Rx56 : AMDILReg<56, "r56">, DwarfRegNum<[56]>;
+def Ry56 : AMDILReg<56, "r56">, DwarfRegAlias<Rx56>;
+def Rz56 : AMDILReg<56, "r56">, DwarfRegAlias<Rx56>;
+def Rw56 : AMDILReg<56, "r56">, DwarfRegAlias<Rx56>;
+def Rx57 : AMDILReg<57, "r57">, DwarfRegNum<[57]>;
+def Ry57 : AMDILReg<57, "r57">, DwarfRegAlias<Rx57>;
+def Rz57 : AMDILReg<57, "r57">, DwarfRegAlias<Rx57>;
+def Rw57 : AMDILReg<57, "r57">, DwarfRegAlias<Rx57>;
+def Rx58 : AMDILReg<58, "r58">, DwarfRegNum<[58]>;
+def Ry58 : AMDILReg<58, "r58">, DwarfRegAlias<Rx58>;
+def Rz58 : AMDILReg<58, "r58">, DwarfRegAlias<Rx58>;
+def Rw58 : AMDILReg<58, "r58">, DwarfRegAlias<Rx58>;
+def Rx59 : AMDILReg<59, "r59">, DwarfRegNum<[59]>;
+def Ry59 : AMDILReg<59, "r59">, DwarfRegAlias<Rx59>;
+def Rz59 : AMDILReg<59, "r59">, DwarfRegAlias<Rx59>;
+def Rw59 : AMDILReg<59, "r59">, DwarfRegAlias<Rx59>;
+def Rx60 : AMDILReg<60, "r60">, DwarfRegNum<[60]>;
+def Ry60 : AMDILReg<60, "r60">, DwarfRegAlias<Rx60>;
+def Rz60 : AMDILReg<60, "r60">, DwarfRegAlias<Rx60>;
+def Rw60 : AMDILReg<60, "r60">, DwarfRegAlias<Rx60>;
+def Rx61 : AMDILReg<61, "r61">, DwarfRegNum<[61]>;
+def Ry61 : AMDILReg<61, "r61">, DwarfRegAlias<Rx61>;
+def Rz61 : AMDILReg<61, "r61">, DwarfRegAlias<Rx61>;
+def Rw61 : AMDILReg<61, "r61">, DwarfRegAlias<Rx61>;
+def Rx62 : AMDILReg<62, "r62">, DwarfRegNum<[62]>;
+def Ry62 : AMDILReg<62, "r62">, DwarfRegAlias<Rx62>;
+def Rz62 : AMDILReg<62, "r62">, DwarfRegAlias<Rx62>;
+def Rw62 : AMDILReg<62, "r62">, DwarfRegAlias<Rx62>;
+def Rx63 : AMDILReg<63, "r63">, DwarfRegNum<[63]>;
+def Ry63 : AMDILReg<63, "r63">, DwarfRegAlias<Rx63>;
+def Rz63 : AMDILReg<63, "r63">, DwarfRegAlias<Rx63>;
+def Rw63 : AMDILReg<63, "r63">, DwarfRegAlias<Rx63>;
+def Rx64 : AMDILReg<64, "r64">, DwarfRegNum<[64]>;
+def Ry64 : AMDILReg<64, "r64">, DwarfRegAlias<Rx64>;
+def Rz64 : AMDILReg<64, "r64">, DwarfRegAlias<Rx64>;
+def Rw64 : AMDILReg<64, "r64">, DwarfRegAlias<Rx64>;
+def Rx65 : AMDILReg<65, "r65">, DwarfRegNum<[65]>;
+def Ry65 : AMDILReg<65, "r65">, DwarfRegAlias<Rx65>;
+def Rz65 : AMDILReg<65, "r65">, DwarfRegAlias<Rx65>;
+def Rw65 : AMDILReg<65, "r65">, DwarfRegAlias<Rx65>;
+def Rx66 : AMDILReg<66, "r66">, DwarfRegNum<[66]>;
+def Ry66 : AMDILReg<66, "r66">, DwarfRegAlias<Rx66>;
+def Rz66 : AMDILReg<66, "r66">, DwarfRegAlias<Rx66>;
+def Rw66 : AMDILReg<66, "r66">, DwarfRegAlias<Rx66>;
+def Rx67 : AMDILReg<67, "r67">, DwarfRegNum<[67]>;
+def Ry67 : AMDILReg<67, "r67">, DwarfRegAlias<Rx67>;
+def Rz67 : AMDILReg<67, "r67">, DwarfRegAlias<Rx67>;
+def Rw67 : AMDILReg<67, "r67">, DwarfRegAlias<Rx67>;
+def Rx68 : AMDILReg<68, "r68">, DwarfRegNum<[68]>;
+def Ry68 : AMDILReg<68, "r68">, DwarfRegAlias<Rx68>;
+def Rz68 : AMDILReg<68, "r68">, DwarfRegAlias<Rx68>;
+def Rw68 : AMDILReg<68, "r68">, DwarfRegAlias<Rx68>;
+def Rx69 : AMDILReg<69, "r69">, DwarfRegNum<[69]>;
+def Ry69 : AMDILReg<69, "r69">, DwarfRegAlias<Rx69>;
+def Rz69 : AMDILReg<69, "r69">, DwarfRegAlias<Rx69>;
+def Rw69 : AMDILReg<69, "r69">, DwarfRegAlias<Rx69>;
+def Rx70 : AMDILReg<70, "r70">, DwarfRegNum<[70]>;
+def Ry70 : AMDILReg<70, "r70">, DwarfRegAlias<Rx70>;
+def Rz70 : AMDILReg<70, "r70">, DwarfRegAlias<Rx70>;
+def Rw70 : AMDILReg<70, "r70">, DwarfRegAlias<Rx70>;
+def Rx71 : AMDILReg<71, "r71">, DwarfRegNum<[71]>;
+def Ry71 : AMDILReg<71, "r71">, DwarfRegAlias<Rx71>;
+def Rz71 : AMDILReg<71, "r71">, DwarfRegAlias<Rx71>;
+def Rw71 : AMDILReg<71, "r71">, DwarfRegAlias<Rx71>;
+def Rx72 : AMDILReg<72, "r72">, DwarfRegNum<[72]>;
+def Ry72 : AMDILReg<72, "r72">, DwarfRegAlias<Rx72>;
+def Rz72 : AMDILReg<72, "r72">, DwarfRegAlias<Rx72>;
+def Rw72 : AMDILReg<72, "r72">, DwarfRegAlias<Rx72>;
+def Rx73 : AMDILReg<73, "r73">, DwarfRegNum<[73]>;
+def Ry73 : AMDILReg<73, "r73">, DwarfRegAlias<Rx73>;
+def Rz73 : AMDILReg<73, "r73">, DwarfRegAlias<Rx73>;
+def Rw73 : AMDILReg<73, "r73">, DwarfRegAlias<Rx73>;
+def Rx74 : AMDILReg<74, "r74">, DwarfRegNum<[74]>;
+def Ry74 : AMDILReg<74, "r74">, DwarfRegAlias<Rx74>;
+def Rz74 : AMDILReg<74, "r74">, DwarfRegAlias<Rx74>;
+def Rw74 : AMDILReg<74, "r74">, DwarfRegAlias<Rx74>;
+def Rx75 : AMDILReg<75, "r75">, DwarfRegNum<[75]>;
+def Ry75 : AMDILReg<75, "r75">, DwarfRegAlias<Rx75>;
+def Rz75 : AMDILReg<75, "r75">, DwarfRegAlias<Rx75>;
+def Rw75 : AMDILReg<75, "r75">, DwarfRegAlias<Rx75>;
+def Rx76 : AMDILReg<76, "r76">, DwarfRegNum<[76]>;
+def Ry76 : AMDILReg<76, "r76">, DwarfRegAlias<Rx76>;
+def Rz76 : AMDILReg<76, "r76">, DwarfRegAlias<Rx76>;
+def Rw76 : AMDILReg<76, "r76">, DwarfRegAlias<Rx76>;
+def Rx77 : AMDILReg<77, "r77">, DwarfRegNum<[77]>;
+def Ry77 : AMDILReg<77, "r77">, DwarfRegAlias<Rx77>;
+def Rz77 : AMDILReg<77, "r77">, DwarfRegAlias<Rx77>;
+def Rw77 : AMDILReg<77, "r77">, DwarfRegAlias<Rx77>;
+def Rx78 : AMDILReg<78, "r78">, DwarfRegNum<[78]>;
+def Ry78 : AMDILReg<78, "r78">, DwarfRegAlias<Rx78>;
+def Rz78 : AMDILReg<78, "r78">, DwarfRegAlias<Rx78>;
+def Rw78 : AMDILReg<78, "r78">, DwarfRegAlias<Rx78>;
+def Rx79 : AMDILReg<79, "r79">, DwarfRegNum<[79]>;
+def Ry79 : AMDILReg<79, "r79">, DwarfRegAlias<Rx79>;
+def Rz79 : AMDILReg<79, "r79">, DwarfRegAlias<Rx79>;
+def Rw79 : AMDILReg<79, "r79">, DwarfRegAlias<Rx79>;
+def Rx80 : AMDILReg<80, "r80">, DwarfRegNum<[80]>;
+def Ry80 : AMDILReg<80, "r80">, DwarfRegAlias<Rx80>;
+def Rz80 : AMDILReg<80, "r80">, DwarfRegAlias<Rx80>;
+def Rw80 : AMDILReg<80, "r80">, DwarfRegAlias<Rx80>;
+def Rx81 : AMDILReg<81, "r81">, DwarfRegNum<[81]>;
+def Ry81 : AMDILReg<81, "r81">, DwarfRegAlias<Rx81>;
+def Rz81 : AMDILReg<81, "r81">, DwarfRegAlias<Rx81>;
+def Rw81 : AMDILReg<81, "r81">, DwarfRegAlias<Rx81>;
+def Rx82 : AMDILReg<82, "r82">, DwarfRegNum<[82]>;
+def Ry82 : AMDILReg<82, "r82">, DwarfRegAlias<Rx82>;
+def Rz82 : AMDILReg<82, "r82">, DwarfRegAlias<Rx82>;
+def Rw82 : AMDILReg<82, "r82">, DwarfRegAlias<Rx82>;
+def Rx83 : AMDILReg<83, "r83">, DwarfRegNum<[83]>;
+def Ry83 : AMDILReg<83, "r83">, DwarfRegAlias<Rx83>;
+def Rz83 : AMDILReg<83, "r83">, DwarfRegAlias<Rx83>;
+def Rw83 : AMDILReg<83, "r83">, DwarfRegAlias<Rx83>;
+def Rx84 : AMDILReg<84, "r84">, DwarfRegNum<[84]>;
+def Ry84 : AMDILReg<84, "r84">, DwarfRegAlias<Rx84>;
+def Rz84 : AMDILReg<84, "r84">, DwarfRegAlias<Rx84>;
+def Rw84 : AMDILReg<84, "r84">, DwarfRegAlias<Rx84>;
+def Rx85 : AMDILReg<85, "r85">, DwarfRegNum<[85]>;
+def Ry85 : AMDILReg<85, "r85">, DwarfRegAlias<Rx85>;
+def Rz85 : AMDILReg<85, "r85">, DwarfRegAlias<Rx85>;
+def Rw85 : AMDILReg<85, "r85">, DwarfRegAlias<Rx85>;
+def Rx86 : AMDILReg<86, "r86">, DwarfRegNum<[86]>;
+def Ry86 : AMDILReg<86, "r86">, DwarfRegAlias<Rx86>;
+def Rz86 : AMDILReg<86, "r86">, DwarfRegAlias<Rx86>;
+def Rw86 : AMDILReg<86, "r86">, DwarfRegAlias<Rx86>;
+def Rx87 : AMDILReg<87, "r87">, DwarfRegNum<[87]>;
+def Ry87 : AMDILReg<87, "r87">, DwarfRegAlias<Rx87>;
+def Rz87 : AMDILReg<87, "r87">, DwarfRegAlias<Rx87>;
+def Rw87 : AMDILReg<87, "r87">, DwarfRegAlias<Rx87>;
+def Rx88 : AMDILReg<88, "r88">, DwarfRegNum<[88]>;
+def Ry88 : AMDILReg<88, "r88">, DwarfRegAlias<Rx88>;
+def Rz88 : AMDILReg<88, "r88">, DwarfRegAlias<Rx88>;
+def Rw88 : AMDILReg<88, "r88">, DwarfRegAlias<Rx88>;
+def Rx89 : AMDILReg<89, "r89">, DwarfRegNum<[89]>;
+def Ry89 : AMDILReg<89, "r89">, DwarfRegAlias<Rx89>;
+def Rz89 : AMDILReg<89, "r89">, DwarfRegAlias<Rx89>;
+def Rw89 : AMDILReg<89, "r89">, DwarfRegAlias<Rx89>;
+def Rx90 : AMDILReg<90, "r90">, DwarfRegNum<[90]>;
+def Ry90 : AMDILReg<90, "r90">, DwarfRegAlias<Rx90>;
+def Rz90 : AMDILReg<90, "r90">, DwarfRegAlias<Rx90>;
+def Rw90 : AMDILReg<90, "r90">, DwarfRegAlias<Rx90>;
+def Rx91 : AMDILReg<91, "r91">, DwarfRegNum<[91]>;
+def Ry91 : AMDILReg<91, "r91">, DwarfRegAlias<Rx91>;
+def Rz91 : AMDILReg<91, "r91">, DwarfRegAlias<Rx91>;
+def Rw91 : AMDILReg<91, "r91">, DwarfRegAlias<Rx91>;
+def Rx92 : AMDILReg<92, "r92">, DwarfRegNum<[92]>;
+def Ry92 : AMDILReg<92, "r92">, DwarfRegAlias<Rx92>;
+def Rz92 : AMDILReg<92, "r92">, DwarfRegAlias<Rx92>;
+def Rw92 : AMDILReg<92, "r92">, DwarfRegAlias<Rx92>;
+def Rx93 : AMDILReg<93, "r93">, DwarfRegNum<[93]>;
+def Ry93 : AMDILReg<93, "r93">, DwarfRegAlias<Rx93>;
+def Rz93 : AMDILReg<93, "r93">, DwarfRegAlias<Rx93>;
+def Rw93 : AMDILReg<93, "r93">, DwarfRegAlias<Rx93>;
+def Rx94 : AMDILReg<94, "r94">, DwarfRegNum<[94]>;
+def Ry94 : AMDILReg<94, "r94">, DwarfRegAlias<Rx94>;
+def Rz94 : AMDILReg<94, "r94">, DwarfRegAlias<Rx94>;
+def Rw94 : AMDILReg<94, "r94">, DwarfRegAlias<Rx94>;
+def Rx95 : AMDILReg<95, "r95">, DwarfRegNum<[95]>;
+def Ry95 : AMDILReg<95, "r95">, DwarfRegAlias<Rx95>;
+def Rz95 : AMDILReg<95, "r95">, DwarfRegAlias<Rx95>;
+def Rw95 : AMDILReg<95, "r95">, DwarfRegAlias<Rx95>;
+def Rx96 : AMDILReg<96, "r96">, DwarfRegNum<[96]>;
+def Ry96 : AMDILReg<96, "r96">, DwarfRegAlias<Rx96>;
+def Rz96 : AMDILReg<96, "r96">, DwarfRegAlias<Rx96>;
+def Rw96 : AMDILReg<96, "r96">, DwarfRegAlias<Rx96>;
+def Rx97 : AMDILReg<97, "r97">, DwarfRegNum<[97]>;
+def Ry97 : AMDILReg<97, "r97">, DwarfRegAlias<Rx97>;
+def Rz97 : AMDILReg<97, "r97">, DwarfRegAlias<Rx97>;
+def Rw97 : AMDILReg<97, "r97">, DwarfRegAlias<Rx97>;
+def Rx98 : AMDILReg<98, "r98">, DwarfRegNum<[98]>;
+def Ry98 : AMDILReg<98, "r98">, DwarfRegAlias<Rx98>;
+def Rz98 : AMDILReg<98, "r98">, DwarfRegAlias<Rx98>;
+def Rw98 : AMDILReg<98, "r98">, DwarfRegAlias<Rx98>;
+def Rx99 : AMDILReg<99, "r99">, DwarfRegNum<[99]>;
+def Ry99 : AMDILReg<99, "r99">, DwarfRegAlias<Rx99>;
+def Rz99 : AMDILReg<99, "r99">, DwarfRegAlias<Rx99>;
+def Rw99 : AMDILReg<99, "r99">, DwarfRegAlias<Rx99>;
+def Rx100 : AMDILReg<100, "r100">, DwarfRegNum<[100]>;
+def Ry100 : AMDILReg<100, "r100">, DwarfRegAlias<Rx100>;
+def Rz100 : AMDILReg<100, "r100">, DwarfRegAlias<Rx100>;
+def Rw100 : AMDILReg<100, "r100">, DwarfRegAlias<Rx100>;
+def Rx101 : AMDILReg<101, "r101">, DwarfRegNum<[101]>;
+def Ry101 : AMDILReg<101, "r101">, DwarfRegAlias<Rx101>;
+def Rz101 : AMDILReg<101, "r101">, DwarfRegAlias<Rx101>;
+def Rw101 : AMDILReg<101, "r101">, DwarfRegAlias<Rx101>;
+def Rx102 : AMDILReg<102, "r102">, DwarfRegNum<[102]>;
+def Ry102 : AMDILReg<102, "r102">, DwarfRegAlias<Rx102>;
+def Rz102 : AMDILReg<102, "r102">, DwarfRegAlias<Rx102>;
+def Rw102 : AMDILReg<102, "r102">, DwarfRegAlias<Rx102>;
+def Rx103 : AMDILReg<103, "r103">, DwarfRegNum<[103]>;
+def Ry103 : AMDILReg<103, "r103">, DwarfRegAlias<Rx103>;
+def Rz103 : AMDILReg<103, "r103">, DwarfRegAlias<Rx103>;
+def Rw103 : AMDILReg<103, "r103">, DwarfRegAlias<Rx103>;
+def Rx104 : AMDILReg<104, "r104">, DwarfRegNum<[104]>;
+def Ry104 : AMDILReg<104, "r104">, DwarfRegAlias<Rx104>;
+def Rz104 : AMDILReg<104, "r104">, DwarfRegAlias<Rx104>;
+def Rw104 : AMDILReg<104, "r104">, DwarfRegAlias<Rx104>;
+def Rx105 : AMDILReg<105, "r105">, DwarfRegNum<[105]>;
+def Ry105 : AMDILReg<105, "r105">, DwarfRegAlias<Rx105>;
+def Rz105 : AMDILReg<105, "r105">, DwarfRegAlias<Rx105>;
+def Rw105 : AMDILReg<105, "r105">, DwarfRegAlias<Rx105>;
+def Rx106 : AMDILReg<106, "r106">, DwarfRegNum<[106]>;
+def Ry106 : AMDILReg<106, "r106">, DwarfRegAlias<Rx106>;
+def Rz106 : AMDILReg<106, "r106">, DwarfRegAlias<Rx106>;
+def Rw106 : AMDILReg<106, "r106">, DwarfRegAlias<Rx106>;
+def Rx107 : AMDILReg<107, "r107">, DwarfRegNum<[107]>;
+def Ry107 : AMDILReg<107, "r107">, DwarfRegAlias<Rx107>;
+def Rz107 : AMDILReg<107, "r107">, DwarfRegAlias<Rx107>;
+def Rw107 : AMDILReg<107, "r107">, DwarfRegAlias<Rx107>;
+def Rx108 : AMDILReg<108, "r108">, DwarfRegNum<[108]>;
+def Ry108 : AMDILReg<108, "r108">, DwarfRegAlias<Rx108>;
+def Rz108 : AMDILReg<108, "r108">, DwarfRegAlias<Rx108>;
+def Rw108 : AMDILReg<108, "r108">, DwarfRegAlias<Rx108>;
+def Rx109 : AMDILReg<109, "r109">, DwarfRegNum<[109]>;
+def Ry109 : AMDILReg<109, "r109">, DwarfRegAlias<Rx109>;
+def Rz109 : AMDILReg<109, "r109">, DwarfRegAlias<Rx109>;
+def Rw109 : AMDILReg<109, "r109">, DwarfRegAlias<Rx109>;
+def Rx110 : AMDILReg<110, "r110">, DwarfRegNum<[110]>;
+def Ry110 : AMDILReg<110, "r110">, DwarfRegAlias<Rx110>;
+def Rz110 : AMDILReg<110, "r110">, DwarfRegAlias<Rx110>;
+def Rw110 : AMDILReg<110, "r110">, DwarfRegAlias<Rx110>;
+def Rx111 : AMDILReg<111, "r111">, DwarfRegNum<[111]>;
+def Ry111 : AMDILReg<111, "r111">, DwarfRegAlias<Rx111>;
+def Rz111 : AMDILReg<111, "r111">, DwarfRegAlias<Rx111>;
+def Rw111 : AMDILReg<111, "r111">, DwarfRegAlias<Rx111>;
+def Rx112 : AMDILReg<112, "r112">, DwarfRegNum<[112]>;
+def Ry112 : AMDILReg<112, "r112">, DwarfRegAlias<Rx112>;
+def Rz112 : AMDILReg<112, "r112">, DwarfRegAlias<Rx112>;
+def Rw112 : AMDILReg<112, "r112">, DwarfRegAlias<Rx112>;
+def Rx113 : AMDILReg<113, "r113">, DwarfRegNum<[113]>;
+def Ry113 : AMDILReg<113, "r113">, DwarfRegAlias<Rx113>;
+def Rz113 : AMDILReg<113, "r113">, DwarfRegAlias<Rx113>;
+def Rw113 : AMDILReg<113, "r113">, DwarfRegAlias<Rx113>;
+def Rx114 : AMDILReg<114, "r114">, DwarfRegNum<[114]>;
+def Ry114 : AMDILReg<114, "r114">, DwarfRegAlias<Rx114>;
+def Rz114 : AMDILReg<114, "r114">, DwarfRegAlias<Rx114>;
+def Rw114 : AMDILReg<114, "r114">, DwarfRegAlias<Rx114>;
+def Rx115 : AMDILReg<115, "r115">, DwarfRegNum<[115]>;
+def Ry115 : AMDILReg<115, "r115">, DwarfRegAlias<Rx115>;
+def Rz115 : AMDILReg<115, "r115">, DwarfRegAlias<Rx115>;
+def Rw115 : AMDILReg<115, "r115">, DwarfRegAlias<Rx115>;
+def Rx116 : AMDILReg<116, "r116">, DwarfRegNum<[116]>;
+def Ry116 : AMDILReg<116, "r116">, DwarfRegAlias<Rx116>;
+def Rz116 : AMDILReg<116, "r116">, DwarfRegAlias<Rx116>;
+def Rw116 : AMDILReg<116, "r116">, DwarfRegAlias<Rx116>;
+def Rx117 : AMDILReg<117, "r117">, DwarfRegNum<[117]>;
+def Ry117 : AMDILReg<117, "r117">, DwarfRegAlias<Rx117>;
+def Rz117 : AMDILReg<117, "r117">, DwarfRegAlias<Rx117>;
+def Rw117 : AMDILReg<117, "r117">, DwarfRegAlias<Rx117>;
+def Rx118 : AMDILReg<118, "r118">, DwarfRegNum<[118]>;
+def Ry118 : AMDILReg<118, "r118">, DwarfRegAlias<Rx118>;
+def Rz118 : AMDILReg<118, "r118">, DwarfRegAlias<Rx118>;
+def Rw118 : AMDILReg<118, "r118">, DwarfRegAlias<Rx118>;
+def Rx119 : AMDILReg<119, "r119">, DwarfRegNum<[119]>;
+def Ry119 : AMDILReg<119, "r119">, DwarfRegAlias<Rx119>;
+def Rz119 : AMDILReg<119, "r119">, DwarfRegAlias<Rx119>;
+def Rw119 : AMDILReg<119, "r119">, DwarfRegAlias<Rx119>;
+def Rx120 : AMDILReg<120, "r120">, DwarfRegNum<[120]>;
+def Ry120 : AMDILReg<120, "r120">, DwarfRegAlias<Rx120>;
+def Rz120 : AMDILReg<120, "r120">, DwarfRegAlias<Rx120>;
+def Rw120 : AMDILReg<120, "r120">, DwarfRegAlias<Rx120>;
+def Rx121 : AMDILReg<121, "r121">, DwarfRegNum<[121]>;
+def Ry121 : AMDILReg<121, "r121">, DwarfRegAlias<Rx121>;
+def Rz121 : AMDILReg<121, "r121">, DwarfRegAlias<Rx121>;
+def Rw121 : AMDILReg<121, "r121">, DwarfRegAlias<Rx121>;
+def Rx122 : AMDILReg<122, "r122">, DwarfRegNum<[122]>;
+def Ry122 : AMDILReg<122, "r122">, DwarfRegAlias<Rx122>;
+def Rz122 : AMDILReg<122, "r122">, DwarfRegAlias<Rx122>;
+def Rw122 : AMDILReg<122, "r122">, DwarfRegAlias<Rx122>;
+def Rx123 : AMDILReg<123, "r123">, DwarfRegNum<[123]>;
+def Ry123 : AMDILReg<123, "r123">, DwarfRegAlias<Rx123>;
+def Rz123 : AMDILReg<123, "r123">, DwarfRegAlias<Rx123>;
+def Rw123 : AMDILReg<123, "r123">, DwarfRegAlias<Rx123>;
+def Rx124 : AMDILReg<124, "r124">, DwarfRegNum<[124]>;
+def Ry124 : AMDILReg<124, "r124">, DwarfRegAlias<Rx124>;
+def Rz124 : AMDILReg<124, "r124">, DwarfRegAlias<Rx124>;
+def Rw124 : AMDILReg<124, "r124">, DwarfRegAlias<Rx124>;
+def Rx125 : AMDILReg<125, "r125">, DwarfRegNum<[125]>;
+def Ry125 : AMDILReg<125, "r125">, DwarfRegAlias<Rx125>;
+def Rz125 : AMDILReg<125, "r125">, DwarfRegAlias<Rx125>;
+def Rw125 : AMDILReg<125, "r125">, DwarfRegAlias<Rx125>;
+def Rx126 : AMDILReg<126, "r126">, DwarfRegNum<[126]>;
+def Ry126 : AMDILReg<126, "r126">, DwarfRegAlias<Rx126>;
+def Rz126 : AMDILReg<126, "r126">, DwarfRegAlias<Rx126>;
+def Rw126 : AMDILReg<126, "r126">, DwarfRegAlias<Rx126>;
+def Rx127 : AMDILReg<127, "r127">, DwarfRegNum<[127]>;
+def Ry127 : AMDILReg<127, "r127">, DwarfRegAlias<Rx127>;
+def Rz127 : AMDILReg<127, "r127">, DwarfRegAlias<Rx127>;
+def Rw127 : AMDILReg<127, "r127">, DwarfRegAlias<Rx127>;
+def Rx128 : AMDILReg<128, "r128">, DwarfRegNum<[128]>;
+def Ry128 : AMDILReg<128, "r128">, DwarfRegAlias<Rx128>;
+def Rz128 : AMDILReg<128, "r128">, DwarfRegAlias<Rx128>;
+def Rw128 : AMDILReg<128, "r128">, DwarfRegAlias<Rx128>;
+def Rx129 : AMDILReg<129, "r129">, DwarfRegNum<[129]>;
+def Ry129 : AMDILReg<129, "r129">, DwarfRegAlias<Rx129>;
+def Rz129 : AMDILReg<129, "r129">, DwarfRegAlias<Rx129>;
+def Rw129 : AMDILReg<129, "r129">, DwarfRegAlias<Rx129>;
+def Rx130 : AMDILReg<130, "r130">, DwarfRegNum<[130]>;
+def Ry130 : AMDILReg<130, "r130">, DwarfRegAlias<Rx130>;
+def Rz130 : AMDILReg<130, "r130">, DwarfRegAlias<Rx130>;
+def Rw130 : AMDILReg<130, "r130">, DwarfRegAlias<Rx130>;
+def Rx131 : AMDILReg<131, "r131">, DwarfRegNum<[131]>;
+def Ry131 : AMDILReg<131, "r131">, DwarfRegAlias<Rx131>;
+def Rz131 : AMDILReg<131, "r131">, DwarfRegAlias<Rx131>;
+def Rw131 : AMDILReg<131, "r131">, DwarfRegAlias<Rx131>;
+def Rx132 : AMDILReg<132, "r132">, DwarfRegNum<[132]>;
+def Ry132 : AMDILReg<132, "r132">, DwarfRegAlias<Rx132>;
+def Rz132 : AMDILReg<132, "r132">, DwarfRegAlias<Rx132>;
+def Rw132 : AMDILReg<132, "r132">, DwarfRegAlias<Rx132>;
+def Rx133 : AMDILReg<133, "r133">, DwarfRegNum<[133]>;
+def Ry133 : AMDILReg<133, "r133">, DwarfRegAlias<Rx133>;
+def Rz133 : AMDILReg<133, "r133">, DwarfRegAlias<Rx133>;
+def Rw133 : AMDILReg<133, "r133">, DwarfRegAlias<Rx133>;
+def Rx134 : AMDILReg<134, "r134">, DwarfRegNum<[134]>;
+def Ry134 : AMDILReg<134, "r134">, DwarfRegAlias<Rx134>;
+def Rz134 : AMDILReg<134, "r134">, DwarfRegAlias<Rx134>;
+def Rw134 : AMDILReg<134, "r134">, DwarfRegAlias<Rx134>;
+def Rx135 : AMDILReg<135, "r135">, DwarfRegNum<[135]>;
+def Ry135 : AMDILReg<135, "r135">, DwarfRegAlias<Rx135>;
+def Rz135 : AMDILReg<135, "r135">, DwarfRegAlias<Rx135>;
+def Rw135 : AMDILReg<135, "r135">, DwarfRegAlias<Rx135>;
+def Rx136 : AMDILReg<136, "r136">, DwarfRegNum<[136]>;
+def Ry136 : AMDILReg<136, "r136">, DwarfRegAlias<Rx136>;
+def Rz136 : AMDILReg<136, "r136">, DwarfRegAlias<Rx136>;
+def Rw136 : AMDILReg<136, "r136">, DwarfRegAlias<Rx136>;
+def Rx137 : AMDILReg<137, "r137">, DwarfRegNum<[137]>;
+def Ry137 : AMDILReg<137, "r137">, DwarfRegAlias<Rx137>;
+def Rz137 : AMDILReg<137, "r137">, DwarfRegAlias<Rx137>;
+def Rw137 : AMDILReg<137, "r137">, DwarfRegAlias<Rx137>;
+def Rx138 : AMDILReg<138, "r138">, DwarfRegNum<[138]>;
+def Ry138 : AMDILReg<138, "r138">, DwarfRegAlias<Rx138>;
+def Rz138 : AMDILReg<138, "r138">, DwarfRegAlias<Rx138>;
+def Rw138 : AMDILReg<138, "r138">, DwarfRegAlias<Rx138>;
+def Rx139 : AMDILReg<139, "r139">, DwarfRegNum<[139]>;
+def Ry139 : AMDILReg<139, "r139">, DwarfRegAlias<Rx139>;
+def Rz139 : AMDILReg<139, "r139">, DwarfRegAlias<Rx139>;
+def Rw139 : AMDILReg<139, "r139">, DwarfRegAlias<Rx139>;
+def Rx140 : AMDILReg<140, "r140">, DwarfRegNum<[140]>;
+def Ry140 : AMDILReg<140, "r140">, DwarfRegAlias<Rx140>;
+def Rz140 : AMDILReg<140, "r140">, DwarfRegAlias<Rx140>;
+def Rw140 : AMDILReg<140, "r140">, DwarfRegAlias<Rx140>;
+def Rx141 : AMDILReg<141, "r141">, DwarfRegNum<[141]>;
+def Ry141 : AMDILReg<141, "r141">, DwarfRegAlias<Rx141>;
+def Rz141 : AMDILReg<141, "r141">, DwarfRegAlias<Rx141>;
+def Rw141 : AMDILReg<141, "r141">, DwarfRegAlias<Rx141>;
+def Rx142 : AMDILReg<142, "r142">, DwarfRegNum<[142]>;
+def Ry142 : AMDILReg<142, "r142">, DwarfRegAlias<Rx142>;
+def Rz142 : AMDILReg<142, "r142">, DwarfRegAlias<Rx142>;
+def Rw142 : AMDILReg<142, "r142">, DwarfRegAlias<Rx142>;
+def Rx143 : AMDILReg<143, "r143">, DwarfRegNum<[143]>;
+def Ry143 : AMDILReg<143, "r143">, DwarfRegAlias<Rx143>;
+def Rz143 : AMDILReg<143, "r143">, DwarfRegAlias<Rx143>;
+def Rw143 : AMDILReg<143, "r143">, DwarfRegAlias<Rx143>;
+def Rx144 : AMDILReg<144, "r144">, DwarfRegNum<[144]>;
+def Ry144 : AMDILReg<144, "r144">, DwarfRegAlias<Rx144>;
+def Rz144 : AMDILReg<144, "r144">, DwarfRegAlias<Rx144>;
+def Rw144 : AMDILReg<144, "r144">, DwarfRegAlias<Rx144>;
+def Rx145 : AMDILReg<145, "r145">, DwarfRegNum<[145]>;
+def Ry145 : AMDILReg<145, "r145">, DwarfRegAlias<Rx145>;
+def Rz145 : AMDILReg<145, "r145">, DwarfRegAlias<Rx145>;
+def Rw145 : AMDILReg<145, "r145">, DwarfRegAlias<Rx145>;
+def Rx146 : AMDILReg<146, "r146">, DwarfRegNum<[146]>;
+def Ry146 : AMDILReg<146, "r146">, DwarfRegAlias<Rx146>;
+def Rz146 : AMDILReg<146, "r146">, DwarfRegAlias<Rx146>;
+def Rw146 : AMDILReg<146, "r146">, DwarfRegAlias<Rx146>;
+def Rx147 : AMDILReg<147, "r147">, DwarfRegNum<[147]>;
+def Ry147 : AMDILReg<147, "r147">, DwarfRegAlias<Rx147>;
+def Rz147 : AMDILReg<147, "r147">, DwarfRegAlias<Rx147>;
+def Rw147 : AMDILReg<147, "r147">, DwarfRegAlias<Rx147>;
+def Rx148 : AMDILReg<148, "r148">, DwarfRegNum<[148]>;
+def Ry148 : AMDILReg<148, "r148">, DwarfRegAlias<Rx148>;
+def Rz148 : AMDILReg<148, "r148">, DwarfRegAlias<Rx148>;
+def Rw148 : AMDILReg<148, "r148">, DwarfRegAlias<Rx148>;
+def Rx149 : AMDILReg<149, "r149">, DwarfRegNum<[149]>;
+def Ry149 : AMDILReg<149, "r149">, DwarfRegAlias<Rx149>;
+def Rz149 : AMDILReg<149, "r149">, DwarfRegAlias<Rx149>;
+def Rw149 : AMDILReg<149, "r149">, DwarfRegAlias<Rx149>;
+def Rx150 : AMDILReg<150, "r150">, DwarfRegNum<[150]>;
+def Ry150 : AMDILReg<150, "r150">, DwarfRegAlias<Rx150>;
+def Rz150 : AMDILReg<150, "r150">, DwarfRegAlias<Rx150>;
+def Rw150 : AMDILReg<150, "r150">, DwarfRegAlias<Rx150>;
+def Rx151 : AMDILReg<151, "r151">, DwarfRegNum<[151]>;
+def Ry151 : AMDILReg<151, "r151">, DwarfRegAlias<Rx151>;
+def Rz151 : AMDILReg<151, "r151">, DwarfRegAlias<Rx151>;
+def Rw151 : AMDILReg<151, "r151">, DwarfRegAlias<Rx151>;
+def Rx152 : AMDILReg<152, "r152">, DwarfRegNum<[152]>;
+def Ry152 : AMDILReg<152, "r152">, DwarfRegAlias<Rx152>;
+def Rz152 : AMDILReg<152, "r152">, DwarfRegAlias<Rx152>;
+def Rw152 : AMDILReg<152, "r152">, DwarfRegAlias<Rx152>;
+def Rx153 : AMDILReg<153, "r153">, DwarfRegNum<[153]>;
+def Ry153 : AMDILReg<153, "r153">, DwarfRegAlias<Rx153>;
+def Rz153 : AMDILReg<153, "r153">, DwarfRegAlias<Rx153>;
+def Rw153 : AMDILReg<153, "r153">, DwarfRegAlias<Rx153>;
+def Rx154 : AMDILReg<154, "r154">, DwarfRegNum<[154]>;
+def Ry154 : AMDILReg<154, "r154">, DwarfRegAlias<Rx154>;
+def Rz154 : AMDILReg<154, "r154">, DwarfRegAlias<Rx154>;
+def Rw154 : AMDILReg<154, "r154">, DwarfRegAlias<Rx154>;
+def Rx155 : AMDILReg<155, "r155">, DwarfRegNum<[155]>;
+def Ry155 : AMDILReg<155, "r155">, DwarfRegAlias<Rx155>;
+def Rz155 : AMDILReg<155, "r155">, DwarfRegAlias<Rx155>;
+def Rw155 : AMDILReg<155, "r155">, DwarfRegAlias<Rx155>;
+def Rx156 : AMDILReg<156, "r156">, DwarfRegNum<[156]>;
+def Ry156 : AMDILReg<156, "r156">, DwarfRegAlias<Rx156>;
+def Rz156 : AMDILReg<156, "r156">, DwarfRegAlias<Rx156>;
+def Rw156 : AMDILReg<156, "r156">, DwarfRegAlias<Rx156>;
+def Rx157 : AMDILReg<157, "r157">, DwarfRegNum<[157]>;
+def Ry157 : AMDILReg<157, "r157">, DwarfRegAlias<Rx157>;
+def Rz157 : AMDILReg<157, "r157">, DwarfRegAlias<Rx157>;
+def Rw157 : AMDILReg<157, "r157">, DwarfRegAlias<Rx157>;
+def Rx158 : AMDILReg<158, "r158">, DwarfRegNum<[158]>;
+def Ry158 : AMDILReg<158, "r158">, DwarfRegAlias<Rx158>;
+def Rz158 : AMDILReg<158, "r158">, DwarfRegAlias<Rx158>;
+def Rw158 : AMDILReg<158, "r158">, DwarfRegAlias<Rx158>;
+def Rx159 : AMDILReg<159, "r159">, DwarfRegNum<[159]>;
+def Ry159 : AMDILReg<159, "r159">, DwarfRegAlias<Rx159>;
+def Rz159 : AMDILReg<159, "r159">, DwarfRegAlias<Rx159>;
+def Rw159 : AMDILReg<159, "r159">, DwarfRegAlias<Rx159>;
+def Rx160 : AMDILReg<160, "r160">, DwarfRegNum<[160]>;
+def Ry160 : AMDILReg<160, "r160">, DwarfRegAlias<Rx160>;
+def Rz160 : AMDILReg<160, "r160">, DwarfRegAlias<Rx160>;
+def Rw160 : AMDILReg<160, "r160">, DwarfRegAlias<Rx160>;
+def Rx161 : AMDILReg<161, "r161">, DwarfRegNum<[161]>;
+def Ry161 : AMDILReg<161, "r161">, DwarfRegAlias<Rx161>;
+def Rz161 : AMDILReg<161, "r161">, DwarfRegAlias<Rx161>;
+def Rw161 : AMDILReg<161, "r161">, DwarfRegAlias<Rx161>;
+def Rx162 : AMDILReg<162, "r162">, DwarfRegNum<[162]>;
+def Ry162 : AMDILReg<162, "r162">, DwarfRegAlias<Rx162>;
+def Rz162 : AMDILReg<162, "r162">, DwarfRegAlias<Rx162>;
+def Rw162 : AMDILReg<162, "r162">, DwarfRegAlias<Rx162>;
+def Rx163 : AMDILReg<163, "r163">, DwarfRegNum<[163]>;
+def Ry163 : AMDILReg<163, "r163">, DwarfRegAlias<Rx163>;
+def Rz163 : AMDILReg<163, "r163">, DwarfRegAlias<Rx163>;
+def Rw163 : AMDILReg<163, "r163">, DwarfRegAlias<Rx163>;
+def Rx164 : AMDILReg<164, "r164">, DwarfRegNum<[164]>;
+def Ry164 : AMDILReg<164, "r164">, DwarfRegAlias<Rx164>;
+def Rz164 : AMDILReg<164, "r164">, DwarfRegAlias<Rx164>;
+def Rw164 : AMDILReg<164, "r164">, DwarfRegAlias<Rx164>;
+def Rx165 : AMDILReg<165, "r165">, DwarfRegNum<[165]>;
+def Ry165 : AMDILReg<165, "r165">, DwarfRegAlias<Rx165>;
+def Rz165 : AMDILReg<165, "r165">, DwarfRegAlias<Rx165>;
+def Rw165 : AMDILReg<165, "r165">, DwarfRegAlias<Rx165>;
+def Rx166 : AMDILReg<166, "r166">, DwarfRegNum<[166]>;
+def Ry166 : AMDILReg<166, "r166">, DwarfRegAlias<Rx166>;
+def Rz166 : AMDILReg<166, "r166">, DwarfRegAlias<Rx166>;
+def Rw166 : AMDILReg<166, "r166">, DwarfRegAlias<Rx166>;
+def Rx167 : AMDILReg<167, "r167">, DwarfRegNum<[167]>;
+def Ry167 : AMDILReg<167, "r167">, DwarfRegAlias<Rx167>;
+def Rz167 : AMDILReg<167, "r167">, DwarfRegAlias<Rx167>;
+def Rw167 : AMDILReg<167, "r167">, DwarfRegAlias<Rx167>;
+def Rx168 : AMDILReg<168, "r168">, DwarfRegNum<[168]>;
+def Ry168 : AMDILReg<168, "r168">, DwarfRegAlias<Rx168>;
+def Rz168 : AMDILReg<168, "r168">, DwarfRegAlias<Rx168>;
+def Rw168 : AMDILReg<168, "r168">, DwarfRegAlias<Rx168>;
+def Rx169 : AMDILReg<169, "r169">, DwarfRegNum<[169]>;
+def Ry169 : AMDILReg<169, "r169">, DwarfRegAlias<Rx169>;
+def Rz169 : AMDILReg<169, "r169">, DwarfRegAlias<Rx169>;
+def Rw169 : AMDILReg<169, "r169">, DwarfRegAlias<Rx169>;
+def Rx170 : AMDILReg<170, "r170">, DwarfRegNum<[170]>;
+def Ry170 : AMDILReg<170, "r170">, DwarfRegAlias<Rx170>;
+def Rz170 : AMDILReg<170, "r170">, DwarfRegAlias<Rx170>;
+def Rw170 : AMDILReg<170, "r170">, DwarfRegAlias<Rx170>;
+def Rx171 : AMDILReg<171, "r171">, DwarfRegNum<[171]>;
+def Ry171 : AMDILReg<171, "r171">, DwarfRegAlias<Rx171>;
+def Rz171 : AMDILReg<171, "r171">, DwarfRegAlias<Rx171>;
+def Rw171 : AMDILReg<171, "r171">, DwarfRegAlias<Rx171>;
+def Rx172 : AMDILReg<172, "r172">, DwarfRegNum<[172]>;
+def Ry172 : AMDILReg<172, "r172">, DwarfRegAlias<Rx172>;
+def Rz172 : AMDILReg<172, "r172">, DwarfRegAlias<Rx172>;
+def Rw172 : AMDILReg<172, "r172">, DwarfRegAlias<Rx172>;
+def Rx173 : AMDILReg<173, "r173">, DwarfRegNum<[173]>;
+def Ry173 : AMDILReg<173, "r173">, DwarfRegAlias<Rx173>;
+def Rz173 : AMDILReg<173, "r173">, DwarfRegAlias<Rx173>;
+def Rw173 : AMDILReg<173, "r173">, DwarfRegAlias<Rx173>;
+def Rx174 : AMDILReg<174, "r174">, DwarfRegNum<[174]>;
+def Ry174 : AMDILReg<174, "r174">, DwarfRegAlias<Rx174>;
+def Rz174 : AMDILReg<174, "r174">, DwarfRegAlias<Rx174>;
+def Rw174 : AMDILReg<174, "r174">, DwarfRegAlias<Rx174>;
+def Rx175 : AMDILReg<175, "r175">, DwarfRegNum<[175]>;
+def Ry175 : AMDILReg<175, "r175">, DwarfRegAlias<Rx175>;
+def Rz175 : AMDILReg<175, "r175">, DwarfRegAlias<Rx175>;
+def Rw175 : AMDILReg<175, "r175">, DwarfRegAlias<Rx175>;
+def Rx176 : AMDILReg<176, "r176">, DwarfRegNum<[176]>;
+def Ry176 : AMDILReg<176, "r176">, DwarfRegAlias<Rx176>;
+def Rz176 : AMDILReg<176, "r176">, DwarfRegAlias<Rx176>;
+def Rw176 : AMDILReg<176, "r176">, DwarfRegAlias<Rx176>;
+def Rx177 : AMDILReg<177, "r177">, DwarfRegNum<[177]>;
+def Ry177 : AMDILReg<177, "r177">, DwarfRegAlias<Rx177>;
+def Rz177 : AMDILReg<177, "r177">, DwarfRegAlias<Rx177>;
+def Rw177 : AMDILReg<177, "r177">, DwarfRegAlias<Rx177>;
+def Rx178 : AMDILReg<178, "r178">, DwarfRegNum<[178]>;
+def Ry178 : AMDILReg<178, "r178">, DwarfRegAlias<Rx178>;
+def Rz178 : AMDILReg<178, "r178">, DwarfRegAlias<Rx178>;
+def Rw178 : AMDILReg<178, "r178">, DwarfRegAlias<Rx178>;
+def Rx179 : AMDILReg<179, "r179">, DwarfRegNum<[179]>;
+def Ry179 : AMDILReg<179, "r179">, DwarfRegAlias<Rx179>;
+def Rz179 : AMDILReg<179, "r179">, DwarfRegAlias<Rx179>;
+def Rw179 : AMDILReg<179, "r179">, DwarfRegAlias<Rx179>;
+def Rx180 : AMDILReg<180, "r180">, DwarfRegNum<[180]>;
+def Ry180 : AMDILReg<180, "r180">, DwarfRegAlias<Rx180>;
+def Rz180 : AMDILReg<180, "r180">, DwarfRegAlias<Rx180>;
+def Rw180 : AMDILReg<180, "r180">, DwarfRegAlias<Rx180>;
+def Rx181 : AMDILReg<181, "r181">, DwarfRegNum<[181]>;
+def Ry181 : AMDILReg<181, "r181">, DwarfRegAlias<Rx181>;
+def Rz181 : AMDILReg<181, "r181">, DwarfRegAlias<Rx181>;
+def Rw181 : AMDILReg<181, "r181">, DwarfRegAlias<Rx181>;
+def Rx182 : AMDILReg<182, "r182">, DwarfRegNum<[182]>;
+def Ry182 : AMDILReg<182, "r182">, DwarfRegAlias<Rx182>;
+def Rz182 : AMDILReg<182, "r182">, DwarfRegAlias<Rx182>;
+def Rw182 : AMDILReg<182, "r182">, DwarfRegAlias<Rx182>;
+def Rx183 : AMDILReg<183, "r183">, DwarfRegNum<[183]>;
+def Ry183 : AMDILReg<183, "r183">, DwarfRegAlias<Rx183>;
+def Rz183 : AMDILReg<183, "r183">, DwarfRegAlias<Rx183>;
+def Rw183 : AMDILReg<183, "r183">, DwarfRegAlias<Rx183>;
+def Rx184 : AMDILReg<184, "r184">, DwarfRegNum<[184]>;
+def Ry184 : AMDILReg<184, "r184">, DwarfRegAlias<Rx184>;
+def Rz184 : AMDILReg<184, "r184">, DwarfRegAlias<Rx184>;
+def Rw184 : AMDILReg<184, "r184">, DwarfRegAlias<Rx184>;
+def Rx185 : AMDILReg<185, "r185">, DwarfRegNum<[185]>;
+def Ry185 : AMDILReg<185, "r185">, DwarfRegAlias<Rx185>;
+def Rz185 : AMDILReg<185, "r185">, DwarfRegAlias<Rx185>;
+def Rw185 : AMDILReg<185, "r185">, DwarfRegAlias<Rx185>;
+def Rx186 : AMDILReg<186, "r186">, DwarfRegNum<[186]>;
+def Ry186 : AMDILReg<186, "r186">, DwarfRegAlias<Rx186>;
+def Rz186 : AMDILReg<186, "r186">, DwarfRegAlias<Rx186>;
+def Rw186 : AMDILReg<186, "r186">, DwarfRegAlias<Rx186>;
+def Rx187 : AMDILReg<187, "r187">, DwarfRegNum<[187]>;
+def Ry187 : AMDILReg<187, "r187">, DwarfRegAlias<Rx187>;
+def Rz187 : AMDILReg<187, "r187">, DwarfRegAlias<Rx187>;
+def Rw187 : AMDILReg<187, "r187">, DwarfRegAlias<Rx187>;
+def Rx188 : AMDILReg<188, "r188">, DwarfRegNum<[188]>;
+def Ry188 : AMDILReg<188, "r188">, DwarfRegAlias<Rx188>;
+def Rz188 : AMDILReg<188, "r188">, DwarfRegAlias<Rx188>;
+def Rw188 : AMDILReg<188, "r188">, DwarfRegAlias<Rx188>;
+def Rx189 : AMDILReg<189, "r189">, DwarfRegNum<[189]>;
+def Ry189 : AMDILReg<189, "r189">, DwarfRegAlias<Rx189>;
+def Rz189 : AMDILReg<189, "r189">, DwarfRegAlias<Rx189>;
+def Rw189 : AMDILReg<189, "r189">, DwarfRegAlias<Rx189>;
+def Rx190 : AMDILReg<190, "r190">, DwarfRegNum<[190]>;
+def Ry190 : AMDILReg<190, "r190">, DwarfRegAlias<Rx190>;
+def Rz190 : AMDILReg<190, "r190">, DwarfRegAlias<Rx190>;
+def Rw190 : AMDILReg<190, "r190">, DwarfRegAlias<Rx190>;
+def Rx191 : AMDILReg<191, "r191">, DwarfRegNum<[191]>;
+def Ry191 : AMDILReg<191, "r191">, DwarfRegAlias<Rx191>;
+def Rz191 : AMDILReg<191, "r191">, DwarfRegAlias<Rx191>;
+def Rw191 : AMDILReg<191, "r191">, DwarfRegAlias<Rx191>;
+def Rx1000 : AMDILReg<1000, "r1000">, DwarfRegNum<[1000]>;
+def Ry1000 : AMDILReg<1000, "r1000">, DwarfRegAlias<Rx1000>;
+def Rz1000 : AMDILReg<1000, "r1000">, DwarfRegAlias<Rx1000>;
+def Rw1000 : AMDILReg<1000, "r1000">, DwarfRegAlias<Rx1000>;
+def Rx1001 : AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
+def Ry1001 : AMDILReg<1001, "r1001">, DwarfRegAlias<Rx1001>;
+def Rz1001 : AMDILReg<1001, "r1001">, DwarfRegAlias<Rx1001>;
+def Rw1001 : AMDILReg<1001, "r1001">, DwarfRegAlias<Rx1001>;
+def Rx1002 : AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
+def Ry1002 : AMDILReg<1002, "r1002">, DwarfRegAlias<Rx1002>;
+def Rz1002 : AMDILReg<1002, "r1002">, DwarfRegAlias<Rx1002>;
+def Rw1002 : AMDILReg<1002, "r1002">, DwarfRegAlias<Rx1002>;
+def Rx1003 : AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
+def Ry1003 : AMDILReg<1003, "r1003">, DwarfRegAlias<Rx1003>;
+def Rz1003 : AMDILReg<1003, "r1003">, DwarfRegAlias<Rx1003>;
+def Rw1003 : AMDILReg<1003, "r1003">, DwarfRegAlias<Rx1003>;
+def Rx1004 : AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
+def Ry1004 : AMDILReg<1004, "r1004">, DwarfRegAlias<Rx1004>;
+def Rz1004 : AMDILReg<1004, "r1004">, DwarfRegAlias<Rx1004>;
+def Rw1004 : AMDILReg<1004, "r1004">, DwarfRegAlias<Rx1004>;
+def Rx1005 : AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
+def Ry1005 : AMDILReg<1005, "r1005">, DwarfRegAlias<Rx1005>;
+def Rz1005 : AMDILReg<1005, "r1005">, DwarfRegAlias<Rx1005>;
+def Rw1005 : AMDILReg<1005, "r1005">, DwarfRegAlias<Rx1005>;
+def Rx1006 : AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
+def Ry1006 : AMDILReg<1006, "r1006">, DwarfRegAlias<Rx1006>;
+def Rz1006 : AMDILReg<1006, "r1006">, DwarfRegAlias<Rx1006>;
+def Rw1006 : AMDILReg<1006, "r1006">, DwarfRegAlias<Rx1006>;
+def Rx1007 : AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
+def Ry1007 : AMDILReg<1007, "r1007">, DwarfRegAlias<Rx1007>;
+def Rz1007 : AMDILReg<1007, "r1007">, DwarfRegAlias<Rx1007>;
+def Rw1007 : AMDILReg<1007, "r1007">, DwarfRegAlias<Rx1007>;
+def Rx1008 : AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
+def Ry1008 : AMDILReg<1008, "r1008">, DwarfRegAlias<Rx1008>;
+def Rz1008 : AMDILReg<1008, "r1008">, DwarfRegAlias<Rx1008>;
+def Rw1008 : AMDILReg<1008, "r1008">, DwarfRegAlias<Rx1008>;
+def Rx1009 : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
+def Ry1009 : AMDILReg<1009, "r1009">, DwarfRegAlias<Rx1009>;
+def Rz1009 : AMDILReg<1009, "r1009">, DwarfRegAlias<Rx1009>;
+def Rw1009 : AMDILReg<1009, "r1009">, DwarfRegAlias<Rx1009>;
+def Rx1010 : AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
+def Ry1010 : AMDILReg<1010, "r1010">, DwarfRegAlias<Rx1010>;
+def Rz1010 : AMDILReg<1010, "r1010">, DwarfRegAlias<Rx1010>;
+def Rw1010 : AMDILReg<1010, "r1010">, DwarfRegAlias<Rx1010>;
+def Rx1011 : AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
+def Ry1011 : AMDILReg<1011, "r1011">, DwarfRegAlias<Rx1011>;
+def Rz1011 : AMDILReg<1011, "r1011">, DwarfRegAlias<Rx1011>;
+def Rw1011 : AMDILReg<1011, "r1011">, DwarfRegAlias<Rx1011>;
+def Rx1012 : AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
+def Ry1012 : AMDILReg<1012, "r1012">, DwarfRegAlias<Rx1012>;
+def Rz1012 : AMDILReg<1012, "r1012">, DwarfRegAlias<Rx1012>;
+def Rw1012 : AMDILReg<1012, "r1012">, DwarfRegAlias<Rx1012>;

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV2.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV2.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV2.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,421 @@
+//===-- AMDILRegisterDefsV2.td --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the two-component (xy and zw) AMDIL register pairs,
+// built from the scalar Rx/Ry/Rz/Rw component registers.
+//===----------------------------------------------------------------------===//
+
+def Rxy1 : AMDILRegWithSubReg<1, "r1", [Rx1, Ry1], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1>; // two-component xy view of r1; every view of rN aliases RxN's DWARF number
+def Rzw1 : AMDILRegWithSubReg<1, "r1", [Rz1, Rw1], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1>; // two-component zw view of r1; pattern repeats mechanically for r1..r158 below
+def Rxy2 : AMDILRegWithSubReg<2, "r2", [Rx2, Ry2], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx2>;
+def Rzw2 : AMDILRegWithSubReg<2, "r2", [Rz2, Rw2], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx2>;
+def Rxy3 : AMDILRegWithSubReg<3, "r3", [Rx3, Ry3], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx3>;
+def Rzw3 : AMDILRegWithSubReg<3, "r3", [Rz3, Rw3], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx3>;
+def Rxy4 : AMDILRegWithSubReg<4, "r4", [Rx4, Ry4], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx4>;
+def Rzw4 : AMDILRegWithSubReg<4, "r4", [Rz4, Rw4], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx4>;
+def Rxy5 : AMDILRegWithSubReg<5, "r5", [Rx5, Ry5], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx5>;
+def Rzw5 : AMDILRegWithSubReg<5, "r5", [Rz5, Rw5], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx5>;
+def Rxy6 : AMDILRegWithSubReg<6, "r6", [Rx6, Ry6], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx6>;
+def Rzw6 : AMDILRegWithSubReg<6, "r6", [Rz6, Rw6], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx6>;
+def Rxy7 : AMDILRegWithSubReg<7, "r7", [Rx7, Ry7], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx7>;
+def Rzw7 : AMDILRegWithSubReg<7, "r7", [Rz7, Rw7], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx7>;
+def Rxy8 : AMDILRegWithSubReg<8, "r8", [Rx8, Ry8], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx8>;
+def Rzw8 : AMDILRegWithSubReg<8, "r8", [Rz8, Rw8], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx8>;
+def Rxy9 : AMDILRegWithSubReg<9, "r9", [Rx9, Ry9], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx9>;
+def Rzw9 : AMDILRegWithSubReg<9, "r9", [Rz9, Rw9], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx9>;
+def Rxy10 : AMDILRegWithSubReg<10, "r10", [Rx10, Ry10], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx10>;
+def Rzw10 : AMDILRegWithSubReg<10, "r10", [Rz10, Rw10], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx10>;
+def Rxy11 : AMDILRegWithSubReg<11, "r11", [Rx11, Ry11], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx11>;
+def Rzw11 : AMDILRegWithSubReg<11, "r11", [Rz11, Rw11], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx11>;
+def Rxy12 : AMDILRegWithSubReg<12, "r12", [Rx12, Ry12], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx12>;
+def Rzw12 : AMDILRegWithSubReg<12, "r12", [Rz12, Rw12], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx12>;
+def Rxy13 : AMDILRegWithSubReg<13, "r13", [Rx13, Ry13], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx13>;
+def Rzw13 : AMDILRegWithSubReg<13, "r13", [Rz13, Rw13], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx13>;
+def Rxy14 : AMDILRegWithSubReg<14, "r14", [Rx14, Ry14], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx14>;
+def Rzw14 : AMDILRegWithSubReg<14, "r14", [Rz14, Rw14], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx14>;
+def Rxy15 : AMDILRegWithSubReg<15, "r15", [Rx15, Ry15], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx15>;
+def Rzw15 : AMDILRegWithSubReg<15, "r15", [Rz15, Rw15], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx15>;
+def Rxy16 : AMDILRegWithSubReg<16, "r16", [Rx16, Ry16], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx16>;
+def Rzw16 : AMDILRegWithSubReg<16, "r16", [Rz16, Rw16], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx16>;
+def Rxy17 : AMDILRegWithSubReg<17, "r17", [Rx17, Ry17], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx17>;
+def Rzw17 : AMDILRegWithSubReg<17, "r17", [Rz17, Rw17], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx17>;
+def Rxy18 : AMDILRegWithSubReg<18, "r18", [Rx18, Ry18], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx18>;
+def Rzw18 : AMDILRegWithSubReg<18, "r18", [Rz18, Rw18], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx18>;
+def Rxy19 : AMDILRegWithSubReg<19, "r19", [Rx19, Ry19], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx19>;
+def Rzw19 : AMDILRegWithSubReg<19, "r19", [Rz19, Rw19], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx19>;
+def Rxy20 : AMDILRegWithSubReg<20, "r20", [Rx20, Ry20], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx20>;
+def Rzw20 : AMDILRegWithSubReg<20, "r20", [Rz20, Rw20], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx20>;
+def Rxy21 : AMDILRegWithSubReg<21, "r21", [Rx21, Ry21], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx21>;
+def Rzw21 : AMDILRegWithSubReg<21, "r21", [Rz21, Rw21], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx21>;
+def Rxy22 : AMDILRegWithSubReg<22, "r22", [Rx22, Ry22], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx22>;
+def Rzw22 : AMDILRegWithSubReg<22, "r22", [Rz22, Rw22], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx22>;
+def Rxy23 : AMDILRegWithSubReg<23, "r23", [Rx23, Ry23], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx23>;
+def Rzw23 : AMDILRegWithSubReg<23, "r23", [Rz23, Rw23], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx23>;
+def Rxy24 : AMDILRegWithSubReg<24, "r24", [Rx24, Ry24], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx24>;
+def Rzw24 : AMDILRegWithSubReg<24, "r24", [Rz24, Rw24], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx24>;
+def Rxy25 : AMDILRegWithSubReg<25, "r25", [Rx25, Ry25], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx25>;
+def Rzw25 : AMDILRegWithSubReg<25, "r25", [Rz25, Rw25], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx25>;
+def Rxy26 : AMDILRegWithSubReg<26, "r26", [Rx26, Ry26], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx26>;
+def Rzw26 : AMDILRegWithSubReg<26, "r26", [Rz26, Rw26], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx26>;
+def Rxy27 : AMDILRegWithSubReg<27, "r27", [Rx27, Ry27], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx27>;
+def Rzw27 : AMDILRegWithSubReg<27, "r27", [Rz27, Rw27], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx27>;
+def Rxy28 : AMDILRegWithSubReg<28, "r28", [Rx28, Ry28], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx28>;
+def Rzw28 : AMDILRegWithSubReg<28, "r28", [Rz28, Rw28], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx28>;
+def Rxy29 : AMDILRegWithSubReg<29, "r29", [Rx29, Ry29], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx29>;
+def Rzw29 : AMDILRegWithSubReg<29, "r29", [Rz29, Rw29], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx29>;
+def Rxy30 : AMDILRegWithSubReg<30, "r30", [Rx30, Ry30], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx30>;
+def Rzw30 : AMDILRegWithSubReg<30, "r30", [Rz30, Rw30], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx30>;
+def Rxy31 : AMDILRegWithSubReg<31, "r31", [Rx31, Ry31], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx31>;
+def Rzw31 : AMDILRegWithSubReg<31, "r31", [Rz31, Rw31], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx31>;
+def Rxy32 : AMDILRegWithSubReg<32, "r32", [Rx32, Ry32], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx32>;
+def Rzw32 : AMDILRegWithSubReg<32, "r32", [Rz32, Rw32], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx32>;
+def Rxy33 : AMDILRegWithSubReg<33, "r33", [Rx33, Ry33], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx33>;
+def Rzw33 : AMDILRegWithSubReg<33, "r33", [Rz33, Rw33], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx33>;
+def Rxy34 : AMDILRegWithSubReg<34, "r34", [Rx34, Ry34], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx34>;
+def Rzw34 : AMDILRegWithSubReg<34, "r34", [Rz34, Rw34], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx34>;
+def Rxy35 : AMDILRegWithSubReg<35, "r35", [Rx35, Ry35], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx35>;
+def Rzw35 : AMDILRegWithSubReg<35, "r35", [Rz35, Rw35], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx35>;
+def Rxy36 : AMDILRegWithSubReg<36, "r36", [Rx36, Ry36], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx36>;
+def Rzw36 : AMDILRegWithSubReg<36, "r36", [Rz36, Rw36], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx36>;
+def Rxy37 : AMDILRegWithSubReg<37, "r37", [Rx37, Ry37], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx37>;
+def Rzw37 : AMDILRegWithSubReg<37, "r37", [Rz37, Rw37], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx37>;
+def Rxy38 : AMDILRegWithSubReg<38, "r38", [Rx38, Ry38], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx38>;
+def Rzw38 : AMDILRegWithSubReg<38, "r38", [Rz38, Rw38], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx38>;
+def Rxy39 : AMDILRegWithSubReg<39, "r39", [Rx39, Ry39], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx39>;
+def Rzw39 : AMDILRegWithSubReg<39, "r39", [Rz39, Rw39], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx39>;
+def Rxy40 : AMDILRegWithSubReg<40, "r40", [Rx40, Ry40], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx40>;
+def Rzw40 : AMDILRegWithSubReg<40, "r40", [Rz40, Rw40], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx40>;
+def Rxy41 : AMDILRegWithSubReg<41, "r41", [Rx41, Ry41], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx41>;
+def Rzw41 : AMDILRegWithSubReg<41, "r41", [Rz41, Rw41], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx41>;
+def Rxy42 : AMDILRegWithSubReg<42, "r42", [Rx42, Ry42], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx42>;
+def Rzw42 : AMDILRegWithSubReg<42, "r42", [Rz42, Rw42], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx42>;
+def Rxy43 : AMDILRegWithSubReg<43, "r43", [Rx43, Ry43], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx43>;
+def Rzw43 : AMDILRegWithSubReg<43, "r43", [Rz43, Rw43], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx43>;
+def Rxy44 : AMDILRegWithSubReg<44, "r44", [Rx44, Ry44], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx44>;
+def Rzw44 : AMDILRegWithSubReg<44, "r44", [Rz44, Rw44], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx44>;
+def Rxy45 : AMDILRegWithSubReg<45, "r45", [Rx45, Ry45], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx45>;
+def Rzw45 : AMDILRegWithSubReg<45, "r45", [Rz45, Rw45], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx45>;
+def Rxy46 : AMDILRegWithSubReg<46, "r46", [Rx46, Ry46], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx46>;
+def Rzw46 : AMDILRegWithSubReg<46, "r46", [Rz46, Rw46], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx46>;
+def Rxy47 : AMDILRegWithSubReg<47, "r47", [Rx47, Ry47], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx47>;
+def Rzw47 : AMDILRegWithSubReg<47, "r47", [Rz47, Rw47], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx47>;
+def Rxy48 : AMDILRegWithSubReg<48, "r48", [Rx48, Ry48], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx48>;
+def Rzw48 : AMDILRegWithSubReg<48, "r48", [Rz48, Rw48], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx48>;
+def Rxy49 : AMDILRegWithSubReg<49, "r49", [Rx49, Ry49], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx49>;
+def Rzw49 : AMDILRegWithSubReg<49, "r49", [Rz49, Rw49], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx49>;
+def Rxy50 : AMDILRegWithSubReg<50, "r50", [Rx50, Ry50], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx50>;
+def Rzw50 : AMDILRegWithSubReg<50, "r50", [Rz50, Rw50], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx50>;
+def Rxy51 : AMDILRegWithSubReg<51, "r51", [Rx51, Ry51], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx51>;
+def Rzw51 : AMDILRegWithSubReg<51, "r51", [Rz51, Rw51], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx51>;
+def Rxy52 : AMDILRegWithSubReg<52, "r52", [Rx52, Ry52], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx52>;
+def Rzw52 : AMDILRegWithSubReg<52, "r52", [Rz52, Rw52], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx52>;
+def Rxy53 : AMDILRegWithSubReg<53, "r53", [Rx53, Ry53], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx53>;
+def Rzw53 : AMDILRegWithSubReg<53, "r53", [Rz53, Rw53], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx53>;
+def Rxy54 : AMDILRegWithSubReg<54, "r54", [Rx54, Ry54], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx54>;
+def Rzw54 : AMDILRegWithSubReg<54, "r54", [Rz54, Rw54], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx54>;
+def Rxy55 : AMDILRegWithSubReg<55, "r55", [Rx55, Ry55], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx55>;
+def Rzw55 : AMDILRegWithSubReg<55, "r55", [Rz55, Rw55], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx55>;
+def Rxy56 : AMDILRegWithSubReg<56, "r56", [Rx56, Ry56], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx56>;
+def Rzw56 : AMDILRegWithSubReg<56, "r56", [Rz56, Rw56], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx56>;
+def Rxy57 : AMDILRegWithSubReg<57, "r57", [Rx57, Ry57], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx57>;
+def Rzw57 : AMDILRegWithSubReg<57, "r57", [Rz57, Rw57], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx57>;
+def Rxy58 : AMDILRegWithSubReg<58, "r58", [Rx58, Ry58], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx58>;
+def Rzw58 : AMDILRegWithSubReg<58, "r58", [Rz58, Rw58], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx58>;
+def Rxy59 : AMDILRegWithSubReg<59, "r59", [Rx59, Ry59], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx59>;
+def Rzw59 : AMDILRegWithSubReg<59, "r59", [Rz59, Rw59], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx59>;
+def Rxy60 : AMDILRegWithSubReg<60, "r60", [Rx60, Ry60], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx60>;
+def Rzw60 : AMDILRegWithSubReg<60, "r60", [Rz60, Rw60], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx60>;
+def Rxy61 : AMDILRegWithSubReg<61, "r61", [Rx61, Ry61], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx61>;
+def Rzw61 : AMDILRegWithSubReg<61, "r61", [Rz61, Rw61], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx61>;
+def Rxy62 : AMDILRegWithSubReg<62, "r62", [Rx62, Ry62], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx62>;
+def Rzw62 : AMDILRegWithSubReg<62, "r62", [Rz62, Rw62], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx62>;
+def Rxy63 : AMDILRegWithSubReg<63, "r63", [Rx63, Ry63], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx63>;
+def Rzw63 : AMDILRegWithSubReg<63, "r63", [Rz63, Rw63], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx63>;
+def Rxy64 : AMDILRegWithSubReg<64, "r64", [Rx64, Ry64], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx64>;
+def Rzw64 : AMDILRegWithSubReg<64, "r64", [Rz64, Rw64], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx64>;
+def Rxy65 : AMDILRegWithSubReg<65, "r65", [Rx65, Ry65], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx65>;
+def Rzw65 : AMDILRegWithSubReg<65, "r65", [Rz65, Rw65], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx65>;
+def Rxy66 : AMDILRegWithSubReg<66, "r66", [Rx66, Ry66], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx66>;
+def Rzw66 : AMDILRegWithSubReg<66, "r66", [Rz66, Rw66], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx66>;
+def Rxy67 : AMDILRegWithSubReg<67, "r67", [Rx67, Ry67], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx67>;
+def Rzw67 : AMDILRegWithSubReg<67, "r67", [Rz67, Rw67], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx67>;
+def Rxy68 : AMDILRegWithSubReg<68, "r68", [Rx68, Ry68], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx68>;
+def Rzw68 : AMDILRegWithSubReg<68, "r68", [Rz68, Rw68], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx68>;
+def Rxy69 : AMDILRegWithSubReg<69, "r69", [Rx69, Ry69], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx69>;
+def Rzw69 : AMDILRegWithSubReg<69, "r69", [Rz69, Rw69], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx69>;
+def Rxy70 : AMDILRegWithSubReg<70, "r70", [Rx70, Ry70], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx70>;
+def Rzw70 : AMDILRegWithSubReg<70, "r70", [Rz70, Rw70], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx70>;
+def Rxy71 : AMDILRegWithSubReg<71, "r71", [Rx71, Ry71], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx71>;
+def Rzw71 : AMDILRegWithSubReg<71, "r71", [Rz71, Rw71], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx71>;
+def Rxy72 : AMDILRegWithSubReg<72, "r72", [Rx72, Ry72], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx72>;
+def Rzw72 : AMDILRegWithSubReg<72, "r72", [Rz72, Rw72], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx72>;
+def Rxy73 : AMDILRegWithSubReg<73, "r73", [Rx73, Ry73], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx73>;
+def Rzw73 : AMDILRegWithSubReg<73, "r73", [Rz73, Rw73], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx73>;
+def Rxy74 : AMDILRegWithSubReg<74, "r74", [Rx74, Ry74], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx74>;
+def Rzw74 : AMDILRegWithSubReg<74, "r74", [Rz74, Rw74], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx74>;
+def Rxy75 : AMDILRegWithSubReg<75, "r75", [Rx75, Ry75], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx75>;
+def Rzw75 : AMDILRegWithSubReg<75, "r75", [Rz75, Rw75], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx75>;
+def Rxy76 : AMDILRegWithSubReg<76, "r76", [Rx76, Ry76], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx76>;
+def Rzw76 : AMDILRegWithSubReg<76, "r76", [Rz76, Rw76], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx76>;
+def Rxy77 : AMDILRegWithSubReg<77, "r77", [Rx77, Ry77], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx77>;
+def Rzw77 : AMDILRegWithSubReg<77, "r77", [Rz77, Rw77], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx77>;
+def Rxy78 : AMDILRegWithSubReg<78, "r78", [Rx78, Ry78], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx78>;
+def Rzw78 : AMDILRegWithSubReg<78, "r78", [Rz78, Rw78], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx78>;
+def Rxy79 : AMDILRegWithSubReg<79, "r79", [Rx79, Ry79], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx79>;
+def Rzw79 : AMDILRegWithSubReg<79, "r79", [Rz79, Rw79], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx79>;
+def Rxy80 : AMDILRegWithSubReg<80, "r80", [Rx80, Ry80], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx80>;
+def Rzw80 : AMDILRegWithSubReg<80, "r80", [Rz80, Rw80], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx80>;
+def Rxy81 : AMDILRegWithSubReg<81, "r81", [Rx81, Ry81], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx81>;
+def Rzw81 : AMDILRegWithSubReg<81, "r81", [Rz81, Rw81], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx81>;
+def Rxy82 : AMDILRegWithSubReg<82, "r82", [Rx82, Ry82], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx82>;
+def Rzw82 : AMDILRegWithSubReg<82, "r82", [Rz82, Rw82], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx82>;
+def Rxy83 : AMDILRegWithSubReg<83, "r83", [Rx83, Ry83], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx83>;
+def Rzw83 : AMDILRegWithSubReg<83, "r83", [Rz83, Rw83], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx83>;
+def Rxy84 : AMDILRegWithSubReg<84, "r84", [Rx84, Ry84], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx84>;
+def Rzw84 : AMDILRegWithSubReg<84, "r84", [Rz84, Rw84], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx84>;
+def Rxy85 : AMDILRegWithSubReg<85, "r85", [Rx85, Ry85], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx85>;
+def Rzw85 : AMDILRegWithSubReg<85, "r85", [Rz85, Rw85], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx85>;
+def Rxy86 : AMDILRegWithSubReg<86, "r86", [Rx86, Ry86], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx86>;
+def Rzw86 : AMDILRegWithSubReg<86, "r86", [Rz86, Rw86], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx86>;
+def Rxy87 : AMDILRegWithSubReg<87, "r87", [Rx87, Ry87], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx87>;
+def Rzw87 : AMDILRegWithSubReg<87, "r87", [Rz87, Rw87], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx87>;
+def Rxy88 : AMDILRegWithSubReg<88, "r88", [Rx88, Ry88], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx88>;
+def Rzw88 : AMDILRegWithSubReg<88, "r88", [Rz88, Rw88], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx88>;
+def Rxy89 : AMDILRegWithSubReg<89, "r89", [Rx89, Ry89], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx89>;
+def Rzw89 : AMDILRegWithSubReg<89, "r89", [Rz89, Rw89], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx89>;
+def Rxy90 : AMDILRegWithSubReg<90, "r90", [Rx90, Ry90], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx90>;
+def Rzw90 : AMDILRegWithSubReg<90, "r90", [Rz90, Rw90], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx90>;
+def Rxy91 : AMDILRegWithSubReg<91, "r91", [Rx91, Ry91], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx91>;
+def Rzw91 : AMDILRegWithSubReg<91, "r91", [Rz91, Rw91], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx91>;
+def Rxy92 : AMDILRegWithSubReg<92, "r92", [Rx92, Ry92], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx92>;
+def Rzw92 : AMDILRegWithSubReg<92, "r92", [Rz92, Rw92], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx92>;
+def Rxy93 : AMDILRegWithSubReg<93, "r93", [Rx93, Ry93], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx93>;
+def Rzw93 : AMDILRegWithSubReg<93, "r93", [Rz93, Rw93], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx93>;
+def Rxy94 : AMDILRegWithSubReg<94, "r94", [Rx94, Ry94], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx94>;
+def Rzw94 : AMDILRegWithSubReg<94, "r94", [Rz94, Rw94], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx94>;
+def Rxy95 : AMDILRegWithSubReg<95, "r95", [Rx95, Ry95], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx95>;
+def Rzw95 : AMDILRegWithSubReg<95, "r95", [Rz95, Rw95], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx95>;
+def Rxy96 : AMDILRegWithSubReg<96, "r96", [Rx96, Ry96], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx96>;
+def Rzw96 : AMDILRegWithSubReg<96, "r96", [Rz96, Rw96], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx96>;
+def Rxy97 : AMDILRegWithSubReg<97, "r97", [Rx97, Ry97], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx97>;
+def Rzw97 : AMDILRegWithSubReg<97, "r97", [Rz97, Rw97], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx97>;
+def Rxy98 : AMDILRegWithSubReg<98, "r98", [Rx98, Ry98], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx98>;
+def Rzw98 : AMDILRegWithSubReg<98, "r98", [Rz98, Rw98], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx98>;
+def Rxy99 : AMDILRegWithSubReg<99, "r99", [Rx99, Ry99], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx99>;
+def Rzw99 : AMDILRegWithSubReg<99, "r99", [Rz99, Rw99], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx99>;
+def Rxy100 : AMDILRegWithSubReg<100, "r100", [Rx100, Ry100], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx100>;
+def Rzw100 : AMDILRegWithSubReg<100, "r100", [Rz100, Rw100], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx100>;
+def Rxy101 : AMDILRegWithSubReg<101, "r101", [Rx101, Ry101], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx101>;
+def Rzw101 : AMDILRegWithSubReg<101, "r101", [Rz101, Rw101], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx101>;
+def Rxy102 : AMDILRegWithSubReg<102, "r102", [Rx102, Ry102], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx102>;
+def Rzw102 : AMDILRegWithSubReg<102, "r102", [Rz102, Rw102], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx102>;
+def Rxy103 : AMDILRegWithSubReg<103, "r103", [Rx103, Ry103], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx103>;
+def Rzw103 : AMDILRegWithSubReg<103, "r103", [Rz103, Rw103], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx103>;
+def Rxy104 : AMDILRegWithSubReg<104, "r104", [Rx104, Ry104], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx104>;
+def Rzw104 : AMDILRegWithSubReg<104, "r104", [Rz104, Rw104], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx104>;
+def Rxy105 : AMDILRegWithSubReg<105, "r105", [Rx105, Ry105], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx105>;
+def Rzw105 : AMDILRegWithSubReg<105, "r105", [Rz105, Rw105], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx105>;
+def Rxy106 : AMDILRegWithSubReg<106, "r106", [Rx106, Ry106], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx106>;
+def Rzw106 : AMDILRegWithSubReg<106, "r106", [Rz106, Rw106], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx106>;
+def Rxy107 : AMDILRegWithSubReg<107, "r107", [Rx107, Ry107], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx107>;
+def Rzw107 : AMDILRegWithSubReg<107, "r107", [Rz107, Rw107], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx107>;
+def Rxy108 : AMDILRegWithSubReg<108, "r108", [Rx108, Ry108], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx108>;
+def Rzw108 : AMDILRegWithSubReg<108, "r108", [Rz108, Rw108], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx108>;
+def Rxy109 : AMDILRegWithSubReg<109, "r109", [Rx109, Ry109], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx109>;
+def Rzw109 : AMDILRegWithSubReg<109, "r109", [Rz109, Rw109], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx109>;
+def Rxy110 : AMDILRegWithSubReg<110, "r110", [Rx110, Ry110], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx110>;
+def Rzw110 : AMDILRegWithSubReg<110, "r110", [Rz110, Rw110], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx110>;
+def Rxy111 : AMDILRegWithSubReg<111, "r111", [Rx111, Ry111], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx111>;
+def Rzw111 : AMDILRegWithSubReg<111, "r111", [Rz111, Rw111], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx111>;
+def Rxy112 : AMDILRegWithSubReg<112, "r112", [Rx112, Ry112], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx112>;
+def Rzw112 : AMDILRegWithSubReg<112, "r112", [Rz112, Rw112], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx112>;
+def Rxy113 : AMDILRegWithSubReg<113, "r113", [Rx113, Ry113], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx113>;
+def Rzw113 : AMDILRegWithSubReg<113, "r113", [Rz113, Rw113], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx113>;
+def Rxy114 : AMDILRegWithSubReg<114, "r114", [Rx114, Ry114], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx114>;
+def Rzw114 : AMDILRegWithSubReg<114, "r114", [Rz114, Rw114], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx114>;
+def Rxy115 : AMDILRegWithSubReg<115, "r115", [Rx115, Ry115], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx115>;
+def Rzw115 : AMDILRegWithSubReg<115, "r115", [Rz115, Rw115], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx115>;
+def Rxy116 : AMDILRegWithSubReg<116, "r116", [Rx116, Ry116], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx116>;
+def Rzw116 : AMDILRegWithSubReg<116, "r116", [Rz116, Rw116], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx116>;
+def Rxy117 : AMDILRegWithSubReg<117, "r117", [Rx117, Ry117], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx117>;
+def Rzw117 : AMDILRegWithSubReg<117, "r117", [Rz117, Rw117], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx117>;
+def Rxy118 : AMDILRegWithSubReg<118, "r118", [Rx118, Ry118], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx118>;
+def Rzw118 : AMDILRegWithSubReg<118, "r118", [Rz118, Rw118], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx118>;
+def Rxy119 : AMDILRegWithSubReg<119, "r119", [Rx119, Ry119], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx119>;
+def Rzw119 : AMDILRegWithSubReg<119, "r119", [Rz119, Rw119], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx119>;
+def Rxy120 : AMDILRegWithSubReg<120, "r120", [Rx120, Ry120], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx120>;
+def Rzw120 : AMDILRegWithSubReg<120, "r120", [Rz120, Rw120], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx120>;
+def Rxy121 : AMDILRegWithSubReg<121, "r121", [Rx121, Ry121], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx121>;
+def Rzw121 : AMDILRegWithSubReg<121, "r121", [Rz121, Rw121], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx121>;
+def Rxy122 : AMDILRegWithSubReg<122, "r122", [Rx122, Ry122], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx122>;
+def Rzw122 : AMDILRegWithSubReg<122, "r122", [Rz122, Rw122], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx122>;
+def Rxy123 : AMDILRegWithSubReg<123, "r123", [Rx123, Ry123], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx123>;
+def Rzw123 : AMDILRegWithSubReg<123, "r123", [Rz123, Rw123], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx123>;
+def Rxy124 : AMDILRegWithSubReg<124, "r124", [Rx124, Ry124], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx124>;
+def Rzw124 : AMDILRegWithSubReg<124, "r124", [Rz124, Rw124], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx124>;
+def Rxy125 : AMDILRegWithSubReg<125, "r125", [Rx125, Ry125], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx125>;
+def Rzw125 : AMDILRegWithSubReg<125, "r125", [Rz125, Rw125], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx125>;
+def Rxy126 : AMDILRegWithSubReg<126, "r126", [Rx126, Ry126], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx126>;
+def Rzw126 : AMDILRegWithSubReg<126, "r126", [Rz126, Rw126], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx126>;
+def Rxy127 : AMDILRegWithSubReg<127, "r127", [Rx127, Ry127], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx127>;
+def Rzw127 : AMDILRegWithSubReg<127, "r127", [Rz127, Rw127], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx127>;
+def Rxy128 : AMDILRegWithSubReg<128, "r128", [Rx128, Ry128], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx128>;
+def Rzw128 : AMDILRegWithSubReg<128, "r128", [Rz128, Rw128], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx128>;
+def Rxy129 : AMDILRegWithSubReg<129, "r129", [Rx129, Ry129], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx129>;
+def Rzw129 : AMDILRegWithSubReg<129, "r129", [Rz129, Rw129], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx129>;
+def Rxy130 : AMDILRegWithSubReg<130, "r130", [Rx130, Ry130], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx130>;
+def Rzw130 : AMDILRegWithSubReg<130, "r130", [Rz130, Rw130], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx130>;
+def Rxy131 : AMDILRegWithSubReg<131, "r131", [Rx131, Ry131], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx131>;
+def Rzw131 : AMDILRegWithSubReg<131, "r131", [Rz131, Rw131], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx131>;
+def Rxy132 : AMDILRegWithSubReg<132, "r132", [Rx132, Ry132], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx132>;
+def Rzw132 : AMDILRegWithSubReg<132, "r132", [Rz132, Rw132], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx132>;
+def Rxy133 : AMDILRegWithSubReg<133, "r133", [Rx133, Ry133], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx133>;
+def Rzw133 : AMDILRegWithSubReg<133, "r133", [Rz133, Rw133], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx133>;
+def Rxy134 : AMDILRegWithSubReg<134, "r134", [Rx134, Ry134], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx134>;
+def Rzw134 : AMDILRegWithSubReg<134, "r134", [Rz134, Rw134], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx134>;
+def Rxy135 : AMDILRegWithSubReg<135, "r135", [Rx135, Ry135], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx135>;
+def Rzw135 : AMDILRegWithSubReg<135, "r135", [Rz135, Rw135], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx135>;
+def Rxy136 : AMDILRegWithSubReg<136, "r136", [Rx136, Ry136], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx136>;
+def Rzw136 : AMDILRegWithSubReg<136, "r136", [Rz136, Rw136], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx136>;
+def Rxy137 : AMDILRegWithSubReg<137, "r137", [Rx137, Ry137], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx137>;
+def Rzw137 : AMDILRegWithSubReg<137, "r137", [Rz137, Rw137], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx137>;
+def Rxy138 : AMDILRegWithSubReg<138, "r138", [Rx138, Ry138], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx138>;
+def Rzw138 : AMDILRegWithSubReg<138, "r138", [Rz138, Rw138], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx138>;
+def Rxy139 : AMDILRegWithSubReg<139, "r139", [Rx139, Ry139], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx139>;
+def Rzw139 : AMDILRegWithSubReg<139, "r139", [Rz139, Rw139], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx139>;
+def Rxy140 : AMDILRegWithSubReg<140, "r140", [Rx140, Ry140], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx140>;
+def Rzw140 : AMDILRegWithSubReg<140, "r140", [Rz140, Rw140], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx140>;
+def Rxy141 : AMDILRegWithSubReg<141, "r141", [Rx141, Ry141], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx141>;
+def Rzw141 : AMDILRegWithSubReg<141, "r141", [Rz141, Rw141], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx141>;
+def Rxy142 : AMDILRegWithSubReg<142, "r142", [Rx142, Ry142], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx142>;
+def Rzw142 : AMDILRegWithSubReg<142, "r142", [Rz142, Rw142], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx142>;
+def Rxy143 : AMDILRegWithSubReg<143, "r143", [Rx143, Ry143], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx143>;
+def Rzw143 : AMDILRegWithSubReg<143, "r143", [Rz143, Rw143], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx143>;
+def Rxy144 : AMDILRegWithSubReg<144, "r144", [Rx144, Ry144], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx144>;
+def Rzw144 : AMDILRegWithSubReg<144, "r144", [Rz144, Rw144], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx144>;
+def Rxy145 : AMDILRegWithSubReg<145, "r145", [Rx145, Ry145], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx145>;
+def Rzw145 : AMDILRegWithSubReg<145, "r145", [Rz145, Rw145], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx145>;
+def Rxy146 : AMDILRegWithSubReg<146, "r146", [Rx146, Ry146], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx146>;
+def Rzw146 : AMDILRegWithSubReg<146, "r146", [Rz146, Rw146], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx146>;
+def Rxy147 : AMDILRegWithSubReg<147, "r147", [Rx147, Ry147], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx147>;
+def Rzw147 : AMDILRegWithSubReg<147, "r147", [Rz147, Rw147], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx147>;
+def Rxy148 : AMDILRegWithSubReg<148, "r148", [Rx148, Ry148], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx148>;
+def Rzw148 : AMDILRegWithSubReg<148, "r148", [Rz148, Rw148], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx148>;
+def Rxy149 : AMDILRegWithSubReg<149, "r149", [Rx149, Ry149], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx149>;
+def Rzw149 : AMDILRegWithSubReg<149, "r149", [Rz149, Rw149], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx149>;
+def Rxy150 : AMDILRegWithSubReg<150, "r150", [Rx150, Ry150], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx150>;
+def Rzw150 : AMDILRegWithSubReg<150, "r150", [Rz150, Rw150], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx150>;
+def Rxy151 : AMDILRegWithSubReg<151, "r151", [Rx151, Ry151], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx151>;
+def Rzw151 : AMDILRegWithSubReg<151, "r151", [Rz151, Rw151], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx151>;
+def Rxy152 : AMDILRegWithSubReg<152, "r152", [Rx152, Ry152], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx152>;
+def Rzw152 : AMDILRegWithSubReg<152, "r152", [Rz152, Rw152], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx152>;
+def Rxy153 : AMDILRegWithSubReg<153, "r153", [Rx153, Ry153], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx153>;
+def Rzw153 : AMDILRegWithSubReg<153, "r153", [Rz153, Rw153], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx153>;
+def Rxy154 : AMDILRegWithSubReg<154, "r154", [Rx154, Ry154], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx154>;
+def Rzw154 : AMDILRegWithSubReg<154, "r154", [Rz154, Rw154], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx154>;
+def Rxy155 : AMDILRegWithSubReg<155, "r155", [Rx155, Ry155], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx155>;
+def Rzw155 : AMDILRegWithSubReg<155, "r155", [Rz155, Rw155], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx155>;
+def Rxy156 : AMDILRegWithSubReg<156, "r156", [Rx156, Ry156], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx156>;
+def Rzw156 : AMDILRegWithSubReg<156, "r156", [Rz156, Rw156], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx156>;
+def Rxy157 : AMDILRegWithSubReg<157, "r157", [Rx157, Ry157], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx157>;
+def Rzw157 : AMDILRegWithSubReg<157, "r157", [Rz157, Rw157], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx157>;
+def Rxy158 : AMDILRegWithSubReg<158, "r158", [Rx158, Ry158], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx158>;
+def Rzw158 : AMDILRegWithSubReg<158, "r158", [Rz158, Rw158], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx158>;
+def Rxy159 : AMDILRegWithSubReg<159, "r159", [Rx159, Ry159], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx159>;
+def Rzw159 : AMDILRegWithSubReg<159, "r159", [Rz159, Rw159], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx159>;
+def Rxy160 : AMDILRegWithSubReg<160, "r160", [Rx160, Ry160], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx160>;
+def Rzw160 : AMDILRegWithSubReg<160, "r160", [Rz160, Rw160], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx160>;
+def Rxy161 : AMDILRegWithSubReg<161, "r161", [Rx161, Ry161], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx161>;
+def Rzw161 : AMDILRegWithSubReg<161, "r161", [Rz161, Rw161], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx161>;
+def Rxy162 : AMDILRegWithSubReg<162, "r162", [Rx162, Ry162], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx162>;
+def Rzw162 : AMDILRegWithSubReg<162, "r162", [Rz162, Rw162], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx162>;
+def Rxy163 : AMDILRegWithSubReg<163, "r163", [Rx163, Ry163], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx163>;
+def Rzw163 : AMDILRegWithSubReg<163, "r163", [Rz163, Rw163], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx163>;
+def Rxy164 : AMDILRegWithSubReg<164, "r164", [Rx164, Ry164], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx164>;
+def Rzw164 : AMDILRegWithSubReg<164, "r164", [Rz164, Rw164], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx164>;
+def Rxy165 : AMDILRegWithSubReg<165, "r165", [Rx165, Ry165], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx165>;
+def Rzw165 : AMDILRegWithSubReg<165, "r165", [Rz165, Rw165], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx165>;
+def Rxy166 : AMDILRegWithSubReg<166, "r166", [Rx166, Ry166], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx166>;
+def Rzw166 : AMDILRegWithSubReg<166, "r166", [Rz166, Rw166], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx166>;
+def Rxy167 : AMDILRegWithSubReg<167, "r167", [Rx167, Ry167], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx167>;
+def Rzw167 : AMDILRegWithSubReg<167, "r167", [Rz167, Rw167], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx167>;
+def Rxy168 : AMDILRegWithSubReg<168, "r168", [Rx168, Ry168], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx168>;
+def Rzw168 : AMDILRegWithSubReg<168, "r168", [Rz168, Rw168], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx168>;
+def Rxy169 : AMDILRegWithSubReg<169, "r169", [Rx169, Ry169], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx169>;
+def Rzw169 : AMDILRegWithSubReg<169, "r169", [Rz169, Rw169], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx169>;
+def Rxy170 : AMDILRegWithSubReg<170, "r170", [Rx170, Ry170], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx170>;
+def Rzw170 : AMDILRegWithSubReg<170, "r170", [Rz170, Rw170], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx170>;
+def Rxy171 : AMDILRegWithSubReg<171, "r171", [Rx171, Ry171], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx171>;
+def Rzw171 : AMDILRegWithSubReg<171, "r171", [Rz171, Rw171], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx171>;
+def Rxy172 : AMDILRegWithSubReg<172, "r172", [Rx172, Ry172], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx172>;
+def Rzw172 : AMDILRegWithSubReg<172, "r172", [Rz172, Rw172], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx172>;
+def Rxy173 : AMDILRegWithSubReg<173, "r173", [Rx173, Ry173], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx173>;
+def Rzw173 : AMDILRegWithSubReg<173, "r173", [Rz173, Rw173], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx173>;
+def Rxy174 : AMDILRegWithSubReg<174, "r174", [Rx174, Ry174], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx174>;
+def Rzw174 : AMDILRegWithSubReg<174, "r174", [Rz174, Rw174], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx174>;
+def Rxy175 : AMDILRegWithSubReg<175, "r175", [Rx175, Ry175], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx175>;
+def Rzw175 : AMDILRegWithSubReg<175, "r175", [Rz175, Rw175], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx175>;
+def Rxy176 : AMDILRegWithSubReg<176, "r176", [Rx176, Ry176], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx176>;
+def Rzw176 : AMDILRegWithSubReg<176, "r176", [Rz176, Rw176], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx176>;
+def Rxy177 : AMDILRegWithSubReg<177, "r177", [Rx177, Ry177], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx177>;
+def Rzw177 : AMDILRegWithSubReg<177, "r177", [Rz177, Rw177], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx177>;
+def Rxy178 : AMDILRegWithSubReg<178, "r178", [Rx178, Ry178], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx178>;
+def Rzw178 : AMDILRegWithSubReg<178, "r178", [Rz178, Rw178], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx178>;
+def Rxy179 : AMDILRegWithSubReg<179, "r179", [Rx179, Ry179], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx179>;
+def Rzw179 : AMDILRegWithSubReg<179, "r179", [Rz179, Rw179], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx179>;
+def Rxy180 : AMDILRegWithSubReg<180, "r180", [Rx180, Ry180], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx180>;
+def Rzw180 : AMDILRegWithSubReg<180, "r180", [Rz180, Rw180], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx180>;
+def Rxy181 : AMDILRegWithSubReg<181, "r181", [Rx181, Ry181], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx181>;
+def Rzw181 : AMDILRegWithSubReg<181, "r181", [Rz181, Rw181], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx181>;
+def Rxy182 : AMDILRegWithSubReg<182, "r182", [Rx182, Ry182], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx182>;
+def Rzw182 : AMDILRegWithSubReg<182, "r182", [Rz182, Rw182], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx182>;
+def Rxy183 : AMDILRegWithSubReg<183, "r183", [Rx183, Ry183], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx183>;
+def Rzw183 : AMDILRegWithSubReg<183, "r183", [Rz183, Rw183], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx183>;
+def Rxy184 : AMDILRegWithSubReg<184, "r184", [Rx184, Ry184], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx184>;
+def Rzw184 : AMDILRegWithSubReg<184, "r184", [Rz184, Rw184], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx184>;
+def Rxy185 : AMDILRegWithSubReg<185, "r185", [Rx185, Ry185], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx185>;
+def Rzw185 : AMDILRegWithSubReg<185, "r185", [Rz185, Rw185], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx185>;
+def Rxy186 : AMDILRegWithSubReg<186, "r186", [Rx186, Ry186], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx186>;
+def Rzw186 : AMDILRegWithSubReg<186, "r186", [Rz186, Rw186], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx186>;
+def Rxy187 : AMDILRegWithSubReg<187, "r187", [Rx187, Ry187], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx187>;
+def Rzw187 : AMDILRegWithSubReg<187, "r187", [Rz187, Rw187], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx187>;
+def Rxy188 : AMDILRegWithSubReg<188, "r188", [Rx188, Ry188], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx188>;
+def Rzw188 : AMDILRegWithSubReg<188, "r188", [Rz188, Rw188], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx188>;
+def Rxy189 : AMDILRegWithSubReg<189, "r189", [Rx189, Ry189], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx189>;
+def Rzw189 : AMDILRegWithSubReg<189, "r189", [Rz189, Rw189], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx189>;
+def Rxy190 : AMDILRegWithSubReg<190, "r190", [Rx190, Ry190], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx190>;
+def Rzw190 : AMDILRegWithSubReg<190, "r190", [Rz190, Rw190], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx190>;
+def Rxy191 : AMDILRegWithSubReg<191, "r191", [Rx191, Ry191], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx191>;
+def Rzw191 : AMDILRegWithSubReg<191, "r191", [Rz191, Rw191], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx191>;
+def Rxy1000 : AMDILRegWithSubReg<1000, "r1000", [Rx1000, Ry1000], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1000>;
+def Rzw1000 : AMDILRegWithSubReg<1000, "r1000", [Rz1000, Rw1000], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1000>;
+def Rxy1001 : AMDILRegWithSubReg<1001, "r1001", [Rx1001, Ry1001], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1001>;
+def Rzw1001 : AMDILRegWithSubReg<1001, "r1001", [Rz1001, Rw1001], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1001>;
+def Rxy1002 : AMDILRegWithSubReg<1002, "r1002", [Rx1002, Ry1002], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1002>;
+def Rzw1002 : AMDILRegWithSubReg<1002, "r1002", [Rz1002, Rw1002], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1002>;
+def Rxy1003 : AMDILRegWithSubReg<1003, "r1003", [Rx1003, Ry1003], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1003>;
+def Rzw1003 : AMDILRegWithSubReg<1003, "r1003", [Rz1003, Rw1003], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1003>;
+def Rxy1004 : AMDILRegWithSubReg<1004, "r1004", [Rx1004, Ry1004], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1004>;
+def Rzw1004 : AMDILRegWithSubReg<1004, "r1004", [Rz1004, Rw1004], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1004>;
+def Rxy1005 : AMDILRegWithSubReg<1005, "r1005", [Rx1005, Ry1005], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1005>;
+def Rzw1005 : AMDILRegWithSubReg<1005, "r1005", [Rz1005, Rw1005], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1005>;
+def Rxy1006 : AMDILRegWithSubReg<1006, "r1006", [Rx1006, Ry1006], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1006>;
+def Rzw1006 : AMDILRegWithSubReg<1006, "r1006", [Rz1006, Rw1006], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1006>;
+def Rxy1007 : AMDILRegWithSubReg<1007, "r1007", [Rx1007, Ry1007], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1007>;
+def Rzw1007 : AMDILRegWithSubReg<1007, "r1007", [Rz1007, Rw1007], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1007>;
+def Rxy1008 : AMDILRegWithSubReg<1008, "r1008", [Rx1008, Ry1008], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1008>;
+def Rzw1008 : AMDILRegWithSubReg<1008, "r1008", [Rz1008, Rw1008], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1008>;
+def Rxy1009 : AMDILRegWithSubReg<1009, "r1009", [Rx1009, Ry1009], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1009>;
+def Rzw1009 : AMDILRegWithSubReg<1009, "r1009", [Rz1009, Rw1009], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1009>;
+def Rxy1010 : AMDILRegWithSubReg<1010, "r1010", [Rx1010, Ry1010], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1010>;
+def Rzw1010 : AMDILRegWithSubReg<1010, "r1010", [Rz1010, Rw1010], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1010>;
+def Rxy1011 : AMDILRegWithSubReg<1011, "r1011", [Rx1011, Ry1011], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1011>;
+def Rzw1011 : AMDILRegWithSubReg<1011, "r1011", [Rz1011, Rw1011], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1011>;
+def Rxy1012 : AMDILRegWithSubReg<1012, "r1012", [Rx1012, Ry1012], [sub_x_comp, sub_y_comp]>, DwarfRegAlias<Rx1012>;
+def Rzw1012 : AMDILRegWithSubReg<1012, "r1012", [Rz1012, Rw1012], [sub_z_comp, sub_w_comp]>, DwarfRegAlias<Rx1012>;

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV4.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV4.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV4.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV4.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,217 @@
+//===-- AMDILRegisterDefsV4.td --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+def R1 : AMDILRegWithSubReg<1, "r1", [Rxy1, Rzw1], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1>;
+def R2 : AMDILRegWithSubReg<2, "r2", [Rxy2, Rzw2], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx2>;
+def R3 : AMDILRegWithSubReg<3, "r3", [Rxy3, Rzw3], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx3>;
+def R4 : AMDILRegWithSubReg<4, "r4", [Rxy4, Rzw4], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx4>;
+def R5 : AMDILRegWithSubReg<5, "r5", [Rxy5, Rzw5], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx5>;
+def R6 : AMDILRegWithSubReg<6, "r6", [Rxy6, Rzw6], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx6>;
+def R7 : AMDILRegWithSubReg<7, "r7", [Rxy7, Rzw7], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx7>;
+def R8 : AMDILRegWithSubReg<8, "r8", [Rxy8, Rzw8], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx8>;
+def R9 : AMDILRegWithSubReg<9, "r9", [Rxy9, Rzw9], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx9>;
+def R10 : AMDILRegWithSubReg<10, "r10", [Rxy10, Rzw10], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx10>;
+def R11 : AMDILRegWithSubReg<11, "r11", [Rxy11, Rzw11], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx11>;
+def R12 : AMDILRegWithSubReg<12, "r12", [Rxy12, Rzw12], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx12>;
+def R13 : AMDILRegWithSubReg<13, "r13", [Rxy13, Rzw13], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx13>;
+def R14 : AMDILRegWithSubReg<14, "r14", [Rxy14, Rzw14], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx14>;
+def R15 : AMDILRegWithSubReg<15, "r15", [Rxy15, Rzw15], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx15>;
+def R16 : AMDILRegWithSubReg<16, "r16", [Rxy16, Rzw16], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx16>;
+def R17 : AMDILRegWithSubReg<17, "r17", [Rxy17, Rzw17], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx17>;
+def R18 : AMDILRegWithSubReg<18, "r18", [Rxy18, Rzw18], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx18>;
+def R19 : AMDILRegWithSubReg<19, "r19", [Rxy19, Rzw19], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx19>;
+def R20 : AMDILRegWithSubReg<20, "r20", [Rxy20, Rzw20], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx20>;
+def R21 : AMDILRegWithSubReg<21, "r21", [Rxy21, Rzw21], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx21>;
+def R22 : AMDILRegWithSubReg<22, "r22", [Rxy22, Rzw22], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx22>;
+def R23 : AMDILRegWithSubReg<23, "r23", [Rxy23, Rzw23], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx23>;
+def R24 : AMDILRegWithSubReg<24, "r24", [Rxy24, Rzw24], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx24>;
+def R25 : AMDILRegWithSubReg<25, "r25", [Rxy25, Rzw25], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx25>;
+def R26 : AMDILRegWithSubReg<26, "r26", [Rxy26, Rzw26], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx26>;
+def R27 : AMDILRegWithSubReg<27, "r27", [Rxy27, Rzw27], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx27>;
+def R28 : AMDILRegWithSubReg<28, "r28", [Rxy28, Rzw28], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx28>;
+def R29 : AMDILRegWithSubReg<29, "r29", [Rxy29, Rzw29], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx29>;
+def R30 : AMDILRegWithSubReg<30, "r30", [Rxy30, Rzw30], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx30>;
+def R31 : AMDILRegWithSubReg<31, "r31", [Rxy31, Rzw31], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx31>;
+def R32 : AMDILRegWithSubReg<32, "r32", [Rxy32, Rzw32], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx32>;
+def R33 : AMDILRegWithSubReg<33, "r33", [Rxy33, Rzw33], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx33>;
+def R34 : AMDILRegWithSubReg<34, "r34", [Rxy34, Rzw34], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx34>;
+def R35 : AMDILRegWithSubReg<35, "r35", [Rxy35, Rzw35], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx35>;
+def R36 : AMDILRegWithSubReg<36, "r36", [Rxy36, Rzw36], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx36>;
+def R37 : AMDILRegWithSubReg<37, "r37", [Rxy37, Rzw37], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx37>;
+def R38 : AMDILRegWithSubReg<38, "r38", [Rxy38, Rzw38], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx38>;
+def R39 : AMDILRegWithSubReg<39, "r39", [Rxy39, Rzw39], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx39>;
+def R40 : AMDILRegWithSubReg<40, "r40", [Rxy40, Rzw40], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx40>;
+def R41 : AMDILRegWithSubReg<41, "r41", [Rxy41, Rzw41], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx41>;
+def R42 : AMDILRegWithSubReg<42, "r42", [Rxy42, Rzw42], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx42>;
+def R43 : AMDILRegWithSubReg<43, "r43", [Rxy43, Rzw43], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx43>;
+def R44 : AMDILRegWithSubReg<44, "r44", [Rxy44, Rzw44], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx44>;
+def R45 : AMDILRegWithSubReg<45, "r45", [Rxy45, Rzw45], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx45>;
+def R46 : AMDILRegWithSubReg<46, "r46", [Rxy46, Rzw46], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx46>;
+def R47 : AMDILRegWithSubReg<47, "r47", [Rxy47, Rzw47], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx47>;
+def R48 : AMDILRegWithSubReg<48, "r48", [Rxy48, Rzw48], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx48>;
+def R49 : AMDILRegWithSubReg<49, "r49", [Rxy49, Rzw49], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx49>;
+def R50 : AMDILRegWithSubReg<50, "r50", [Rxy50, Rzw50], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx50>;
+def R51 : AMDILRegWithSubReg<51, "r51", [Rxy51, Rzw51], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx51>;
+def R52 : AMDILRegWithSubReg<52, "r52", [Rxy52, Rzw52], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx52>;
+def R53 : AMDILRegWithSubReg<53, "r53", [Rxy53, Rzw53], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx53>;
+def R54 : AMDILRegWithSubReg<54, "r54", [Rxy54, Rzw54], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx54>;
+def R55 : AMDILRegWithSubReg<55, "r55", [Rxy55, Rzw55], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx55>;
+def R56 : AMDILRegWithSubReg<56, "r56", [Rxy56, Rzw56], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx56>;
+def R57 : AMDILRegWithSubReg<57, "r57", [Rxy57, Rzw57], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx57>;
+def R58 : AMDILRegWithSubReg<58, "r58", [Rxy58, Rzw58], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx58>;
+def R59 : AMDILRegWithSubReg<59, "r59", [Rxy59, Rzw59], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx59>;
+def R60 : AMDILRegWithSubReg<60, "r60", [Rxy60, Rzw60], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx60>;
+def R61 : AMDILRegWithSubReg<61, "r61", [Rxy61, Rzw61], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx61>;
+def R62 : AMDILRegWithSubReg<62, "r62", [Rxy62, Rzw62], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx62>;
+def R63 : AMDILRegWithSubReg<63, "r63", [Rxy63, Rzw63], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx63>;
+def R64 : AMDILRegWithSubReg<64, "r64", [Rxy64, Rzw64], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx64>;
+def R65 : AMDILRegWithSubReg<65, "r65", [Rxy65, Rzw65], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx65>;
+def R66 : AMDILRegWithSubReg<66, "r66", [Rxy66, Rzw66], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx66>;
+def R67 : AMDILRegWithSubReg<67, "r67", [Rxy67, Rzw67], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx67>;
+def R68 : AMDILRegWithSubReg<68, "r68", [Rxy68, Rzw68], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx68>;
+def R69 : AMDILRegWithSubReg<69, "r69", [Rxy69, Rzw69], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx69>;
+def R70 : AMDILRegWithSubReg<70, "r70", [Rxy70, Rzw70], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx70>;
+def R71 : AMDILRegWithSubReg<71, "r71", [Rxy71, Rzw71], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx71>;
+def R72 : AMDILRegWithSubReg<72, "r72", [Rxy72, Rzw72], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx72>;
+def R73 : AMDILRegWithSubReg<73, "r73", [Rxy73, Rzw73], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx73>;
+def R74 : AMDILRegWithSubReg<74, "r74", [Rxy74, Rzw74], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx74>;
+def R75 : AMDILRegWithSubReg<75, "r75", [Rxy75, Rzw75], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx75>;
+def R76 : AMDILRegWithSubReg<76, "r76", [Rxy76, Rzw76], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx76>;
+def R77 : AMDILRegWithSubReg<77, "r77", [Rxy77, Rzw77], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx77>;
+def R78 : AMDILRegWithSubReg<78, "r78", [Rxy78, Rzw78], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx78>;
+def R79 : AMDILRegWithSubReg<79, "r79", [Rxy79, Rzw79], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx79>;
+def R80 : AMDILRegWithSubReg<80, "r80", [Rxy80, Rzw80], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx80>;
+def R81 : AMDILRegWithSubReg<81, "r81", [Rxy81, Rzw81], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx81>;
+def R82 : AMDILRegWithSubReg<82, "r82", [Rxy82, Rzw82], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx82>;
+def R83 : AMDILRegWithSubReg<83, "r83", [Rxy83, Rzw83], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx83>;
+def R84 : AMDILRegWithSubReg<84, "r84", [Rxy84, Rzw84], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx84>;
+def R85 : AMDILRegWithSubReg<85, "r85", [Rxy85, Rzw85], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx85>;
+def R86 : AMDILRegWithSubReg<86, "r86", [Rxy86, Rzw86], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx86>;
+def R87 : AMDILRegWithSubReg<87, "r87", [Rxy87, Rzw87], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx87>;
+def R88 : AMDILRegWithSubReg<88, "r88", [Rxy88, Rzw88], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx88>;
+def R89 : AMDILRegWithSubReg<89, "r89", [Rxy89, Rzw89], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx89>;
+def R90 : AMDILRegWithSubReg<90, "r90", [Rxy90, Rzw90], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx90>;
+def R91 : AMDILRegWithSubReg<91, "r91", [Rxy91, Rzw91], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx91>;
+def R92 : AMDILRegWithSubReg<92, "r92", [Rxy92, Rzw92], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx92>;
+def R93 : AMDILRegWithSubReg<93, "r93", [Rxy93, Rzw93], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx93>;
+def R94 : AMDILRegWithSubReg<94, "r94", [Rxy94, Rzw94], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx94>;
+def R95 : AMDILRegWithSubReg<95, "r95", [Rxy95, Rzw95], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx95>;
+def R96 : AMDILRegWithSubReg<96, "r96", [Rxy96, Rzw96], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx96>;
+def R97 : AMDILRegWithSubReg<97, "r97", [Rxy97, Rzw97], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx97>;
+def R98 : AMDILRegWithSubReg<98, "r98", [Rxy98, Rzw98], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx98>;
+def R99 : AMDILRegWithSubReg<99, "r99", [Rxy99, Rzw99], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx99>;
+def R100 : AMDILRegWithSubReg<100, "r100", [Rxy100, Rzw100], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx100>;
+def R101 : AMDILRegWithSubReg<101, "r101", [Rxy101, Rzw101], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx101>;
+def R102 : AMDILRegWithSubReg<102, "r102", [Rxy102, Rzw102], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx102>;
+def R103 : AMDILRegWithSubReg<103, "r103", [Rxy103, Rzw103], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx103>;
+def R104 : AMDILRegWithSubReg<104, "r104", [Rxy104, Rzw104], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx104>;
+def R105 : AMDILRegWithSubReg<105, "r105", [Rxy105, Rzw105], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx105>;
+def R106 : AMDILRegWithSubReg<106, "r106", [Rxy106, Rzw106], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx106>;
+def R107 : AMDILRegWithSubReg<107, "r107", [Rxy107, Rzw107], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx107>;
+def R108 : AMDILRegWithSubReg<108, "r108", [Rxy108, Rzw108], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx108>;
+def R109 : AMDILRegWithSubReg<109, "r109", [Rxy109, Rzw109], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx109>;
+def R110 : AMDILRegWithSubReg<110, "r110", [Rxy110, Rzw110], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx110>;
+def R111 : AMDILRegWithSubReg<111, "r111", [Rxy111, Rzw111], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx111>;
+def R112 : AMDILRegWithSubReg<112, "r112", [Rxy112, Rzw112], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx112>;
+def R113 : AMDILRegWithSubReg<113, "r113", [Rxy113, Rzw113], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx113>;
+def R114 : AMDILRegWithSubReg<114, "r114", [Rxy114, Rzw114], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx114>;
+def R115 : AMDILRegWithSubReg<115, "r115", [Rxy115, Rzw115], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx115>;
+def R116 : AMDILRegWithSubReg<116, "r116", [Rxy116, Rzw116], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx116>;
+def R117 : AMDILRegWithSubReg<117, "r117", [Rxy117, Rzw117], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx117>;
+def R118 : AMDILRegWithSubReg<118, "r118", [Rxy118, Rzw118], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx118>;
+def R119 : AMDILRegWithSubReg<119, "r119", [Rxy119, Rzw119], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx119>;
+def R120 : AMDILRegWithSubReg<120, "r120", [Rxy120, Rzw120], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx120>;
+def R121 : AMDILRegWithSubReg<121, "r121", [Rxy121, Rzw121], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx121>;
+def R122 : AMDILRegWithSubReg<122, "r122", [Rxy122, Rzw122], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx122>;
+def R123 : AMDILRegWithSubReg<123, "r123", [Rxy123, Rzw123], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx123>;
+def R124 : AMDILRegWithSubReg<124, "r124", [Rxy124, Rzw124], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx124>;
+def R125 : AMDILRegWithSubReg<125, "r125", [Rxy125, Rzw125], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx125>;
+def R126 : AMDILRegWithSubReg<126, "r126", [Rxy126, Rzw126], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx126>;
+def R127 : AMDILRegWithSubReg<127, "r127", [Rxy127, Rzw127], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx127>;
+def R128 : AMDILRegWithSubReg<128, "r128", [Rxy128, Rzw128], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx128>;
+def R129 : AMDILRegWithSubReg<129, "r129", [Rxy129, Rzw129], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx129>;
+def R130 : AMDILRegWithSubReg<130, "r130", [Rxy130, Rzw130], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx130>;
+def R131 : AMDILRegWithSubReg<131, "r131", [Rxy131, Rzw131], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx131>;
+def R132 : AMDILRegWithSubReg<132, "r132", [Rxy132, Rzw132], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx132>;
+def R133 : AMDILRegWithSubReg<133, "r133", [Rxy133, Rzw133], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx133>;
+def R134 : AMDILRegWithSubReg<134, "r134", [Rxy134, Rzw134], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx134>;
+def R135 : AMDILRegWithSubReg<135, "r135", [Rxy135, Rzw135], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx135>;
+def R136 : AMDILRegWithSubReg<136, "r136", [Rxy136, Rzw136], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx136>;
+def R137 : AMDILRegWithSubReg<137, "r137", [Rxy137, Rzw137], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx137>;
+def R138 : AMDILRegWithSubReg<138, "r138", [Rxy138, Rzw138], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx138>;
+def R139 : AMDILRegWithSubReg<139, "r139", [Rxy139, Rzw139], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx139>;
+def R140 : AMDILRegWithSubReg<140, "r140", [Rxy140, Rzw140], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx140>;
+def R141 : AMDILRegWithSubReg<141, "r141", [Rxy141, Rzw141], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx141>;
+def R142 : AMDILRegWithSubReg<142, "r142", [Rxy142, Rzw142], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx142>;
+def R143 : AMDILRegWithSubReg<143, "r143", [Rxy143, Rzw143], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx143>;
+def R144 : AMDILRegWithSubReg<144, "r144", [Rxy144, Rzw144], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx144>;
+def R145 : AMDILRegWithSubReg<145, "r145", [Rxy145, Rzw145], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx145>;
+def R146 : AMDILRegWithSubReg<146, "r146", [Rxy146, Rzw146], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx146>;
+def R147 : AMDILRegWithSubReg<147, "r147", [Rxy147, Rzw147], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx147>;
+def R148 : AMDILRegWithSubReg<148, "r148", [Rxy148, Rzw148], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx148>;
+def R149 : AMDILRegWithSubReg<149, "r149", [Rxy149, Rzw149], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx149>;
+def R150 : AMDILRegWithSubReg<150, "r150", [Rxy150, Rzw150], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx150>;
+def R151 : AMDILRegWithSubReg<151, "r151", [Rxy151, Rzw151], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx151>;
+def R152 : AMDILRegWithSubReg<152, "r152", [Rxy152, Rzw152], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx152>;
+// Full 128-bit (four-component) registers R153-R191 and the reserved
+// R1000-R1012 range. Each Rn is built from its two 64-bit halves
+// (Rxy<n>/Rzw<n>) via the sub_xy_comp/sub_zw_comp indices, and aliases
+// the DWARF number of its Rx<n> scalar component.
+def R153 : AMDILRegWithSubReg<153, "r153", [Rxy153, Rzw153], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx153>;
+def R154 : AMDILRegWithSubReg<154, "r154", [Rxy154, Rzw154], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx154>;
+def R155 : AMDILRegWithSubReg<155, "r155", [Rxy155, Rzw155], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx155>;
+def R156 : AMDILRegWithSubReg<156, "r156", [Rxy156, Rzw156], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx156>;
+def R157 : AMDILRegWithSubReg<157, "r157", [Rxy157, Rzw157], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx157>;
+def R158 : AMDILRegWithSubReg<158, "r158", [Rxy158, Rzw158], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx158>;
+def R159 : AMDILRegWithSubReg<159, "r159", [Rxy159, Rzw159], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx159>;
+def R160 : AMDILRegWithSubReg<160, "r160", [Rxy160, Rzw160], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx160>;
+def R161 : AMDILRegWithSubReg<161, "r161", [Rxy161, Rzw161], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx161>;
+def R162 : AMDILRegWithSubReg<162, "r162", [Rxy162, Rzw162], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx162>;
+def R163 : AMDILRegWithSubReg<163, "r163", [Rxy163, Rzw163], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx163>;
+def R164 : AMDILRegWithSubReg<164, "r164", [Rxy164, Rzw164], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx164>;
+def R165 : AMDILRegWithSubReg<165, "r165", [Rxy165, Rzw165], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx165>;
+def R166 : AMDILRegWithSubReg<166, "r166", [Rxy166, Rzw166], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx166>;
+def R167 : AMDILRegWithSubReg<167, "r167", [Rxy167, Rzw167], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx167>;
+def R168 : AMDILRegWithSubReg<168, "r168", [Rxy168, Rzw168], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx168>;
+def R169 : AMDILRegWithSubReg<169, "r169", [Rxy169, Rzw169], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx169>;
+def R170 : AMDILRegWithSubReg<170, "r170", [Rxy170, Rzw170], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx170>;
+def R171 : AMDILRegWithSubReg<171, "r171", [Rxy171, Rzw171], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx171>;
+def R172 : AMDILRegWithSubReg<172, "r172", [Rxy172, Rzw172], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx172>;
+def R173 : AMDILRegWithSubReg<173, "r173", [Rxy173, Rzw173], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx173>;
+def R174 : AMDILRegWithSubReg<174, "r174", [Rxy174, Rzw174], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx174>;
+def R175 : AMDILRegWithSubReg<175, "r175", [Rxy175, Rzw175], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx175>;
+def R176 : AMDILRegWithSubReg<176, "r176", [Rxy176, Rzw176], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx176>;
+def R177 : AMDILRegWithSubReg<177, "r177", [Rxy177, Rzw177], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx177>;
+def R178 : AMDILRegWithSubReg<178, "r178", [Rxy178, Rzw178], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx178>;
+def R179 : AMDILRegWithSubReg<179, "r179", [Rxy179, Rzw179], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx179>;
+def R180 : AMDILRegWithSubReg<180, "r180", [Rxy180, Rzw180], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx180>;
+def R181 : AMDILRegWithSubReg<181, "r181", [Rxy181, Rzw181], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx181>;
+def R182 : AMDILRegWithSubReg<182, "r182", [Rxy182, Rzw182], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx182>;
+def R183 : AMDILRegWithSubReg<183, "r183", [Rxy183, Rzw183], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx183>;
+def R184 : AMDILRegWithSubReg<184, "r184", [Rxy184, Rzw184], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx184>;
+def R185 : AMDILRegWithSubReg<185, "r185", [Rxy185, Rzw185], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx185>;
+def R186 : AMDILRegWithSubReg<186, "r186", [Rxy186, Rzw186], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx186>;
+def R187 : AMDILRegWithSubReg<187, "r187", [Rxy187, Rzw187], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx187>;
+def R188 : AMDILRegWithSubReg<188, "r188", [Rxy188, Rzw188], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx188>;
+def R189 : AMDILRegWithSubReg<189, "r189", [Rxy189, Rzw189], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx189>;
+def R190 : AMDILRegWithSubReg<190, "r190", [Rxy190, Rzw190], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx190>;
+def R191 : AMDILRegWithSubReg<191, "r191", [Rxy191, Rzw191], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx191>;
+// r1000-r1012 live in the reserved range (see the reserved-register
+// comments in AMDILRegisterInfo.td).
+def R1000 : AMDILRegWithSubReg<1000, "r1000", [Rxy1000, Rzw1000], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1000>;
+def R1001 : AMDILRegWithSubReg<1001, "r1001", [Rxy1001, Rzw1001], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1001>;
+def R1002 : AMDILRegWithSubReg<1002, "r1002", [Rxy1002, Rzw1002], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1002>;
+def R1003 : AMDILRegWithSubReg<1003, "r1003", [Rxy1003, Rzw1003], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1003>;
+def R1004 : AMDILRegWithSubReg<1004, "r1004", [Rxy1004, Rzw1004], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1004>;
+def R1005 : AMDILRegWithSubReg<1005, "r1005", [Rxy1005, Rzw1005], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1005>;
+def R1006 : AMDILRegWithSubReg<1006, "r1006", [Rxy1006, Rzw1006], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1006>;
+def R1007 : AMDILRegWithSubReg<1007, "r1007", [Rxy1007, Rzw1007], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1007>;
+def R1008 : AMDILRegWithSubReg<1008, "r1008", [Rxy1008, Rzw1008], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1008>;
+def R1009 : AMDILRegWithSubReg<1009, "r1009", [Rxy1009, Rzw1009], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1009>;
+def R1010 : AMDILRegWithSubReg<1010, "r1010", [Rxy1010, Rzw1010], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1010>;
+def R1011 : AMDILRegWithSubReg<1011, "r1011", [Rxy1011, Rzw1011], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1011>;
+def R1012 : AMDILRegWithSubReg<1012, "r1012", [Rxy1012, Rzw1012], [sub_xy_comp, sub_zw_comp]>, DwarfRegAlias<Rx1012>;

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,212 @@
+//===-- AMDILRegisterInfo.cpp ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILRegisterInfo.h"
+#include "AMDIL.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/FormattedStream.h"
+
+
+using namespace llvm;
+
+// Construct the AMDIL register info. The TableGen-generated base class
+// is passed 0 as its return-address register number; the original
+// "RA???" note suggests this value was never finalized -- confirm
+// against AMDILGenRegisterInfo.
+AMDILRegisterInfo::AMDILRegisterInfo(AMDILTargetMachine &tm,
+                                     const TargetInstrInfo &tii)
+  : AMDILGenRegisterInfo(0), // RA???
+    TM(tm), TII(tii)
+{
+  // Running totals used by processFunctionBeforeFrameFinalized() and
+  // getStackSize() to track per-function stack usage.
+  baseOffset = 0;
+  nextFuncOffset = 0;
+}
+
+// AMDIL declares no callee-saved registers: the returned list contains
+// only the null terminator. See the TODO below on whether IL needs any.
+const uint16_t*
+AMDILRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
+{
+  static const uint16_t CalleeSavedRegs[] = { 0 };
+  // TODO: Does IL need to actually have any callee saved regs?
+  // I don't think we do since we can just use sequential registers
+  // Maybe this would be easier if every function call was inlined first
+  // and then there would be no callee issues to deal with
+  //TODO(getCalleeSavedRegs);
+  return CalleeSavedRegs;
+}
+
+// Build the set of registers the allocator must never use: the
+// stack/frame bookkeeping registers, the hand-written-code temporaries,
+// the mem registers, the CFG-structurizer registers, and every live-in
+// of the function's entry block.
+BitVector
+AMDILRegisterInfo::getReservedRegs(const MachineFunction &MF) const
+{
+  BitVector Reserved(getNumRegs());
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  // Set the frame pointer, stack pointer, RA, Stack Data Pointer as reserved.
+  // FP is only reserved when this function actually uses a frame pointer.
+  if (TFI->hasFP(MF)) {
+    Reserved.set(AMDIL::FP);
+  }
+  Reserved.set(AMDIL::SP);
+  Reserved.set(AMDIL::SDP);
+  Reserved.set(AMDIL::RA);
+
+  // Set temps T1-T5 as reserved.
+  Reserved.set(AMDIL::T1);
+  Reserved.set(AMDIL::T2);
+  Reserved.set(AMDIL::T3);
+  Reserved.set(AMDIL::T4);
+  Reserved.set(AMDIL::T5);
+
+  // Set the mem register as reserved.
+  // NOTE(review): MEMxyz is defined alongside MEMx/MEMxy/MEM but is not
+  // reserved here -- confirm this omission is intentional.
+  Reserved.set(AMDIL::MEMx);
+  Reserved.set(AMDIL::MEMxy);
+  Reserved.set(AMDIL::MEM);
+
+  // Set CFG1-CFG10 as reserved.
+  Reserved.set(AMDIL::CFG1);
+  Reserved.set(AMDIL::CFG2);
+  Reserved.set(AMDIL::CFG3);
+  Reserved.set(AMDIL::CFG4);
+  Reserved.set(AMDIL::CFG5);
+  Reserved.set(AMDIL::CFG6);
+  Reserved.set(AMDIL::CFG7);
+  Reserved.set(AMDIL::CFG8);
+  Reserved.set(AMDIL::CFG9);
+  Reserved.set(AMDIL::CFG10);
+
+  // Reserve the live-ins for the function.
+  // Only the entry block's live-in list is consulted.
+  MachineBasicBlock::livein_iterator LII = MF.begin()->livein_begin();
+  MachineBasicBlock::livein_iterator LIE = MF.begin()->livein_end();
+  while (LII != LIE) {
+    Reserved.set(*LII);
+    ++LII;
+  }
+  return Reserved;
+}
+
+// Register classes matching getCalleeSavedRegs(); like that list, it is
+// empty (null-terminated only).
+const TargetRegisterClass* const*
+AMDILRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
+{
+  static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+  // TODO: Keep in sync with getCalleeSavedRegs
+  //TODO(getCalleeSavedRegClasses);
+  return CalleeSavedRegClasses;
+}
+// Call-frame setup/destroy pseudo instructions are simply deleted; no
+// stack-pointer adjustment is emitted for AMDIL.
+void
+AMDILRegisterInfo::eliminateCallFramePseudoInstr(
+  MachineFunction &MF,
+  MachineBasicBlock &MBB,
+  MachineBasicBlock::iterator I) const
+{
+  MBB.erase(I);
+}
+
+// For each frame index we find, we store the offset in the stack which is
+// being pushed back into the global buffer. The offset into the stack where
+// the value is stored is copied into a new register and the frame index is
+// then replaced with that register.
+void
+AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                       int SPAdj,
+                                       RegScavenger *RS) const
+{
+  // AMDIL never requests a stack-pointer adjustment (see
+  // eliminateCallFramePseudoInstr).
+  assert(SPAdj == 0 && "Unexpected");
+  MachineInstr &MI = *II;
+  MachineFunction &MF = *MI.getParent()->getParent();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  // Scan every operand: an instruction may reference more than one
+  // frame index.
+  unsigned int y = MI.getNumOperands();
+  for (unsigned int x = 0; x < y; ++x) {
+    if (!MI.getOperand(x).isFI()) {
+      continue;
+    }
+    // For a store the rewritten operand becomes a register *def*;
+    // for every other instruction it is a use.
+    bool def = isStoreInst(TM, &MI);
+    int FrameIndex = MI.getOperand(x).getIndex();
+    int64_t Offset = MFI->getObjectOffset(FrameIndex);
+    //int64_t Size = MF.getFrameInfo()->getObjectSize(FrameIndex);
+    // An optimization is to only use the offsets if the size
+    // is larger than 4, which means we are storing an array
+    // instead of just a pointer. If we are size 4 then we can
+    // just do register copies since we don't need to worry about
+    // indexing dynamically
+    // Materialize the object's stack offset into DFP via a
+    // LOADCONST_i32 inserted immediately before MI.
+    MachineInstr *nMI = MF.CreateMachineInstr(
+                          TII.get(AMDIL::LOADCONST_i32), MI.getDebugLoc());
+    nMI->addOperand(MachineOperand::CreateReg(AMDIL::DFP, true));
+    nMI->addOperand(
+      MachineOperand::CreateImm(Offset));
+    MI.getParent()->insert(II, nMI);
+    // Replace the frame-index operand with the register just defined.
+    if (MI.getOperand(x).isReg() == false)  {
+      MI.getOperand(x).ChangeToRegister(
+        nMI->getOperand(0).getReg(), def);
+    } else {
+      MI.getOperand(x).setReg(
+        nMI->getOperand(0).getReg());
+    }
+  }
+}
+
+// Accumulate the stack space of this function's frame objects into
+// nextFuncOffset; getStackSize() reports the per-function total.
+void
+AMDILRegisterInfo::processFunctionBeforeFrameFinalized(
+  MachineFunction &MF) const
+{
+  //TODO(processFunctionBeforeFrameFinalized);
+  // Keep track of the amount of stack that the current function uses so
+  // that we can set the offset to the end of the stack and any other
+  // function call will not overwrite any stack variables.
+  // baseOffset = nextFuncOffset;
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  for (uint32_t x = 0, y = MFI->getNumObjects(); x < y; ++x) {
+    int64_t size = MFI->getObjectSize(x);
+    // Objects whose size is a positive multiple of 4 are counted
+    // exactly; anything else (including zero-sized objects) is given a
+    // padded 16-byte slot.
+    if (!(size % 4) && size > 1) {
+      nextFuncOffset += size;
+    } else {
+      nextFuncOffset += 16;
+    }
+  }
+}
+// Return-address register; RA is defined in AMDILRegisterInfo.td as
+// IL register "r1026".
+unsigned int
+AMDILRegisterInfo::getRARegister() const
+{
+  return AMDIL::RA;
+}
+
+// Frame-pointer register ("r1028"); returned unconditionally, even when
+// the function does not use a frame pointer.
+unsigned int
+AMDILRegisterInfo::getFrameRegister(const MachineFunction &MF) const
+{
+  return AMDIL::FP;
+}
+
+// Exception handling is not supported: asserts in debug builds and
+// returns register 0 otherwise.
+unsigned int
+AMDILRegisterInfo::getEHExceptionRegister() const
+{
+  assert(0 && "What is the exception register");
+  return 0;
+}
+
+// Exception handling is not supported: asserts in debug builds and
+// returns register 0 otherwise.
+unsigned int
+AMDILRegisterInfo::getEHHandlerRegister() const
+{
+  assert(0 && "What is the exception handler register");
+  return 0;
+}
+
+
+// Stack bytes accumulated for the current function, i.e. the delta
+// recorded by processFunctionBeforeFrameFinalized() since baseOffset.
+int64_t
+AMDILRegisterInfo::getStackSize() const
+{
+  return nextFuncOffset - baseOffset;
+}
+
+#define GET_REGINFO_MC_DESC
+#define GET_REGINFO_TARGET_DESC
+#include "AMDILGenRegisterInfo.inc"
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,108 @@
+//===-- AMDILRegisterInfo.h -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AMDIL implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILREGISTERINFO_H_
+#define AMDILREGISTERINFO_H_
+
+#include "AMDILLLVMPC.h"
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#define GET_REGINFO_HEADER
+#include "AMDILGenRegisterInfo.inc"
+// See header file for explanation
+
+namespace llvm
+{
+
+class AMDILTargetMachine;
+class TargetInstrInfo;
+class Type;
+
+/// DWARFFlavour - Flavour of dwarf regnumbers
+///
+namespace DWARFFlavour
+{
+enum {
+  AMDIL_Generic = 0 // only one DWARF register numbering is defined
+};
+}
+
+/// AMDIL implementation of TargetRegisterInfo. Frame indices are
+/// eliminated by materializing stack offsets into a register, and
+/// per-function stack usage is tracked in baseOffset/nextFuncOffset.
+struct AMDILRegisterInfo : public AMDILGenRegisterInfo {
+  AMDILTargetMachine &TM;      // owning target machine
+  const TargetInstrInfo &TII;  // used to build instructions during frame lowering
+
+  AMDILRegisterInfo(AMDILTargetMachine &tm, const TargetInstrInfo &tii);
+  /// Code Generation virtual methods...
+  /// Null-terminated list of callee-saved registers (empty for AMDIL).
+  const uint16_t*
+  getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+  /// Register classes matching getCalleeSavedRegs() (also empty).
+  const TargetRegisterClass* const*
+  getCalleeSavedRegClasses(
+    const MachineFunction *MF = 0) const;
+
+  /// Registers the allocator must not use: FP/SP/SDP/RA, the T1-T5
+  /// temporaries, mem and CFG registers, plus the function's live-ins.
+  BitVector
+  getReservedRegs(const MachineFunction &MF) const;
+
+  /// Call-frame pseudo instructions are simply erased; no SP adjustment.
+  void
+  eliminateCallFramePseudoInstr(
+    MachineFunction &MF,
+    MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator I) const;
+  /// Replace a frame-index operand with a register holding the object's
+  /// stack offset.
+  void
+  eliminateFrameIndex(MachineBasicBlock::iterator II,
+                      int SPAdj,
+                      RegScavenger *RS = NULL) const;
+
+  /// Accumulates this function's frame-object sizes into nextFuncOffset.
+  void
+  processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+  // Debug information queries.
+  unsigned int
+  getRARegister() const;
+
+  unsigned int
+  getFrameRegister(const MachineFunction &MF) const;
+
+  // Exception handling queries.
+  // Not supported: both assert in debug builds.
+  unsigned int
+  getEHExceptionRegister() const;
+  unsigned int
+  getEHHandlerRegister() const;
+
+
+  /// Stack bytes used by the current function.
+  int64_t
+  getStackSize() const;
+#if 0
+  // Disabled register-scavenging / virtual-base-register hooks, kept
+  // for reference.
+  bool
+  requiresRegisterScavenging(const MachineFunction&) const {
+    return true;
+  }
+  bool
+  requireFrameIndexScavenging(const MachineFunction&)const {
+    return true;
+  }
+  bool
+  requiresVirtualBaseRegisters(const MachineFunction&) const {
+    return true;
+  }
+#endif
+
+private:
+  // mutable: both fields are updated from the const
+  // processFunctionBeforeFrameFinalized().
+  mutable int64_t baseOffset;      // stack offset at function entry
+  mutable int64_t nextFuncOffset;  // running end-of-stack offset
+};
+
+} // end namespace llvm
+
+#endif // AMDILREGISTERINFO_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,268 @@
+//===-- AMDILRegisterInfo.td ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Declarations that describe the AMDIL register file.
+//
+//===----------------------------------------------------------------------===//
+
+// A register composed of sub-registers (e.g. a 64-bit Rxy from Rx/Ry,
+// or a 128-bit R from Rxy/Rzw). 'num' is the internal encoding value.
+class AMDILRegWithSubReg<bits<16> num, string n, list<Register> subregs, list<SubRegIndex> subRegIdx>
+: RegisterWithSubRegs<n, subregs> {
+  field bits<16> Value;
+  let Value = num;
+  let Namespace = "AMDIL";
+  let SubRegIndices = subRegIdx;
+}
+// A simple (scalar) register with internal encoding value 'num'.
+class AMDILReg<bits<16> num, string n> : Register<n> {
+  field bits<16> Value;
+  let Value = num;
+  let Namespace = "AMDIL";
+}
+
+// Sub-register indices: the four scalar components (x/y/z/w) and the
+// two 64-bit halves (xy/zw) of a full four-component register.
+def sub_x_comp : SubRegIndex;
+def sub_y_comp : SubRegIndex;
+def sub_z_comp : SubRegIndex;
+def sub_w_comp : SubRegIndex;
+def sub_xy_comp : SubRegIndex;
+def sub_zw_comp : SubRegIndex;
+
+include "AMDILRegisterDefsScalar.td"
+include "AMDILRegisterDefsV2.td"
+include "AMDILRegisterDefsV4.td"
+
+// All registers between 1000 and 1024 are reserved and cannot be used
+// unless commented in this section
+// r1021-r1039 are used to dynamically calculate the local/group/thread/region/region_local ID's
+// r1020 is used to hold the frame index for local arrays
+// r1019 is used to hold the dynamic stack allocation pointer
+// r1018 is used as a temporary register for handwritten code
+// r1017 is used as a temporary register for handwritten code
+// r1016 is used as a temporary register for load/store code
+// r1015 is used as a temporary register for data segment offset
+// r1014 is used as a temporary register for store code
+// r1013 is used as the section data pointer register
+// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
+// r1028 is used as the frame pointer register
+// r1027 is used as mem register
+// r1026 is used as the return address register.
+// r1030-r1039 are reserved for AMDILCFGStructurizer.
+//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
+//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
+//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
+//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
+//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
+//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
+// Special registers. The first template argument is the internal
+// encoding; the IL name (e.g. "r1028") and DWARF number come from the
+// reserved r1013-r1039 range.
+def FP : AMDILReg<5012, "r1028">, DwarfRegNum<[1028]>;
+def DFP : AMDILReg<5016, "r1029">, DwarfRegNum<[1029]>;
+def SP : AMDILReg<4076, "r1019">, DwarfRegNum<[1019]>;
+def T1 : AMDILReg<4072, "r1018">, DwarfRegNum<[1018]>;
+def T2 : AMDILReg<4068, "r1017">, DwarfRegNum<[1017]>;
+def T3 : AMDILReg<4064, "r1016">, DwarfRegNum<[1016]>;
+def T4 : AMDILReg<4060, "r1015">, DwarfRegNum<[1015]>;
+def T5 : AMDILReg<4056, "r1014">, DwarfRegNum<[1014]>;
+def SDP : AMDILReg<4052, "r1013">, DwarfRegNum<[1013]>;
+// MEMx/MEMxy/MEMxyz/MEM all share encoding 5008 and alias MEMx's
+// DWARF number.
+def MEMx : AMDILReg<5008,"mem0">, DwarfRegNum<[5008]>;
+def MEMxy : AMDILReg<5008, "mem0">, DwarfRegAlias<MEMx>;
+def MEMxyz : AMDILReg<5008, "mem0">, DwarfRegAlias<MEMx>;
+def MEM   : AMDILReg<5008, "mem0">, DwarfRegAlias<MEMx>;
+def RA : AMDILReg<5004, "r1026">, DwarfRegNum<[1026]>;
+// CFG1-CFG10 are reserved for the CFG structurizer.
+def CFG1 : AMDILReg<5020, "r1030">, DwarfRegNum<[1030]>;
+def CFG2 : AMDILReg<5024, "r1031">, DwarfRegNum<[1031]>;
+def CFG3 : AMDILReg<5028, "r1032">, DwarfRegNum<[1032]>;
+def CFG4 : AMDILReg<5032, "r1033">, DwarfRegNum<[1033]>;
+def CFG5 : AMDILReg<5036, "r1034">, DwarfRegNum<[1034]>;
+def CFG6 : AMDILReg<5040, "r1035">, DwarfRegNum<[1035]>;
+def CFG7 : AMDILReg<5044, "r1036">, DwarfRegNum<[1036]>;
+def CFG8 : AMDILReg<5048, "r1037">, DwarfRegNum<[1037]>;
+def CFG9 : AMDILReg<5052, "r1038">, DwarfRegNum<[1038]>;
+def CFG10 : AMDILReg<5056, "r1039">, DwarfRegNum<[1039]>;
+
+// Scalar integer register classes. The member list is textually
+// included from the AMDILRegisterUses*.td files; AltOrders restricts
+// allocation to the r65-r191 components, and AltOrderSelect always
+// picks that order (returns 1).
+def GPRI8 : RegisterClass<"AMDIL", [i8], 8,
+    ( add include "AMDILRegisterUsesScalar.td"
+    )>
+{
+        let AltOrders = [(add (sequence "Rx%u", 65, 191), (sequence "Ry%u", 65, 191), (sequence "Rz%u", 65, 191), (sequence "Rw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRI16 : RegisterClass<"AMDIL", [i16], 16,
+    ( add include "AMDILRegisterUsesScalar.td" )>
+{
+        let AltOrders = [(add (sequence "Rx%u", 65, 191), (sequence "Ry%u", 65, 191), (sequence "Rz%u", 65, 191), (sequence "Rw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+            }
+// GPRI32 additionally contains the FP/RA/SDP/MEMx special registers.
+def GPRI32 : RegisterClass<"AMDIL", [i32], 32,
+    ( add include "AMDILRegisterUsesScalar.td"
+    , FP, RA, SDP, MEMx
+    )>
+    {
+        let AltOrders = [(add (sequence "Rx%u", 65, 191), (sequence "Ry%u", 65, 191), (sequence "Rz%u", 65, 191), (sequence "Rw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+// Single-component i32 classes, each restricted to one lane (x/y/z/w).
+def GPRXI32 : RegisterClass<"AMDIL", [i32], 32,
+    ( add include "AMDILRegisterUsesScalarX.td" )>
+    {
+        let AltOrders = [(add (sequence "Rx%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRYI32 : RegisterClass<"AMDIL", [i32], 32,
+    ( add include "AMDILRegisterUsesScalarY.td" )>
+    {
+        let AltOrders = [(add (sequence "Ry%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRZI32 : RegisterClass<"AMDIL", [i32], 32,
+    ( add include "AMDILRegisterUsesScalarZ.td" )>
+    {
+        let AltOrders = [(add (sequence "Rz%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRWI32 : RegisterClass<"AMDIL", [i32], 32,
+    ( add include "AMDILRegisterUsesScalarW.td" )>
+    {
+        let AltOrders = [(add (sequence "Rw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+
+// f32 scalar class plus the 64-bit (two-component xy/zw pair) classes.
+def GPRF32 : RegisterClass<"AMDIL", [f32], 32,
+    ( add include "AMDILRegisterUsesScalar.td" )>
+    {
+        let AltOrders = [(add (sequence "Rx%u", 65, 191), (sequence "Ry%u", 65, 191), (sequence "Rz%u", 65, 191), (sequence "Rw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRI64 : RegisterClass<"AMDIL", [i64], 64,
+    ( add include "AMDILRegisterUsesV2.td" )>
+    {
+        let AltOrders = [(add (sequence "Rxy%u", 65, 191), (sequence "Rzw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRXYI64 : RegisterClass<"AMDIL", [i64], 64,
+    ( add include "AMDILRegisterUsesV2XY.td" )>
+    {
+        let AltOrders = [(add (sequence "Rxy%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRZWI64 : RegisterClass<"AMDIL", [i64], 64,
+    ( add include "AMDILRegisterUsesV2ZW.td" )>
+    {
+        let AltOrders = [(add (sequence "Rzw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRF64 : RegisterClass<"AMDIL", [f64], 64,
+    ( add include "AMDILRegisterUsesV2.td" )>
+    {
+        let AltOrders = [(add (sequence "Rxy%u", 65, 191), (sequence "Rzw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+// Two-component vector classes, held in the 64-bit xy/zw register pairs.
+def GPRV2I8 : RegisterClass<"AMDIL", [v2i8], 16,
+    ( add include "AMDILRegisterUsesV2.td" )>
+    {
+        let AltOrders = [(add (sequence "Rxy%u", 65, 191), (sequence "Rzw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRV2I16 : RegisterClass<"AMDIL", [v2i16], 32,
+    ( add include "AMDILRegisterUsesV2.td" )>
+    {
+        let AltOrders = [(add (sequence "Rxy%u", 65, 191), (sequence "Rzw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+// GPRV2I32 additionally contains the MEMxy special register.
+def GPRV2I32 : RegisterClass<"AMDIL", [v2i32], 64,
+    ( add include "AMDILRegisterUsesV2.td"
+    , MEMxy
+    )>
+    {
+        let AltOrders = [(add (sequence "Rxy%u", 65, 191), (sequence "Rzw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRV2F32 : RegisterClass<"AMDIL", [v2f32], 64,
+    ( add include "AMDILRegisterUsesV2.td" )>
+    {
+        let AltOrders = [(add (sequence "Rxy%u", 65, 191), (sequence "Rzw%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+// 128-bit classes (v2i64/v2f64 and the four-component vectors),
+// allocated from the full Rn registers.
+def GPRV2I64 : RegisterClass<"AMDIL", [v2i64], 128,
+    ( add include "AMDILRegisterUsesV4.td" )>
+    {
+        let AltOrders = [(add (sequence "R%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }
+def GPRV2F64 : RegisterClass<"AMDIL", [v2f64], 128,
+    ( add include "AMDILRegisterUsesV4.td" )>
+    {
+        let AltOrders = [(add (sequence "R%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+  }];
+}
+def GPRV4I8 : RegisterClass<"AMDIL", [v4i8], 32,
+    ( add include "AMDILRegisterUsesV4.td" )>
+{
+        let AltOrders = [(add (sequence "R%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+  }];
+}
+def GPRV4I16 : RegisterClass<"AMDIL", [v4i16], 64,
+    ( add include "AMDILRegisterUsesV4.td" )>
+{
+        let AltOrders = [(add (sequence "R%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+  }];
+}
+def GPRV4F32 : RegisterClass<"AMDIL", [v4f32], 128,
+    ( add include "AMDILRegisterUsesV4.td" )>
+{
+        let AltOrders = [(add (sequence "R%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+  }];
+}
+// GPRV4I32 additionally contains the SP/T1-T5/MEM special registers.
+def GPRV4I32 : RegisterClass<"AMDIL", [v4i32], 128,
+    ( add include "AMDILRegisterUsesV4.td"
+    , SP, T1, T2, T3, T4, T5, MEM
+    )>
+{
+        let AltOrders = [(add (sequence "R%u", 65, 191))];
+        let AltOrderSelect = [{
+          return 1;
+        }];
+    }

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalar.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalar.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalar.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalar.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,14 @@
+//===-- AMDILRegisterUsesScalar.td ----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+Rx1, Ry1, Rz1, Rw1, Rx2, Ry2, Rz2, Rw2, Rx3, Ry3, Rz3, Rw3, Rx4, Ry4, Rz4, Rw4, Rx5, Ry5, Rz5, Rw5, Rx6, Ry6, Rz6, Rw6, Rx7, Ry7, Rz7, Rw7, Rx8, Ry8, Rz8, Rw8, Rx9, Ry9, Rz9, Rw9, Rx10, Ry10, Rz10, Rw10, Rx11, Ry11, Rz11, Rw11, Rx12, Ry12, Rz12, Rw12, Rx13, Ry13, Rz13, Rw13, Rx14, Ry14, Rz14, Rw14, Rx15, Ry15, Rz15, Rw15, Rx16, Ry16, Rz16, Rw16, Rx17, Ry17, Rz17, Rw17, Rx18, Ry18, Rz18, Rw18, Rx19, Ry19, Rz19, Rw19, Rx20, Ry20, Rz20, Rw20, Rx21, Ry21, Rz21, Rw21, Rx22, Ry22, Rz22, Rw22, Rx23, Ry23, Rz23, Rw23, Rx24, Ry24, Rz24, Rw24, Rx25, Ry25, Rz25, Rw25, Rx26, Ry26, Rz26, Rw26, Rx27, Ry27, Rz27, Rw27, Rx28, Ry28, Rz28, Rw28, Rx29, Ry29, Rz29, Rw29, Rx30, Ry30, Rz30, Rw30, Rx31, Ry31, Rz31, Rw31, Rx32, Ry32, Rz32, Rw32, Rx33, Ry33, Rz33, Rw33, Rx34, Ry34, Rz34, Rw34, Rx35, Ry35, Rz35, Rw35, Rx36, Ry36, Rz36, Rw36, Rx37, Ry37, Rz37, Rw37, Rx38, Ry38, Rz38, Rw38, Rx39, Ry39, Rz39, Rw39, Rx40, Ry40, Rz40, Rw40, Rx41, Ry41, Rz41, Rw41, Rx42, Ry42, Rz42, Rw42, Rx43, Ry43, Rz43,
  Rw43, Rx44, Ry44, Rz44, Rw44, Rx45, Ry45, Rz45, Rw45, Rx46, Ry46, Rz46, Rw46, Rx47, Ry47, Rz47, Rw47, Rx48, Ry48, Rz48, Rw48, Rx49, Ry49, Rz49, Rw49, Rx50, Ry50, Rz50, Rw50, Rx51, Ry51, Rz51, Rw51, Rx52, Ry52, Rz52, Rw52, Rx53, Ry53, Rz53, Rw53, Rx54, Ry54, Rz54, Rw54, Rx55, Ry55, Rz55, Rw55, Rx56, Ry56, Rz56, Rw56, Rx57, Ry57, Rz57, Rw57, Rx58, Ry58, Rz58, Rw58, Rx59, Ry59, Rz59, Rw59, Rx60, Ry60, Rz60, Rw60, Rx61, Ry61, Rz61, Rw61, Rx62, Ry62, Rz62, Rw62, Rx63, Ry63, Rz63, Rw63, Rx64, Ry64, Rz64, Rw64, Rx65, Ry65, Rz65, Rw65, Rx66, Ry66, Rz66, Rw66, Rx67, Ry67, Rz67, Rw67, Rx68, Ry68, Rz68, Rw68, Rx69, Ry69, Rz69, Rw69, Rx70, Ry70, Rz70, Rw70, Rx71, Ry71, Rz71, Rw71, Rx72, Ry72, Rz72, Rw72, Rx73, Ry73, Rz73, Rw73, Rx74, Ry74, Rz74, Rw74, Rx75, Ry75, Rz75, Rw75, Rx76, Ry76, Rz76, Rw76, Rx77, Ry77, Rz77, Rw77, Rx78, Ry78, Rz78, Rw78, Rx79, Ry79, Rz79, Rw79, Rx80, Ry80, Rz80, Rw80, Rx81, Ry81, Rz81, Rw81, Rx82, Ry82, Rz82, Rw82, Rx83, Ry83, Rz83, Rw83, Rx84, Ry84, Rz84, Rw84
 , Rx85, Ry85, Rz85, Rw85, Rx86, Ry86, Rz86, Rw86, Rx87, Ry87, Rz87, Rw87, Rx88, Ry88, Rz88, Rw88, Rx89, Ry89, Rz89, Rw89, Rx90, Ry90, Rz90, Rw90, Rx91, Ry91, Rz91, Rw91, Rx92, Ry92, Rz92, Rw92, Rx93, Ry93, Rz93, Rw93, Rx94, Ry94, Rz94, Rw94, Rx95, Ry95, Rz95, Rw95, Rx96, Ry96, Rz96, Rw96, Rx97, Ry97, Rz97, Rw97, Rx98, Ry98, Rz98, Rw98, Rx99, Ry99, Rz99, Rw99, Rx100, Ry100, Rz100, Rw100, Rx101, Ry101, Rz101, Rw101, Rx102, Ry102, Rz102, Rw102, Rx103, Ry103, Rz103, Rw103, Rx104, Ry104, Rz104, Rw104, Rx105, Ry105, Rz105, Rw105, Rx106, Ry106, Rz106, Rw106, Rx107, Ry107, Rz107, Rw107, Rx108, Ry108, Rz108, Rw108, Rx109, Ry109, Rz109, Rw109, Rx110, Ry110, Rz110, Rw110, Rx111, Ry111, Rz111, Rw111, Rx112, Ry112, Rz112, Rw112, Rx113, Ry113, Rz113, Rw113, Rx114, Ry114, Rz114, Rw114, Rx115, Ry115, Rz115, Rw115, Rx116, Ry116, Rz116, Rw116, Rx117, Ry117, Rz117, Rw117, Rx118, Ry118, Rz118, Rw118, Rx119, Ry119, Rz119, Rw119, Rx120, Ry120, Rz120, Rw120, Rx121, Ry121, Rz121, Rw121, Rx122, Ry12
 2, Rz122, Rw122, Rx123, Ry123, Rz123, Rw123, Rx124, Ry124, Rz124, Rw124, Rx125, Ry125, Rz125, Rw125, Rx126, Ry126, Rz126, Rw126, Rx127, Ry127, Rz127, Rw127, Rx128, Ry128, Rz128, Rw128, Rx129, Ry129, Rz129, Rw129, Rx130, Ry130, Rz130, Rw130, Rx131, Ry131, Rz131, Rw131, Rx132, Ry132, Rz132, Rw132, Rx133, Ry133, Rz133, Rw133, Rx134, Ry134, Rz134, Rw134, Rx135, Ry135, Rz135, Rw135, Rx136, Ry136, Rz136, Rw136, Rx137, Ry137, Rz137, Rw137, Rx138, Ry138, Rz138, Rw138, Rx139, Ry139, Rz139, Rw139, Rx140, Ry140, Rz140, Rw140, Rx141, Ry141, Rz141, Rw141, Rx142, Ry142, Rz142, Rw142, Rx143, Ry143, Rz143, Rw143, Rx144, Ry144, Rz144, Rw144, Rx145, Ry145, Rz145, Rw145, Rx146, Ry146, Rz146, Rw146, Rx147, Ry147, Rz147, Rw147, Rx148, Ry148, Rz148, Rw148, Rx149, Ry149, Rz149, Rw149, Rx150, Ry150, Rz150, Rw150, Rx151, Ry151, Rz151, Rw151, Rx152, Ry152, Rz152, Rw152, Rx153, Ry153, Rz153, Rw153, Rx154, Ry154, Rz154, Rw154, Rx155, Ry155, Rz155, Rw155, Rx156, Ry156, Rz156, Rw156, Rx157, Ry157, Rz157,
  Rw157, Rx158, Ry158, Rz158, Rw158, Rx159, Ry159, Rz159, Rw159, Rx160, Ry160, Rz160, Rw160, Rx161, Ry161, Rz161, Rw161, Rx162, Ry162, Rz162, Rw162, Rx163, Ry163, Rz163, Rw163, Rx164, Ry164, Rz164, Rw164, Rx165, Ry165, Rz165, Rw165, Rx166, Ry166, Rz166, Rw166, Rx167, Ry167, Rz167, Rw167, Rx168, Ry168, Rz168, Rw168, Rx169, Ry169, Rz169, Rw169, Rx170, Ry170, Rz170, Rw170, Rx171, Ry171, Rz171, Rw171, Rx172, Ry172, Rz172, Rw172, Rx173, Ry173, Rz173, Rw173, Rx174, Ry174, Rz174, Rw174, Rx175, Ry175, Rz175, Rw175, Rx176, Ry176, Rz176, Rw176, Rx177, Ry177, Rz177, Rw177, Rx178, Ry178, Rz178, Rw178, Rx179, Ry179, Rz179, Rw179, Rx180, Ry180, Rz180, Rw180, Rx181, Ry181, Rz181, Rw181, Rx182, Ry182, Rz182, Rw182, Rx183, Ry183, Rz183, Rw183, Rx184, Ry184, Rz184, Rw184, Rx185, Ry185, Rz185, Rw185, Rx186, Ry186, Rz186, Rw186, Rx187, Ry187, Rz187, Rw187, Rx188, Ry188, Rz188, Rw188, Rx189, Ry189, Rz189, Rw189, Rx190, Ry190, Rz190, Rw190, Rx191, Ry191, Rz191, Rw191, Rx1000, Ry1000, Rz1000, Rw100
 0, Rx1001, Ry1001, Rz1001, Rw1001, Rx1002, Ry1002, Rz1002, Rw1002, Rx1003, Ry1003, Rz1003, Rw1003, Rx1004, Ry1004, Rz1004, Rw1004, Rx1005, Ry1005, Rz1005, Rw1005, Rx1006, Ry1006, Rz1006, Rw1006, Rx1007, Ry1007, Rz1007, Rw1007, Rx1008, Ry1008, Rz1008, Rw1008, Rx1009, Ry1009, Rz1009, Rw1009, Rx1010, Ry1010, Rz1010, Rw1010, Rx1011, Ry1011, Rz1011, Rw1011, Rx1012, Ry1012, Rz1012, Rw1012
\ No newline at end of file

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarW.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarW.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarW.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarW.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,14 @@
+//===-- AMDILRegisterUsesScalarW.td ---------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+Rw1, Rw2, Rw3, Rw4, Rw5, Rw6, Rw7, Rw8, Rw9, Rw10, Rw11, Rw12, Rw13, Rw14, Rw15, Rw16, Rw17, Rw18, Rw19, Rw20, Rw21, Rw22, Rw23, Rw24, Rw25, Rw26, Rw27, Rw28, Rw29, Rw30, Rw31, Rw32, Rw33, Rw34, Rw35, Rw36, Rw37, Rw38, Rw39, Rw40, Rw41, Rw42, Rw43, Rw44, Rw45, Rw46, Rw47, Rw48, Rw49, Rw50, Rw51, Rw52, Rw53, Rw54, Rw55, Rw56, Rw57, Rw58, Rw59, Rw60, Rw61, Rw62, Rw63, Rw64, Rw65, Rw66, Rw67, Rw68, Rw69, Rw70, Rw71, Rw72, Rw73, Rw74, Rw75, Rw76, Rw77, Rw78, Rw79, Rw80, Rw81, Rw82, Rw83, Rw84, Rw85, Rw86, Rw87, Rw88, Rw89, Rw90, Rw91, Rw92, Rw93, Rw94, Rw95, Rw96, Rw97, Rw98, Rw99, Rw100, Rw101, Rw102, Rw103, Rw104, Rw105, Rw106, Rw107, Rw108, Rw109, Rw110, Rw111, Rw112, Rw113, Rw114, Rw115, Rw116, Rw117, Rw118, Rw119, Rw120, Rw121, Rw122, Rw123, Rw124, Rw125, Rw126, Rw127, Rw128, Rw129, Rw130, Rw131, Rw132, Rw133, Rw134, Rw135, Rw136, Rw137, Rw138, Rw139, Rw140, Rw141, Rw142, Rw143, Rw144, Rw145, Rw146, Rw147, Rw148, Rw149, Rw150, Rw151, Rw152, Rw153, Rw154, Rw155, Rw156, Rw157
 , Rw158, Rw159, Rw160, Rw161, Rw162, Rw163, Rw164, Rw165, Rw166, Rw167, Rw168, Rw169, Rw170, Rw171, Rw172, Rw173, Rw174, Rw175, Rw176, Rw177, Rw178, Rw179, Rw180, Rw181, Rw182, Rw183, Rw184, Rw185, Rw186, Rw187, Rw188, Rw189, Rw190, Rw191, Rw1000, Rw1001, Rw1002, Rw1003, Rw1004, Rw1005, Rw1006, Rw1007, Rw1008, Rw1009, Rw1010, Rw1011, Rw1012
\ No newline at end of file

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarX.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarX.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarX.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,14 @@
+//===-- AMDILRegisterUsesScalarX.td ---------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+Rx1, Rx2, Rx3, Rx4, Rx5, Rx6, Rx7, Rx8, Rx9, Rx10, Rx11, Rx12, Rx13, Rx14, Rx15, Rx16, Rx17, Rx18, Rx19, Rx20, Rx21, Rx22, Rx23, Rx24, Rx25, Rx26, Rx27, Rx28, Rx29, Rx30, Rx31, Rx32, Rx33, Rx34, Rx35, Rx36, Rx37, Rx38, Rx39, Rx40, Rx41, Rx42, Rx43, Rx44, Rx45, Rx46, Rx47, Rx48, Rx49, Rx50, Rx51, Rx52, Rx53, Rx54, Rx55, Rx56, Rx57, Rx58, Rx59, Rx60, Rx61, Rx62, Rx63, Rx64, Rx65, Rx66, Rx67, Rx68, Rx69, Rx70, Rx71, Rx72, Rx73, Rx74, Rx75, Rx76, Rx77, Rx78, Rx79, Rx80, Rx81, Rx82, Rx83, Rx84, Rx85, Rx86, Rx87, Rx88, Rx89, Rx90, Rx91, Rx92, Rx93, Rx94, Rx95, Rx96, Rx97, Rx98, Rx99, Rx100, Rx101, Rx102, Rx103, Rx104, Rx105, Rx106, Rx107, Rx108, Rx109, Rx110, Rx111, Rx112, Rx113, Rx114, Rx115, Rx116, Rx117, Rx118, Rx119, Rx120, Rx121, Rx122, Rx123, Rx124, Rx125, Rx126, Rx127, Rx128, Rx129, Rx130, Rx131, Rx132, Rx133, Rx134, Rx135, Rx136, Rx137, Rx138, Rx139, Rx140, Rx141, Rx142, Rx143, Rx144, Rx145, Rx146, Rx147, Rx148, Rx149, Rx150, Rx151, Rx152, Rx153, Rx154, Rx155, Rx156, Rx157
 , Rx158, Rx159, Rx160, Rx161, Rx162, Rx163, Rx164, Rx165, Rx166, Rx167, Rx168, Rx169, Rx170, Rx171, Rx172, Rx173, Rx174, Rx175, Rx176, Rx177, Rx178, Rx179, Rx180, Rx181, Rx182, Rx183, Rx184, Rx185, Rx186, Rx187, Rx188, Rx189, Rx190, Rx191, Rx1000, Rx1001, Rx1002, Rx1003, Rx1004, Rx1005, Rx1006, Rx1007, Rx1008, Rx1009, Rx1010, Rx1011, Rx1012
\ No newline at end of file

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarY.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarY.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarY.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarY.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,14 @@
+//===-- AMDILRegisterUsesScalarY.td ---------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+Ry1, Ry2, Ry3, Ry4, Ry5, Ry6, Ry7, Ry8, Ry9, Ry10, Ry11, Ry12, Ry13, Ry14, Ry15, Ry16, Ry17, Ry18, Ry19, Ry20, Ry21, Ry22, Ry23, Ry24, Ry25, Ry26, Ry27, Ry28, Ry29, Ry30, Ry31, Ry32, Ry33, Ry34, Ry35, Ry36, Ry37, Ry38, Ry39, Ry40, Ry41, Ry42, Ry43, Ry44, Ry45, Ry46, Ry47, Ry48, Ry49, Ry50, Ry51, Ry52, Ry53, Ry54, Ry55, Ry56, Ry57, Ry58, Ry59, Ry60, Ry61, Ry62, Ry63, Ry64, Ry65, Ry66, Ry67, Ry68, Ry69, Ry70, Ry71, Ry72, Ry73, Ry74, Ry75, Ry76, Ry77, Ry78, Ry79, Ry80, Ry81, Ry82, Ry83, Ry84, Ry85, Ry86, Ry87, Ry88, Ry89, Ry90, Ry91, Ry92, Ry93, Ry94, Ry95, Ry96, Ry97, Ry98, Ry99, Ry100, Ry101, Ry102, Ry103, Ry104, Ry105, Ry106, Ry107, Ry108, Ry109, Ry110, Ry111, Ry112, Ry113, Ry114, Ry115, Ry116, Ry117, Ry118, Ry119, Ry120, Ry121, Ry122, Ry123, Ry124, Ry125, Ry126, Ry127, Ry128, Ry129, Ry130, Ry131, Ry132, Ry133, Ry134, Ry135, Ry136, Ry137, Ry138, Ry139, Ry140, Ry141, Ry142, Ry143, Ry144, Ry145, Ry146, Ry147, Ry148, Ry149, Ry150, Ry151, Ry152, Ry153, Ry154, Ry155, Ry156, Ry157
 , Ry158, Ry159, Ry160, Ry161, Ry162, Ry163, Ry164, Ry165, Ry166, Ry167, Ry168, Ry169, Ry170, Ry171, Ry172, Ry173, Ry174, Ry175, Ry176, Ry177, Ry178, Ry179, Ry180, Ry181, Ry182, Ry183, Ry184, Ry185, Ry186, Ry187, Ry188, Ry189, Ry190, Ry191, Ry1000, Ry1001, Ry1002, Ry1003, Ry1004, Ry1005, Ry1006, Ry1007, Ry1008, Ry1009, Ry1010, Ry1011, Ry1012
\ No newline at end of file

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarZ.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarZ.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarZ.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarZ.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,14 @@
+//===-- AMDILRegisterUsesScalarZ.td ---------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+Rz1, Rz2, Rz3, Rz4, Rz5, Rz6, Rz7, Rz8, Rz9, Rz10, Rz11, Rz12, Rz13, Rz14, Rz15, Rz16, Rz17, Rz18, Rz19, Rz20, Rz21, Rz22, Rz23, Rz24, Rz25, Rz26, Rz27, Rz28, Rz29, Rz30, Rz31, Rz32, Rz33, Rz34, Rz35, Rz36, Rz37, Rz38, Rz39, Rz40, Rz41, Rz42, Rz43, Rz44, Rz45, Rz46, Rz47, Rz48, Rz49, Rz50, Rz51, Rz52, Rz53, Rz54, Rz55, Rz56, Rz57, Rz58, Rz59, Rz60, Rz61, Rz62, Rz63, Rz64, Rz65, Rz66, Rz67, Rz68, Rz69, Rz70, Rz71, Rz72, Rz73, Rz74, Rz75, Rz76, Rz77, Rz78, Rz79, Rz80, Rz81, Rz82, Rz83, Rz84, Rz85, Rz86, Rz87, Rz88, Rz89, Rz90, Rz91, Rz92, Rz93, Rz94, Rz95, Rz96, Rz97, Rz98, Rz99, Rz100, Rz101, Rz102, Rz103, Rz104, Rz105, Rz106, Rz107, Rz108, Rz109, Rz110, Rz111, Rz112, Rz113, Rz114, Rz115, Rz116, Rz117, Rz118, Rz119, Rz120, Rz121, Rz122, Rz123, Rz124, Rz125, Rz126, Rz127, Rz128, Rz129, Rz130, Rz131, Rz132, Rz133, Rz134, Rz135, Rz136, Rz137, Rz138, Rz139, Rz140, Rz141, Rz142, Rz143, Rz144, Rz145, Rz146, Rz147, Rz148, Rz149, Rz150, Rz151, Rz152, Rz153, Rz154, Rz155, Rz156, Rz157
 , Rz158, Rz159, Rz160, Rz161, Rz162, Rz163, Rz164, Rz165, Rz166, Rz167, Rz168, Rz169, Rz170, Rz171, Rz172, Rz173, Rz174, Rz175, Rz176, Rz177, Rz178, Rz179, Rz180, Rz181, Rz182, Rz183, Rz184, Rz185, Rz186, Rz187, Rz188, Rz189, Rz190, Rz191, Rz1000, Rz1001, Rz1002, Rz1003, Rz1004, Rz1005, Rz1006, Rz1007, Rz1008, Rz1009, Rz1010, Rz1011, Rz1012
\ No newline at end of file

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,14 @@
+//===-- AMDILRegisterUsesV2.td --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+Rxy1, Rzw1, Rxy2, Rzw2, Rxy3, Rzw3, Rxy4, Rzw4, Rxy5, Rzw5, Rxy6, Rzw6, Rxy7, Rzw7, Rxy8, Rzw8, Rxy9, Rzw9, Rxy10, Rzw10, Rxy11, Rzw11, Rxy12, Rzw12, Rxy13, Rzw13, Rxy14, Rzw14, Rxy15, Rzw15, Rxy16, Rzw16, Rxy17, Rzw17, Rxy18, Rzw18, Rxy19, Rzw19, Rxy20, Rzw20, Rxy21, Rzw21, Rxy22, Rzw22, Rxy23, Rzw23, Rxy24, Rzw24, Rxy25, Rzw25, Rxy26, Rzw26, Rxy27, Rzw27, Rxy28, Rzw28, Rxy29, Rzw29, Rxy30, Rzw30, Rxy31, Rzw31, Rxy32, Rzw32, Rxy33, Rzw33, Rxy34, Rzw34, Rxy35, Rzw35, Rxy36, Rzw36, Rxy37, Rzw37, Rxy38, Rzw38, Rxy39, Rzw39, Rxy40, Rzw40, Rxy41, Rzw41, Rxy42, Rzw42, Rxy43, Rzw43, Rxy44, Rzw44, Rxy45, Rzw45, Rxy46, Rzw46, Rxy47, Rzw47, Rxy48, Rzw48, Rxy49, Rzw49, Rxy50, Rzw50, Rxy51, Rzw51, Rxy52, Rzw52, Rxy53, Rzw53, Rxy54, Rzw54, Rxy55, Rzw55, Rxy56, Rzw56, Rxy57, Rzw57, Rxy58, Rzw58, Rxy59, Rzw59, Rxy60, Rzw60, Rxy61, Rzw61, Rxy62, Rzw62, Rxy63, Rzw63, Rxy64, Rzw64, Rxy65, Rzw65, Rxy66, Rzw66, Rxy67, Rzw67, Rxy68, Rzw68, Rxy69, Rzw69, Rxy70, Rzw70, Rxy71, Rzw71, Rxy72, Rzw72,
  Rxy73, Rzw73, Rxy74, Rzw74, Rxy75, Rzw75, Rxy76, Rzw76, Rxy77, Rzw77, Rxy78, Rzw78, Rxy79, Rzw79, Rxy80, Rzw80, Rxy81, Rzw81, Rxy82, Rzw82, Rxy83, Rzw83, Rxy84, Rzw84, Rxy85, Rzw85, Rxy86, Rzw86, Rxy87, Rzw87, Rxy88, Rzw88, Rxy89, Rzw89, Rxy90, Rzw90, Rxy91, Rzw91, Rxy92, Rzw92, Rxy93, Rzw93, Rxy94, Rzw94, Rxy95, Rzw95, Rxy96, Rzw96, Rxy97, Rzw97, Rxy98, Rzw98, Rxy99, Rzw99, Rxy100, Rzw100, Rxy101, Rzw101, Rxy102, Rzw102, Rxy103, Rzw103, Rxy104, Rzw104, Rxy105, Rzw105, Rxy106, Rzw106, Rxy107, Rzw107, Rxy108, Rzw108, Rxy109, Rzw109, Rxy110, Rzw110, Rxy111, Rzw111, Rxy112, Rzw112, Rxy113, Rzw113, Rxy114, Rzw114, Rxy115, Rzw115, Rxy116, Rzw116, Rxy117, Rzw117, Rxy118, Rzw118, Rxy119, Rzw119, Rxy120, Rzw120, Rxy121, Rzw121, Rxy122, Rzw122, Rxy123, Rzw123, Rxy124, Rzw124, Rxy125, Rzw125, Rxy126, Rzw126, Rxy127, Rzw127, Rxy128, Rzw128, Rxy129, Rzw129, Rxy130, Rzw130, Rxy131, Rzw131, Rxy132, Rzw132, Rxy133, Rzw133, Rxy134, Rzw134, Rxy135, Rzw135, Rxy136, Rzw136, Rxy137, Rzw137, Rx
 y138, Rzw138, Rxy139, Rzw139, Rxy140, Rzw140, Rxy141, Rzw141, Rxy142, Rzw142, Rxy143, Rzw143, Rxy144, Rzw144, Rxy145, Rzw145, Rxy146, Rzw146, Rxy147, Rzw147, Rxy148, Rzw148, Rxy149, Rzw149, Rxy150, Rzw150, Rxy151, Rzw151, Rxy152, Rzw152, Rxy153, Rzw153, Rxy154, Rzw154, Rxy155, Rzw155, Rxy156, Rzw156, Rxy157, Rzw157, Rxy158, Rzw158, Rxy159, Rzw159, Rxy160, Rzw160, Rxy161, Rzw161, Rxy162, Rzw162, Rxy163, Rzw163, Rxy164, Rzw164, Rxy165, Rzw165, Rxy166, Rzw166, Rxy167, Rzw167, Rxy168, Rzw168, Rxy169, Rzw169, Rxy170, Rzw170, Rxy171, Rzw171, Rxy172, Rzw172, Rxy173, Rzw173, Rxy174, Rzw174, Rxy175, Rzw175, Rxy176, Rzw176, Rxy177, Rzw177, Rxy178, Rzw178, Rxy179, Rzw179, Rxy180, Rzw180, Rxy181, Rzw181, Rxy182, Rzw182, Rxy183, Rzw183, Rxy184, Rzw184, Rxy185, Rzw185, Rxy186, Rzw186, Rxy187, Rzw187, Rxy188, Rzw188, Rxy189, Rzw189, Rxy190, Rzw190, Rxy191, Rzw191, Rxy1000, Rzw1000, Rxy1001, Rzw1001, Rxy1002, Rzw1002, Rxy1003, Rzw1003, Rxy1004, Rzw1004, Rxy1005, Rzw1005, Rxy1006, Rzw1006, R
 xy1007, Rzw1007, Rxy1008, Rzw1008, Rxy1009, Rzw1009, Rxy1010, Rzw1010, Rxy1011, Rzw1011, Rxy1012, Rzw1012
\ No newline at end of file

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2XY.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2XY.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2XY.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2XY.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,14 @@
+//===-- AMDILRegisterUsesV2XY.td ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+Rxy1, Rxy2, Rxy3, Rxy4, Rxy5, Rxy6, Rxy7, Rxy8, Rxy9, Rxy10, Rxy11, Rxy12, Rxy13, Rxy14, Rxy15, Rxy16, Rxy17, Rxy18, Rxy19, Rxy20, Rxy21, Rxy22, Rxy23, Rxy24, Rxy25, Rxy26, Rxy27, Rxy28, Rxy29, Rxy30, Rxy31, Rxy32, Rxy33, Rxy34, Rxy35, Rxy36, Rxy37, Rxy38, Rxy39, Rxy40, Rxy41, Rxy42, Rxy43, Rxy44, Rxy45, Rxy46, Rxy47, Rxy48, Rxy49, Rxy50, Rxy51, Rxy52, Rxy53, Rxy54, Rxy55, Rxy56, Rxy57, Rxy58, Rxy59, Rxy60, Rxy61, Rxy62, Rxy63, Rxy64, Rxy65, Rxy66, Rxy67, Rxy68, Rxy69, Rxy70, Rxy71, Rxy72, Rxy73, Rxy74, Rxy75, Rxy76, Rxy77, Rxy78, Rxy79, Rxy80, Rxy81, Rxy82, Rxy83, Rxy84, Rxy85, Rxy86, Rxy87, Rxy88, Rxy89, Rxy90, Rxy91, Rxy92, Rxy93, Rxy94, Rxy95, Rxy96, Rxy97, Rxy98, Rxy99, Rxy100, Rxy101, Rxy102, Rxy103, Rxy104, Rxy105, Rxy106, Rxy107, Rxy108, Rxy109, Rxy110, Rxy111, Rxy112, Rxy113, Rxy114, Rxy115, Rxy116, Rxy117, Rxy118, Rxy119, Rxy120, Rxy121, Rxy122, Rxy123, Rxy124, Rxy125, Rxy126, Rxy127, Rxy128, Rxy129, Rxy130, Rxy131, Rxy132, Rxy133, Rxy134, Rxy135, Rxy136, Rxy137, R
 xy138, Rxy139, Rxy140, Rxy141, Rxy142, Rxy143, Rxy144, Rxy145, Rxy146, Rxy147, Rxy148, Rxy149, Rxy150, Rxy151, Rxy152, Rxy153, Rxy154, Rxy155, Rxy156, Rxy157, Rxy158, Rxy159, Rxy160, Rxy161, Rxy162, Rxy163, Rxy164, Rxy165, Rxy166, Rxy167, Rxy168, Rxy169, Rxy170, Rxy171, Rxy172, Rxy173, Rxy174, Rxy175, Rxy176, Rxy177, Rxy178, Rxy179, Rxy180, Rxy181, Rxy182, Rxy183, Rxy184, Rxy185, Rxy186, Rxy187, Rxy188, Rxy189, Rxy190, Rxy191, Rxy1000, Rxy1001, Rxy1002, Rxy1003, Rxy1004, Rxy1005, Rxy1006, Rxy1007, Rxy1008, Rxy1009, Rxy1010, Rxy1011, Rxy1012
\ No newline at end of file

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2ZW.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2ZW.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2ZW.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2ZW.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,14 @@
+//===-- AMDILRegisterUsesV2ZW.td ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+Rzw1, Rzw2, Rzw3, Rzw4, Rzw5, Rzw6, Rzw7, Rzw8, Rzw9, Rzw10, Rzw11, Rzw12, Rzw13, Rzw14, Rzw15, Rzw16, Rzw17, Rzw18, Rzw19, Rzw20, Rzw21, Rzw22, Rzw23, Rzw24, Rzw25, Rzw26, Rzw27, Rzw28, Rzw29, Rzw30, Rzw31, Rzw32, Rzw33, Rzw34, Rzw35, Rzw36, Rzw37, Rzw38, Rzw39, Rzw40, Rzw41, Rzw42, Rzw43, Rzw44, Rzw45, Rzw46, Rzw47, Rzw48, Rzw49, Rzw50, Rzw51, Rzw52, Rzw53, Rzw54, Rzw55, Rzw56, Rzw57, Rzw58, Rzw59, Rzw60, Rzw61, Rzw62, Rzw63, Rzw64, Rzw65, Rzw66, Rzw67, Rzw68, Rzw69, Rzw70, Rzw71, Rzw72, Rzw73, Rzw74, Rzw75, Rzw76, Rzw77, Rzw78, Rzw79, Rzw80, Rzw81, Rzw82, Rzw83, Rzw84, Rzw85, Rzw86, Rzw87, Rzw88, Rzw89, Rzw90, Rzw91, Rzw92, Rzw93, Rzw94, Rzw95, Rzw96, Rzw97, Rzw98, Rzw99, Rzw100, Rzw101, Rzw102, Rzw103, Rzw104, Rzw105, Rzw106, Rzw107, Rzw108, Rzw109, Rzw110, Rzw111, Rzw112, Rzw113, Rzw114, Rzw115, Rzw116, Rzw117, Rzw118, Rzw119, Rzw120, Rzw121, Rzw122, Rzw123, Rzw124, Rzw125, Rzw126, Rzw127, Rzw128, Rzw129, Rzw130, Rzw131, Rzw132, Rzw133, Rzw134, Rzw135, Rzw136, Rzw137, R
 zw138, Rzw139, Rzw140, Rzw141, Rzw142, Rzw143, Rzw144, Rzw145, Rzw146, Rzw147, Rzw148, Rzw149, Rzw150, Rzw151, Rzw152, Rzw153, Rzw154, Rzw155, Rzw156, Rzw157, Rzw158, Rzw159, Rzw160, Rzw161, Rzw162, Rzw163, Rzw164, Rzw165, Rzw166, Rzw167, Rzw168, Rzw169, Rzw170, Rzw171, Rzw172, Rzw173, Rzw174, Rzw175, Rzw176, Rzw177, Rzw178, Rzw179, Rzw180, Rzw181, Rzw182, Rzw183, Rzw184, Rzw185, Rzw186, Rzw187, Rzw188, Rzw189, Rzw190, Rzw191, Rzw1000, Rzw1001, Rzw1002, Rzw1003, Rzw1004, Rzw1005, Rzw1006, Rzw1007, Rzw1008, Rzw1009, Rzw1010, Rzw1011, Rzw1012
\ No newline at end of file

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV4.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV4.td?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV4.td (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV4.td Tue Aug 14 16:38:58 2012
@@ -0,0 +1,14 @@
+//===-- AMDILRegisterUsesV4.td --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127, R128, R129, R130, R131, R132, R133, R134, R135, R136, R137, R138, R139, R140, R141, R142, R143, R144, R145, R146, R147, R148, R149, R150, R151, R152, R153, R154, R155, R156, R157, R158, R159, R160, R161, R162, R163, R164, R165, R166, R167, R168, R169, R170, R171, R172, R173, R174, R175, R176, R177, R178, R179, R180, R181, R182, R183,
  R184, R185, R186, R187, R188, R189, R190, R191, R1000, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, R1009, R1010, R1011, R1012
\ No newline at end of file

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,213 @@
+//===-- AMDILSIAsmPrinter.cpp ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILSIAsmPrinter.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Type.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+
+// TODO: Add support for verbose.
+// Constructor: forwards the standard AsmPrinter argument set to the
+// generic AMDIL asm printer base class; no SI-specific state is set up.
+AMDILSIAsmPrinter::AMDILSIAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
+  : AMDILAsmPrinter(ASM_PRINTER_ARGUMENTS)
+{
+}
+
+// Destructor is empty; all state is owned by the base class.
+AMDILSIAsmPrinter::~AMDILSIAsmPrinter()
+{
+}
+//
+// @param name mangled kernel name, expected form "__OpenCL_<name>_kernel"
+// @brief strips the "__OpenCL_" prefix and the "_kernel" suffix from the
+// name and returns the stripped name if both tokens are present;
+// otherwise the name is returned unchanged.
+//
+// NOTE(review): only the presence of the two tokens is checked, not
+// their position -- the substr() below assumes "__OpenCL_" (9 chars) is
+// at the very start and "_kernel" (7 chars) at the very end
+// (9 + 7 == 16).  A name that merely contains the tokens elsewhere would
+// be mis-stripped; confirm callers only pass properly mangled names.
+//
+static
+std::string Strip(const std::string &name)
+{
+  size_t start = name.find("__OpenCL_");
+  size_t end = name.find("_kernel");
+  // Return the name unmodified unless both tokens were found at
+  // distinct offsets.
+  if (start == std::string::npos
+      || end == std::string::npos
+      || (start == end)) {
+    return name;
+  } else {
+    // Drop the 9-char prefix and the 7-char suffix.
+    return name.substr(9, name.length()-16);
+  }
+}
+//
+// @param MI machine instruction whose operand 0 may name a global
+// @param O  output stream receiving the IL text
+// @brief resolves the macro name for a macro-function instruction and
+// forwards to emitMCallInst.  When operand 0 is not a global address the
+// placeholder name "unknown" is used.  "__fma_f32" is rewritten to
+// "__hwfma_f32" when the device provides hardware FMA.
+//
+void
+AMDILSIAsmPrinter::emitMacroFunc(const MachineInstr *MI,
+                                 OSTREAM_TYPE &O)
+{
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  const char *name = "unknown";
+  llvm::StringRef nameRef;
+  if (MI->getOperand(0).isGlobal()) {
+    nameRef = MI->getOperand(0).getGlobal()->getName();
+    name = nameRef.data();
+  }
+  // Prefer the hardware FMA macro when the device supports it.
+  if (!::strncmp(name, "__fma_f32", 9) && curTarget->device()->usesHardware(
+        AMDILDeviceInfo::FMA)) {
+    name = "__hwfma_f32";
+  }
+  emitMCallInst(MI, O, name);
+}
+
+// @param lMF machine function to emit IL for
+// @brief caches the per-function state (metadata manager, function info,
+// module info), derives the stripped kernel name via Strip(), and emits
+// the function header and body.  Always returns false (the function is
+// not modified).
+bool
+AMDILSIAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
+{
+  this->MF = &lMF;
+  mMeta->setMF(&lMF);
+  mMFI = lMF.getInfo<AMDILMachineFunctionInfo>();
+  mAMI = &(lMF.getMMI().getObjFileInfo<AMDILModuleInfo>());
+
+  SetupMachineFunction(lMF);
+  std::string kernelName = MF->getFunction()->getName();
+  // mName holds the user-visible (stripped) name, mKernelName the
+  // original mangled name.
+  mName = Strip(kernelName);
+
+  mKernelName = kernelName;
+  EmitFunctionHeader();
+  EmitFunctionBody();
+  return false;
+}
+
+// @param II machine instruction to emit
+// @brief emits the IL text for one instruction.  Macro functions are
+// delegated to emitMacroFunc; macro calls are expanded inline through
+// the reserved temporary register 1000 (mcall + mov-back); everything
+// else goes through the generated printInstruction.
+void
+AMDILSIAsmPrinter::EmitInstruction(const MachineInstr *II)
+{
+  // All text is built into an in-memory buffer first and handed to the
+  // streamer in a single EmitRawText call on each exit path.
+  std::string FunStr;
+  raw_string_ostream OFunStr(FunStr);
+  formatted_raw_ostream O(OFunStr);
+  const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
+  if (mDebugMode) {
+    // Prepend the MachineInstr dump as an IL comment when debugging.
+    O << ";" ;
+    II->print(O);
+  }
+  if (isMacroFunc(II)) {
+    emitMacroFunc(II, O);
+    O.flush();
+    OutStreamer.EmitRawText(StringRef(FunStr));
+    return;
+  }
+
+  if (isMacroCall(II)) {
+    unsigned reg = 0;
+    unsigned newDst = 0;
+    OpSwizzle opSwiz, oldSwiz;
+    // Skip the first 5 characters of the opcode name to recover the
+    // macro name -- presumably a fixed-length "MACRO"-style prefix;
+    // confirm against the instruction definitions.
+    const char *name = mTM->getInstrInfo()->getName(II->getOpcode()) + 5;
+    // Prefer the hardware FMA macro when the device supports it.
+    if (!::strncmp(name, "__fma_f32", 9)
+        && curTarget->device()->usesHardware(
+          AMDILDeviceInfo::FMA)) {
+      name = "__hwfma_f32";
+    }
+    //assert(0 &&
+    //"Found a macro that is still in use!");
+    int macronum = amd::MacroDBFindMacro(name);
+    O << "\t;"<< name<<"\n";
+    O << "\tmcall("<<macronum<<") ";
+    reg = II->getOperand(0).getReg();
+    // The macro writes into reserved temporary register 1000.  Pick the
+    // variant (scalar component / xy / zw / full vector) that matches
+    // the components of the real destination, plus a matching dst
+    // swizzle; the original swizzle is preserved for the mov-back.
+    newDst = AMDIL::R1000;
+    oldSwiz.u8all = opSwiz.u8all =
+                      II->getOperand(0).getTargetFlags();
+    if (isXComponentReg(reg)) {
+      newDst = AMDIL::Rx1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isYComponentReg(reg)) {
+      newDst = AMDIL::Ry1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isZComponentReg(reg)) {
+      newDst = AMDIL::Rz1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isWComponentReg(reg)) {
+      newDst = AMDIL::Rw1000;
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isXYComponentReg(reg)) {
+      newDst = AMDIL::Rxy1000;
+      opSwiz.bits.swizzle = AMDIL_DST_XY__;
+    } else if (isZWComponentReg(reg)) {
+      newDst = AMDIL::Rzw1000;
+      opSwiz.bits.swizzle = AMDIL_DST_XY__;
+    } else {
+      opSwiz.bits.swizzle = AMDIL_DST_DFLT;
+    }
+    // Operand 0 is the destination (printed as the redirected temp in
+    // its own parenthesized group); the remaining operands are the
+    // macro arguments, comma-separated in a second group.
+    for (unsigned x = 0, y = II->getNumOperands(); x < y; ++x) {
+      if (!x) {
+        O << "(";
+        O << getRegisterName(newDst);
+        O << getDstSwizzle(opSwiz.bits.swizzle);
+      } else {
+        printOperand(II, x
+                     , O
+                    );
+      }
+      if (!x) {
+        O << "), (";
+      } else if (x != y - 1) {
+        O << ", ";
+      } else {
+        O << ")\n";
+      }
+    }
+    // Copy the macro result from the temporary back into the original
+    // destination, selecting a source swizzle that routes the temp's
+    // lanes into the destination's component positions.
+    O << "\tmov " << getRegisterName(reg) << getDstSwizzle(oldSwiz.bits.swizzle)
+      << ", " << getRegisterName(newDst);
+    if (isXComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_X000);
+    } else if (isYComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_0X00);
+    } else if (isZComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_00X0);
+    } else if (isWComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_000X);
+    } else if (isXYComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_XY00);
+    } else if (isZWComponentReg(reg)) {
+      O << getSrcSwizzle(AMDIL_SRC_00XY);
+    } else {
+      O << getSrcSwizzle(AMDIL_SRC_DFLT);
+    }
+    O << "\n";
+    // Record the macro for later emission: via the macro DB when the
+    // device supports it, otherwise as a called intrinsic on the
+    // per-function info.
+    if (curTarget->device()->isSupported(
+          AMDILDeviceInfo::MacroDB)) {
+      mMacroIDs.insert(macronum);
+    } else {
+      mMFI->addCalledIntr(macronum);
+    }
+  } else {
+
+    // Print the assembly for the instruction.
+    // We want to make sure that we do HW constants
+    // before we do arena segment
+    printInstruction(II, O);
+  }
+  O.flush();
+  OutStreamer.EmitRawText(StringRef(FunStr));
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,59 @@
+//===-- AMDILSIAsmPrinter.h -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_SI_ASM_PRINTER_H_
+#define _AMDIL_SI_ASM_PRINTER_H_
+#include "AMDILAsmPrinter.h"
+
+namespace llvm
+{
+// AsmPrinter specialization for the SI family of devices.  Overrides
+// instruction emission so macro calls are expanded through the reserved
+// R1000 temporary register set.
+class LLVM_LIBRARY_VISIBILITY AMDILSIAsmPrinter : public AMDILAsmPrinter
+{
+public:
+  //
+  // Constructor for the AMDIL SI specific AsmPrinter class.
+  // Interface is defined by LLVM proper and should reference
+  // there for more information.
+  //
+  AMDILSIAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS);
+
+  //
+  // Destructor for the SI Asm Printer class.  The body is empty; all
+  // allocated state is owned and released by the base class.
+  //
+  virtual ~AMDILSIAsmPrinter();
+
+  //
+  // @param MI machine instruction to print
+  // @brief emits the IL text for one machine instruction, expanding
+  // macro functions and macro calls where required.
+  //
+  void
+  EmitInstruction(const MachineInstr *MI);
+
+  //
+  // @param F MachineFunction to print the assembly for
+  // @brief parse the specified machine function and print
+  // out the assembly for all the instructions in the function
+  //
+  bool
+  runOnMachineFunction(MachineFunction &F);
+
+protected:
+  //
+  // @param MI Machine instruction to emit the macro code for
+  //
+  // Emits a fully functional macro function that uses the argument
+  // registers as the macro arguments.
+  //
+  virtual void
+  emitMacroFunc(const MachineInstr *MI, OSTREAM_TYPE &O);
+
+}; // AMDILSIAsmPrinter
+} // end of llvm namespace
+#endif // _AMDIL_SI_ASM_PRINTER_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,206 @@
+//===-- AMDILSIDevice.cpp -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILSIDevice.h"
+#include "AMDILSubtarget.h"
+#include "AMDILSIIOExpansion.h"
+#include "AMDILSIPointerManager.h"
+#include "AMDILSIAsmPrinter.h"
+
+using namespace llvm;
+
+// Construct an SI-family device.  SI derives from the Cayman device and
+// refines its capabilities via setCaps(), then records which SI part
+// this is from the subtarget's device name; any name not matched below
+// defaults to Cape Verde.
+AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST)
+  : AMDILCaymanDevice(ST)
+{
+  setCaps();
+  std::string name = ST->getDeviceName();
+  if (name == "tahiti") {
+    mDeviceFlag = OCL_DEVICE_TAHITI;
+  } else if (name == "pitcairn") {
+    mDeviceFlag = OCL_DEVICE_PITCAIRN;
+  } else if (name == "dogs") {    // "dogs" for Oland, which is CI series but has a SI core
+    mDeviceFlag = OCL_DEVICE_DOGS;
+  } else {
+    mDeviceFlag = OCL_DEVICE_CAPEVERDE;
+  }
+}
+
+AMDILSIDevice::~AMDILSIDevice()
+{
+}
+
+// Configure the hardware (mHWBits) and software-emulation (mSWBits)
+// capability vectors for the SI generation, refining what the Cayman
+// base class set up.
+void
+AMDILSIDevice::setCaps()
+{
+  // SI uses a private UAV and has no arena UAV / arena segment.
+  mHWBits.set(AMDILDeviceInfo::PrivateUAV);
+  mHWBits.reset(AMDILDeviceInfo::ArenaUAV);
+  mSWBits.reset(AMDILDeviceInfo::ArenaSegment);
+  mHWBits.reset(AMDILDeviceInfo::ArenaSegment);
+  mHWBits.set(AMDILDeviceInfo::ByteStores);
+  // Hardware 64-bit div/mod requires a new enough CAL compiler.
+  if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
+    mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+    mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
+  }
+  // Apple builds always enable image support; elsewhere it is only
+  // enabled when explicitly overridden.
+  if (!mSTM->isApple()) {
+    if (mSTM->isOverride(AMDILDeviceInfo::Images)) {
+      mHWBits.set(AMDILDeviceInfo::Images);
+    }
+  } else {
+    mHWBits.set(AMDILDeviceInfo::Images);
+  }
+  if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+    mHWBits.set(AMDILDeviceInfo::CachedMem);
+  }
+  // Byte-wide LDS ops and 64-bit integer ops are native on SI.
+  mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
+  mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
+  mHWBits.set(AMDILDeviceInfo::LongOps);
+  mSWBits.reset(AMDILDeviceInfo::LongOps);
+  mHWBits.set(AMDILDeviceInfo::TmrReg);
+  mHWBits.set(AMDILDeviceInfo::PPAMode);
+  // The software mode is enabled until global memory has
+  // been verified, then we can enable constant/private/local/region
+  // memory in hw mode.
+  //mHWBits.reset(AMDILDeviceInfo::ConstantMem);
+  //mHWBits.reset(AMDILDeviceInfo::PrivateMem);
+  //mSWBits.set(AMDILDeviceInfo::ConstantMem);
+  //mSWBits.set(AMDILDeviceInfo::PrivateMem);
+  //mHWBits.set(AMDILDeviceInfo::LocalMem);
+  //mHWBits.set(AMDILDeviceInfo::RegionMem);
+}
+
+// SI belongs to the HD7XXX hardware generation.
+uint32_t
+AMDILSIDevice::getGeneration() const
+{
+  return AMDILDeviceInfo::HD7XXX;
+}
+
+// SI supports 1024 UAVs.
+uint32_t
+AMDILSIDevice::getMaxNumUAVs() const
+{
+  return 1024;
+}
+// Map an abstract resource class ID to the concrete hardware resource
+// ID used in the emitted IL.  On SI, global/constant/raw-UAV traffic
+// shares UAV 9 and hardware scratch uses UAV 8; LDS/GDS/scratch fall
+// back to the global UAV when the corresponding memory is not handled
+// in hardware.  Arena UAVs do not exist on SI.
+uint32_t
+AMDILSIDevice::getResourceID(uint32_t id) const
+{
+  switch(id) {
+  default:
+    assert(0 && "ID type passed in is unknown!");
+    break;
+  case ARENA_UAV_ID:
+    assert(0 && "Arena UAV is not supported on SI device.");
+    // NOTE(review): with asserts disabled this falls through to the
+    // shared global UAV below -- confirm that is the intended
+    // release-mode behavior.
+  case GLOBAL_ID:
+  case CONSTANT_ID:
+  case RAW_UAV_ID:
+    return 9;
+  case LDS_ID:
+    if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+      return DEFAULT_LDS_ID;
+    } else {
+      return getResourceID(GLOBAL_ID);
+    }
+  case GDS_ID:
+    if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+      return DEFAULT_GDS_ID;
+    } else {
+      return getResourceID(GLOBAL_ID);
+    }
+  case SCRATCH_ID:
+    if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+      return 8;
+    } else {
+      return getResourceID(GLOBAL_ID);
+    }
+  };
+  // Unreachable in practice; keeps the compiler's return-path check
+  // satisfied after the default: assert.
+  return 0;
+}
+// Factory hook: returns the SI-specific I/O expansion pass.  The new
+// object is owned by the caller (the pass manager).
+FunctionPass*
+AMDILSIDevice::getIOExpansion(
+  TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+{
+  return new AMDILSIIOExpansion(TM, OptLevel);
+}
+
+// Factory hook: returns the SI-specific asm printer; owned by the caller.
+AsmPrinter*
+AMDILSIDevice::getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const
+{
+  return new AMDILSIAsmPrinter(ASM_PRINTER_ARGUMENTS);
+}
+
+// Factory hook: returns the SI-specific pointer manager pass; owned by
+// the caller.
+FunctionPass*
+AMDILSIDevice::getPointerManager(
+  TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+{
+  return new AMDILSIPointerManager(TM, OptLevel);
+}
+
+// 32-bit-pointer SI device; only the data layout differs from the base.
+AMDILSIDevice32::AMDILSIDevice32(AMDILSubtarget *ST)
+  : AMDILSIDevice(ST)
+{
+}
+
+AMDILSIDevice32::~AMDILSIDevice32()
+{
+}
+
+// LLVM data-layout string for the 32-bit-pointer configuration
+// (little-endian, p:32:32:32, with the vector alignments common to the
+// AMDIL targets).
+std::string
+AMDILSIDevice32::getDataLayout() const
+{
+  return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+                     "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+                     "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+                     "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+                     "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64"
+                     "-n8:16:32:64");
+}
+#if 0
+// Disabled: 64-bit-pointer / 32-bit-alignment SI device, kept for
+// reference until the configuration is supported.  The constructor was
+// previously defined with the wrong class name
+// (AMDILSIDevice64on32::AMDILSIDevice32), which would fail to compile
+// if this block were ever re-enabled; corrected below.
+AMDILSIDevice64on32::AMDILSIDevice64on32(AMDILSubtarget *ST)
+  : AMDILSIDevice(ST)
+{
+}
+
+AMDILSIDevice64on32::~AMDILSIDevice64on32()
+{
+}
+
+// LLVM data-layout string: 64-bit pointers with 32-bit ABI alignment.
+std::string
+AMDILSIDevice64on32::getDataLayout() const
+{
+  return std::string("e-p:64:32:32-i1:8:8-i8:8:8-i16:16:16"
+                     "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+                     "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+                     "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+                     "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64"
+                     "-n8:16:32:64");
+}
+#endif
+
+// 64-bit-pointer SI device; only the data layout differs from the base.
+AMDILSIDevice64::AMDILSIDevice64(AMDILSubtarget *ST)
+  : AMDILSIDevice(ST)
+{
+}
+
+AMDILSIDevice64::~AMDILSIDevice64()
+{
+}
+
+// LLVM data-layout string for the 64-bit-pointer configuration
+// (little-endian, p:64:64:64; note no a0:0:64 entry, unlike the 32-bit
+// variants).
+std::string
+AMDILSIDevice64::getDataLayout() const
+{
+  return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
+                     "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+                     "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+                     "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+                     "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+                     "-n8:16:32:64");
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,76 @@
+//===-- AMDILSIDevice.h ---------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file will define the interface that the SI generation needs to
+// implement in order to correctly answer queries on the capabilities of the specific
+// hardware.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILSIDEVICE_H_
+#define _AMDILSIDEVICE_H_
+#include "AMDILNIDevice.h"
+namespace llvm
+{
+class AMDILSubtarget;
+
+// Device that matches the SI family. The SI family is a scalar architecture
+// with a vector unit to do math. The SI device has 8/16 bit native load/store
+// instructions, 1024 UAVs and no arena. It inherits from the Cayman device
+// the capabilities the two generations share.
+//
+class AMDILSIDevice : public AMDILCaymanDevice
+{
+protected:
+  // Constructed only through the concrete 32/64-bit subclasses below.
+  AMDILSIDevice(AMDILSubtarget*);
+  // SI-specific IO-expansion pass (no arena path on SI).
+  FunctionPass*
+  getIOExpansion(TargetMachine&, CodeGenOpt::Level) const;
+  AsmPrinter*
+  getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const;
+  // SI-specific pointer-manager pass.
+  FunctionPass*
+  getPointerManager(TargetMachine&, CodeGenOpt::Level) const;
+
+public:
+  virtual ~AMDILSIDevice();
+  virtual uint32_t getGeneration() const;
+  virtual uint32_t getMaxNumUAVs() const;
+  virtual uint32_t getResourceID(uint32_t) const;
+  // Pure virtual: the data layout differs between 32 and 64-bit variants.
+  virtual std::string getDataLayout() const = 0;
+protected:
+  virtual void setCaps();
+}; // AMDILSIDevice
+// 32-bit SI device: supplies the 32-bit-pointer data layout.
+class AMDILSIDevice32 : public AMDILSIDevice
+{
+public:
+  AMDILSIDevice32(AMDILSubtarget*);
+  virtual ~AMDILSIDevice32();
+  virtual std::string getDataLayout() const;
+}; // AMDILSIDevice32
+#if 0
+// 64-bit pointer / 32-bit index SI device.  Disabled, matching the
+// compiled-out implementation in AMDILSIDevice.cpp.
+class AMDILSIDevice64on32 : public AMDILSIDevice
+{
+public:
+  AMDILSIDevice64on32(AMDILSubtarget*);
+  virtual ~AMDILSIDevice64on32();
+  virtual std::string getDataLayout() const;
+}; // AMDILSIDevice64on32
+#endif
+// 64-bit SI device: supplies the 64-bit-pointer data layout.
+class AMDILSIDevice64 : public AMDILSIDevice
+{
+public:
+  AMDILSIDevice64(AMDILSubtarget*);
+  virtual ~AMDILSIDevice64();
+  virtual std::string getDataLayout() const;
+}; // AMDILSIDevice64
+static const unsigned int MAX_LDS_SIZE_1000 = AMDILDevice::MAX_LDS_SIZE_800;
+} // namespace llvm
+
+#endif // _AMDILSIDEVICE_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,259 @@
+//===-- AMDILSIIOExpansion.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the I/O expansion class for SI devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILSIIOExpansion.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cstdio>
+using namespace llvm;
+// Delegates to the Evergreen IO expansion; SI overrides only the paths
+// that differ (64-bit image instructions, arena-free global load/store).
+AMDILSIIOExpansion::AMDILSIIOExpansion(TargetMachine &tm,
+                                       CodeGenOpt::Level OptLevel) : AMDILEGIOExpansion(tm, OptLevel)
+{
+}
+
+AMDILSIIOExpansion::~AMDILSIIOExpansion()
+{
+}
+// Human-readable pass name reported by the pass framework.
+const char *AMDILSIIOExpansion::getPassName() const
+{
+  return "AMDIL SI IO Expansion Pass";
+}
+
+// A memory operation is cacheable when the pointer manager has set the
+// CacheableRead bit in its asm-printer flags.
+bool
+AMDILSIIOExpansion::isCacheableOp(MachineInstr *MI)
+{
+  AMDILAS::InstrResEnc flags;
+  getAsmPrinterFlags(MI, flags);
+  return flags.bits.CacheableRead != 0;
+}
+
+// SI treats 64-bit image instructions as IO in addition to everything
+// the Evergreen expansion already classifies as IO.  Null instructions
+// are never IO.
+bool
+AMDILSIIOExpansion::isIOInstruction(TargetMachine &TM, MachineInstr *MI)
+{
+  return MI
+         && (is64BitImageInst(TM, MI)
+             || AMDILEGIOExpansion::isIOInstruction(MI));
+}
+
+// Dispatch 64-bit image instructions to the image expanders; anything
+// else (including unrecognized 64-bit image forms) is handed to the
+// Evergreen expansion.
+void
+AMDILSIIOExpansion::expandIOInstruction(TargetMachine &TM, MachineInstr *MI)
+{
+  assert(isIOInstruction(TM, MI) && "Must be an IO instruction to "
+         "be passed to this function!");
+  if (is64BitImageInst(TM, MI)) {
+    if (isReadImageInst(TM, MI) || isImageTXLDInst(TM, MI)) {
+      expandImageLoad(mBB, MI);
+      return;
+    } else if (isWriteImageInst(TM, MI)) {
+      expandImageStore(mBB, MI);
+      return;
+    } else if (isImageInfoInst(TM, MI)) {
+      expandImageParam(mBB, MI);
+      return;
+    }
+  }
+  AMDILEGIOExpansion::expandIOInstruction(MI);
+}
+
+// Expand a global (UAV raw) load into the device-specific opcode,
+// selecting between cached/uncached and 32/64-bit-addressed variants
+// based on the instruction flags and the access size.
+void
+AMDILSIIOExpansion::expandGlobalLoad(MachineInstr *MI)
+{
+  // These instructions are generated before the current MI.
+  expandLoadStartCode(MI);
+  DebugLoc DL = MI->getDebugLoc();
+  uint32_t ID = getPointerID(MI);
+  bool cacheable = isCacheableOp(MI);
+  bool is64bit = is64bitLSOp(TM, MI);
+  // Cached+aligned UAV loads require a minimum CAL version.
+  bool aligned = mSTM->calVersion() >= CAL_CACHED_ALIGNED_UAVS;
+  mKM->setOutputInst();
+  // 64-bit addresses occupy the xy register pair; 32-bit use x only.
+  uint32_t addyReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+  switch (getMemorySize(MI)) {
+  default:
+    if (cacheable) {
+      if (aligned) {
+        BuildMI(*mBB, MI, DL, mTII->get((is64bit) ?
+                                        AMDIL::UAVRAWLOADCACHEDALIGNED64_v4i32
+                                        : AMDIL::UAVRAWLOADCACHEDALIGNED_v4i32),
+                AMDIL::R1011).addReg(addyReg).addImm(ID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(
+                  (is64bit) ? AMDIL::UAVRAWLOADCACHED64_v4i32 :
+                  AMDIL::UAVRAWLOADCACHED_v4i32),
+                AMDIL::R1011).addReg(addyReg).addImm(ID);
+      }
+    } else {
+      // FIX: both arms of this conditional previously selected the 32-bit
+      // UAVRAWLOAD_v4i32 opcode, so 64-bit-addressed v4i32 loads used the
+      // wrong instruction; use the 64-bit variant, as every other case does.
+      BuildMI(*mBB, MI, DL, mTII->get(
+                (is64bit) ? AMDIL::UAVRAWLOAD64_v4i32 :
+                AMDIL::UAVRAWLOAD_v4i32),
+              AMDIL::R1011)
+      .addReg(addyReg)
+      .addImm(ID);
+    }
+    break;
+  case 1:
+    if (cacheable) {
+      BuildMI(*mBB, MI, DL, mTII->get((isSWSExtLoadInst(MI)
+                                       ? ((is64bit) ? AMDIL::UAVRAWLOADCACHED64_i8
+                                          : AMDIL::UAVRAWLOADCACHED_i8) :
+                                         ((is64bit) ? AMDIL::UAVRAWLOADCACHED64_u8
+                                          : AMDIL::UAVRAWLOADCACHED_u8))),
+                AMDIL::Rx1011)
+        .addReg(addyReg)
+        .addImm(ID);
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get((isSWSExtLoadInst(MI)
+                                       ? ((is64bit) ? AMDIL::UAVRAWLOAD64_i8 : AMDIL::UAVRAWLOAD_i8) :
+                                         ((is64bit) ? AMDIL::UAVRAWLOAD64_u8 : AMDIL::UAVRAWLOAD_u8))),
+                AMDIL::Rx1011)
+        .addReg(addyReg)
+        .addImm(ID);
+    }
+    break;
+  case 2:
+    if (cacheable) {
+      BuildMI(*mBB, MI, DL, mTII->get((isSWSExtLoadInst(MI)
+                                       ?  ((is64bit) ? AMDIL::UAVRAWLOADCACHED64_i16
+                                           : AMDIL::UAVRAWLOADCACHED_i16) :
+                                         ((is64bit) ? AMDIL::UAVRAWLOADCACHED64_u16
+                                          : AMDIL::UAVRAWLOADCACHED_u16))),
+                AMDIL::Rx1011)
+        .addReg(addyReg)
+        .addImm(ID);
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get((isSWSExtLoadInst(MI)
+                                       ? ((is64bit) ? AMDIL::UAVRAWLOAD64_i16 : AMDIL::UAVRAWLOAD_i16) :
+                                         ((is64bit) ? AMDIL::UAVRAWLOAD64_u16 : AMDIL::UAVRAWLOAD_u16))),
+                AMDIL::Rx1011)
+        .addReg(addyReg)
+        .addImm(ID);
+    }
+    break;
+  case 4:
+    if (cacheable) {
+      BuildMI(*mBB, MI, DL, mTII->get(
+                (is64bit) ? AMDIL::UAVRAWLOADCACHED64_i32 : AMDIL::UAVRAWLOADCACHED_i32),
+              AMDIL::Rx1011)
+      .addReg(addyReg)
+      .addImm(ID);
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(
+                (is64bit) ? AMDIL::UAVRAWLOAD64_i32 : AMDIL::UAVRAWLOAD_i32),
+              AMDIL::Rx1011)
+      .addReg(addyReg)
+      .addImm(ID);
+    }
+    break;
+  case 8:
+    if (cacheable) {
+      if (aligned) {
+        BuildMI(*mBB, MI, DL, mTII->get(
+                  (is64bit) ? AMDIL::UAVRAWLOADCACHEDALIGNED64_v2i32
+                  : AMDIL::UAVRAWLOADCACHEDALIGNED_v2i32),
+                AMDIL::Rxy1011).addReg(addyReg).addImm(ID);
+      } else {
+        BuildMI(*mBB, MI, DL, mTII->get(
+                  (is64bit) ? AMDIL::UAVRAWLOADCACHED64_v2i32 : AMDIL::UAVRAWLOADCACHED_v2i32),
+                AMDIL::Rxy1011).addReg(addyReg).addImm(ID);
+      }
+    } else {
+      BuildMI(*mBB, MI, DL, mTII->get(
+                (is64bit) ? AMDIL::UAVRAWLOAD64_v2i32 : AMDIL::UAVRAWLOAD_v2i32),
+              AMDIL::Rxy1011)
+      .addReg(addyReg)
+      .addImm(ID);
+    }
+    break;
+  };
+  expandPackedData(MI);
+  // Expand sign/zero extension of sub-word loads; if no extension was
+  // needed, fall back to the plain data register for this instruction.
+  unsigned dataReg = expandExtendLoad(MI);
+  if (!dataReg) {
+    dataReg = getDataReg(MI);
+  }
+  BuildMI(*mBB, MI, MI->getDebugLoc(),
+          mTII->get(getMoveInstFromID(
+                      MI->getDesc().OpInfo[0].RegClass)))
+  .addOperand(MI->getOperand(0))
+  .addReg(dataReg);
+  MI->getOperand(0).setReg(dataReg);
+}
+
+// Expand a global (UAV raw) store into the device-specific opcode,
+// selecting the 32 or 64-bit-addressed variant by access size.
+void
+AMDILSIIOExpansion::expandGlobalStore(MachineInstr *MI)
+{
+  // These instructions are expanded before the current MI.
+  AMDIL789IOExpansion::expandStoreSetupCode(MI);
+  uint32_t ID = getPointerID(MI);
+  mKM->setOutputInst();
+  bool is64bit = is64bitLSOp(TM, MI);
+  DebugLoc DL = MI->getDebugLoc();
+  // 64-bit addresses occupy the xy register pair; 32-bit use x only.
+  uint32_t addyReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+  switch (getMemorySize(MI)) {
+  default:
+    BuildMI(*mBB, MI, DL, mTII->get(
+              (is64bit) ? AMDIL::UAVRAWSTORE64_v4i32 :
+              AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
+    .addReg(addyReg)
+    .addReg(AMDIL::R1011)
+    .addImm(ID);
+    break;
+  case 1:
+    BuildMI(*mBB, MI, DL, mTII->get(
+              (is64bit) ? AMDIL::UAVRAWSTORE64_i8 :
+              AMDIL::UAVRAWSTORE_i8), AMDIL::MEMx)
+    .addReg(addyReg)
+    .addReg(AMDIL::Rx1011)
+    .addImm(ID);
+    break;
+  case 2:
+    BuildMI(*mBB, MI, DL, mTII->get(
+              (is64bit) ? AMDIL::UAVRAWSTORE64_i16 :
+              AMDIL::UAVRAWSTORE_i16), AMDIL::MEMx)
+    .addReg(addyReg)
+    .addReg(AMDIL::Rx1011)
+    .addImm(ID);
+    break;
+  case 4:
+    BuildMI(*mBB, MI, DL, mTII->get(
+              (is64bit) ? AMDIL::UAVRAWSTORE64_i32 :
+              AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
+    .addReg(addyReg)
+    .addReg(AMDIL::Rx1011)
+    .addImm(ID);
+    break;
+  case 8:
+    BuildMI(*mBB, MI, DL, mTII->get(
+              (is64bit) ? AMDIL::UAVRAWSTORE64_v2i32 :
+              AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEMxy)
+    .addReg(addyReg)
+    .addReg(AMDIL::Rxy1011)
+    .addImm(ID);
+    break;
+  };
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,42 @@
+//===-- AMDILSIIOExpansion.h ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_SIIO_EXPANSION_H_
+#define _AMDIL_SIIO_EXPANSION_H_
+#include "AMDILIOExpansion.h"
+namespace llvm
+{
+// Class that expands IO instructions for the SI family of devices.
+// The Global Load/Store functions need to be overloaded from the EG
+// class as an arena is not a valid operation on SI, but are valid
+// on the EG/NI devices.
+class AMDILSIIOExpansion : public AMDILEGIOExpansion
+{
+public:
+  AMDILSIIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+  virtual ~AMDILSIIOExpansion();
+  // Pass name reported by the pass framework.
+  const char* getPassName() const;
+protected:
+  // Also true for 64-bit image instructions, beyond the EG IO set.
+  virtual bool
+  isIOInstruction(TargetMachine &TM, MachineInstr *MI);
+  virtual void
+  expandIOInstruction(TargetMachine &TM, MachineInstr *MI);
+  // Overridden because the SI path never uses arena accesses.
+  void
+  expandGlobalStore(MachineInstr *MI);
+  void
+  expandGlobalLoad(MachineInstr *MI);
+  // True when the instruction carries the CacheableRead flag.
+  virtual bool
+  isCacheableOp(MachineInstr* MI);
+}; // class AMDILSIIOExpansion
+} // namespace llvm
+#endif // _AMDIL_SIIO_EXPANSION_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,368 @@
+//===-- AMDILSIPointerManager.cpp -----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILPointerManagerImpl.h"
+#include "AMDILSIPointerManager.h"
+#include "AMDILCompilerErrors.h"
+#include "AMDILDeviceInfo.h"
+#include "AMDILKernelManager.h"
+#include "AMDILMachineFunctionInfo.h"
+#include "AMDILModuleInfo.h"
+#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include <iostream>
+#include <set>
+#include <map>
+#include <list>
+#include <queue>
+#include <iomanip>
+using namespace llvm;
+namespace
+{
+// SI-specific pointer-manager implementation.  Overrides the annotation
+// hooks of the generic implementation; on SI byte pointers fall back to
+// the default UAV rather than an arena resource.
+class AMDILSIPointerManagerImpl : public AMDILPointerManagerImpl
+{
+public:
+  AMDILSIPointerManagerImpl(MachineFunction &mf, TargetMachine &tm)
+    : AMDILPointerManagerImpl(mf, tm) {}
+  virtual ~AMDILSIPointerManagerImpl() {}
+protected:
+  virtual void annotateBytePtrs();
+  virtual void annotateRawPtrs();
+  virtual void annotateCacheablePtrs();
+  virtual void annotateCacheableInstrs();
+};
+}
+
+// A byte pointer is a pointer that along the pointer path has a
+// byte store assigned to it.  For each instruction reached through such
+// a pointer, set the resource ID appropriate to its address space.
+void
+AMDILSIPointerManagerImpl::annotateBytePtrs()
+{
+  PtrSet::iterator siBegin, siEnd;
+  std::vector<MachineInstr*>::iterator miBegin, miEnd;
+  AMDILMachineFunctionInfo *mMFI = NULL;
+  for (siBegin = bytePtrs.begin(), siEnd = bytePtrs.end();
+       siBegin != siEnd; ++siBegin) {
+    const PointerType *PT = dyn_cast<PointerType>(siBegin->second->getType());
+    if (!PT) {
+      continue;
+    }
+    // Conflicting pointers keep their default annotation.
+    if (conflictPtrs.count(*siBegin)) {
+      continue;
+    }
+    assert(!rawPtrs.count(*siBegin) && "Found a byte pointer "
+           "that also exists as a raw pointer!");
+    for (miBegin = PtrToInstMap[siBegin->second].begin(),
+         miEnd = PtrToInstMap[siBegin->second].end();
+         miBegin != miEnd; ++miBegin) {
+      MachineInstr *MI = (*miBegin);
+      if (DEBUGME) {
+        // FIX: message previously said "as arena"; SI has no arena and
+        // this loop annotates byte pointers.
+        dbgs() << "Annotating pointer as byte. Inst: ";
+        (*miBegin)->dump();
+      }
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+
+      if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)
+          && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
+        // If hardware constant mem is enabled, then we need to
+        // get the constant pointer CB number and use that to specify
+        // the resource ID.
+        MachineFunction *MF = (*miBegin)->getParent()->getParent();
+        AMDILModuleInfo *mAMI = &(MF->getMMI().getObjFileInfo<AMDILModuleInfo>());
+        const StringRef funcName = (*miBegin)->getParent()->getParent()
+                                   ->getFunction()->getName();
+        if (mAMI->isKernel(funcName)) {
+          const AMDILKernel *krnl = mAMI->getKernel(funcName);
+          curRes.bits.ResourceID = mAMI->getConstPtrCB(krnl,
+                                   siBegin->second->getName());
+          curRes.bits.HardwareInst = 1;
+        } else {
+          curRes.bits.ResourceID = STM->device()
+                                   ->getResourceID(AMDILDevice::CONSTANT_ID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+                 && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
+        // If hardware local mem is enabled, get the local mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::LDS_ID);
+        if (isAtomicInst(TM, *miBegin)) {
+          // FIX: message was inverted ("cannot be non-zero") while the
+          // assert requires a non-zero resource ID.
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+                 "cannot be zero!");
+          MI->getOperand(MI->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)
+                 && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
+        // If hardware region mem is enabled, get the gds mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::GDS_ID);
+        if (isAtomicInst(TM, *miBegin)) {
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+                 "cannot be zero!");
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)
+                 && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::SCRATCH_ID);
+      } else {
+        // Global (or software-emulated) path: mark as byte store and use
+        // the default global UAV.
+        if (DEBUGME) {
+          dbgs() << __LINE__ << ": Setting byte store bit on instruction: ";
+          (*miBegin)->dump();
+        }
+        curRes.bits.ByteStore = 1;
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::GLOBAL_ID);
+        if (MI->getOperand(MI->getNumOperands() - 1).isImm()) {
+          MI->getOperand(MI->getNumOperands() - 1).setImm(curRes.bits.ResourceID);
+        }
+        if (DEBUGME) {
+          dbgs() << "Annotating pointer as default. Inst: ";
+          (*miBegin)->dump();
+        }
+      }
+      setAsmPrinterFlags(*miBegin, curRes);
+      KM->setUAVID(siBegin->second, curRes.bits.ResourceID);
+      if (!mMFI) {
+        mMFI = (*miBegin)->getParent()->getParent()
+               ->getInfo<AMDILMachineFunctionInfo>();
+      }
+      mMFI->uav_insert(curRes.bits.ResourceID);
+    }
+  }
+}
+
+// A raw pointer is any pointer that does not have byte store in its path.
+// This function is unique to SI devices as arena is not part of it.
+// Non-conflicting global raw pointers each receive their own UAV ID.
+void
+AMDILSIPointerManagerImpl::annotateRawPtrs()
+{
+  PtrSet::iterator siBegin, siEnd;
+  std::vector<MachineInstr*>::iterator miBegin, miEnd;
+  AMDILMachineFunctionInfo *mMFI = NULL;
+  // Now all of the raw pointers will each get their own UAV ID,
+  // starting from the default global resource ID.
+  unsigned id = STM->device()
+                ->getResourceID(AMDILDevice::GLOBAL_ID);
+  for (siBegin = rawPtrs.begin(), siEnd = rawPtrs.end();
+       siBegin != siEnd; ++siBegin) {
+    const PointerType *PT = dyn_cast<PointerType>(siBegin->second->getType());
+    if (!PT) {
+      continue;
+    }
+    if (PT->getAddressSpace() == AMDILAS::GLOBAL_ADDRESS) {
+      // If we have a conflict, we don't change it from default.
+      if (conflictPtrs.count(*siBegin)) {
+        continue;
+      }
+      ++id;
+    }
+    // FIX: assert text previously read " that also exists as a byte
+    // pointers!" (leading space, wrong plural).
+    assert(!bytePtrs.count(*siBegin) && "Found a raw pointer "
+           "that also exists as a byte pointer!");
+    for (miBegin = PtrToInstMap[siBegin->second].begin(),
+         miEnd = PtrToInstMap[siBegin->second].end();
+         miBegin != miEnd; ++miBegin) {
+      if (DEBUGME) {
+        dbgs() << "Annotating pointer as raw. Inst: ";
+        (*miBegin)->dump();
+      }
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+      if (!curRes.bits.ConflictPtr) {
+        assert(!curRes.bits.ByteStore
+               && "Found a instruction that is marked as "
+               "raw but has a byte store bit set!");
+      } else {
+        // Conflicting pointers may carry a stale byte-store bit; clear it.
+        curRes.bits.ByteStore = 0;
+      }
+      if (STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)
+          && PT->getAddressSpace() == AMDILAS::CONSTANT_ADDRESS) {
+        // If hardware constant mem is enabled, then we need to
+        // get the constant pointer CB number and use that to specify
+        // the resource ID.
+        MachineFunction *MF = (*miBegin)->getParent()->getParent();
+        AMDILModuleInfo *mAMI = &(MF->getMMI().getObjFileInfo<AMDILModuleInfo>());
+        const StringRef funcName = (*miBegin)->getParent()->getParent()
+                                   ->getFunction()->getName();
+        if (mAMI->isKernel(funcName)) {
+          const AMDILKernel *krnl = mAMI->getKernel(funcName);
+          curRes.bits.ResourceID = mAMI->getConstPtrCB(krnl,
+                                   siBegin->second->getName());
+          curRes.bits.HardwareInst = 1;
+        } else {
+          curRes.bits.ResourceID = STM->device()
+                                   ->getResourceID(AMDILDevice::CONSTANT_ID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+                 && PT->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
+        // If hardware local mem is enabled, get the local mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::LDS_ID);
+        if (isAtomicInst(TM, *miBegin)) {
+          // FIX: message was inverted ("cannot be non-zero") while the
+          // assert requires a non-zero resource ID.
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+                 "cannot be zero!");
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)
+                 && PT->getAddressSpace() == AMDILAS::REGION_ADDRESS) {
+        // If hardware region mem is enabled, get the gds mem ID from
+        // the device to use as the ResourceID
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::GDS_ID);
+        if (isAtomicInst(TM, *miBegin)) {
+          assert(curRes.bits.ResourceID && "Atomic resource ID "
+                 "cannot be zero!");
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        }
+      } else if (STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)
+                 && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+        curRes.bits.ResourceID = STM->device()
+                                 ->getResourceID(AMDILDevice::SCRATCH_ID);
+      } else {
+        // Global path: a pointer that cannot alias gets its own UAV ID,
+        // otherwise it shares the default global resource.
+        const Argument *curArg = dyn_cast<Argument>(siBegin->second);
+        if (!curRes.bits.ConflictPtr || !curArg
+            || curArg->hasNoAliasAttr()
+            || STM->device()->isSupported(AMDILDeviceInfo::NoAlias)) {
+          curRes.bits.ResourceID = id;
+        } else {
+          curRes.bits.ResourceID = STM->device()
+                                   ->getResourceID(AMDILDevice::GLOBAL_ID);
+        }
+        if (isAtomicInst(TM, *miBegin)) {
+          (*miBegin)->getOperand((*miBegin)->getNumOperands()-1)
+          .setImm(curRes.bits.ResourceID);
+        }
+        if (!mMFI) {
+          mMFI = (*miBegin)->getParent()->getParent()
+                 ->getInfo<AMDILMachineFunctionInfo>();
+        }
+        KM->setUAVID(siBegin->second, curRes.bits.ResourceID);
+        mMFI->uav_insert(curRes.bits.ResourceID);
+      }
+      if (DEBUGME) {
+        dbgs() << "Setting pointer to resource ID "
+               << curRes.bits.ResourceID << ": ";
+        siBegin->second->dump();
+      }
+      setAsmPrinterFlags(*miBegin, curRes);
+    }
+  }
+}
+// This function annotates the cacheable pointers with the
+// CacheableRead bit.
+void
+AMDILSIPointerManagerImpl::annotateCacheablePtrs()
+{
+  PtrSet::iterator siBegin, siEnd;
+  std::vector<MachineInstr*>::iterator miBegin, miEnd;
+  // First we can check the cacheable pointers
+  for (siBegin = cacheablePtrs.begin(), siEnd = cacheablePtrs.end();
+       siBegin != siEnd; ++siBegin) {
+    assert(!bytePtrs.count(*siBegin) && "Found a cacheable pointer "
+           "that also exists as a byte pointer!");
+    // If we have any kind of conflict, don't add it as cacheable.
+    if (conflictPtrs.count(*siBegin)) {
+      continue;
+    }
+    // Mark every instruction that accesses through this pointer.
+    for (miBegin = PtrToInstMap[siBegin->second].begin(),
+         miEnd = PtrToInstMap[siBegin->second].end();
+         miBegin != miEnd; ++miBegin) {
+      if (DEBUGME) {
+        dbgs() << "Annotating pointer as cacheable. Inst: ";
+        (*miBegin)->dump();
+      }
+      AMDILAS::InstrResEnc curRes;
+      getAsmPrinterFlags(*miBegin, curRes);
+      assert(!curRes.bits.ByteStore && "No cacheable pointers should have the "
+             "byte Store flag set!");
+      curRes.bits.CacheableRead = 1;
+      setAsmPrinterFlags(*miBegin, curRes);
+    }
+  }
+}
+// Set the CacheableRead bit on every instruction the analysis phase
+// collected into cacheableSet.
+void
+AMDILSIPointerManagerImpl::annotateCacheableInstrs()
+{
+  CacheableInstrSet::iterator miBegin, miEnd;
+
+  for (miBegin = cacheableSet.begin(),
+       miEnd = cacheableSet.end();
+       miBegin != miEnd; ++miBegin) {
+    if (DEBUGME) {
+      dbgs() << "Annotating instr as cacheable. Inst: ";
+      (*miBegin)->dump();
+    }
+    AMDILAS::InstrResEnc curRes;
+    getAsmPrinterFlags(*miBegin, curRes);
+    assert(!curRes.bits.ByteStore && "No cacheable pointers should have the "
+           "byte Store flag set!");
+    curRes.bits.CacheableRead = 1;
+    setAsmPrinterFlags(*miBegin, curRes);
+  }
+}
+
+// Thin pass wrapper; all work is delegated to AMDILSIPointerManagerImpl
+// in runOnMachineFunction.
+AMDILSIPointerManager::AMDILSIPointerManager(
+  TargetMachine &tm,
+  CodeGenOpt::Level OL) :
+  AMDILPointerManager(tm, OL)
+{
+}
+
+AMDILSIPointerManager::~AMDILSIPointerManager()
+{
+}
+
+// Human-readable pass name reported by the pass framework.
+const char*
+AMDILSIPointerManager::getPassName() const
+{
+  return "AMD IL SI Pointer Manager Pass";
+}
+// Run the SI pointer-manager implementation over the function, then
+// clear the temporary per-instruction flags the impl left behind.
+// Returns true if the function was modified.
+bool
+AMDILSIPointerManager::runOnMachineFunction(MachineFunction &MF)
+{
+  if (DEBUGME) {
+    dbgs() << getPassName() << "\n";
+    dbgs() << MF.getFunction()->getName() << "\n";
+    MF.dump();
+  }
+
+  AMDILSIPointerManagerImpl impl(MF, TM);
+  bool changed = impl.perform();
+
+  clearTempMIFlags(MF);
+
+  return changed;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,39 @@
+//===-- AMDILSIPointerManager.h -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_SI_POINTER_MANAGER_H_
+#define _AMDIL_SI_POINTER_MANAGER_H_
+#include "AMDILPointerManager.h"
+namespace llvm
+{
+class MachineFunction;
+
+// The pointer manager for Southern Islands
+// devices. This pointer manager allocates and tracks
+// cached memory, raw resources, and
+// whether multi-uav is utilized or not.
+class AMDILSIPointerManager : public AMDILPointerManager
+{
+public:
+  AMDILSIPointerManager(
+    TargetMachine &tm,
+    CodeGenOpt::Level OL);
+  virtual ~AMDILSIPointerManager();
+  // Pass name reported by the pass framework.
+  virtual const char*
+  getPassName() const;
+  // Runs the SI pointer-manager implementation over the function.
+  virtual bool
+  runOnMachineFunction(MachineFunction &F);
+private:
+}; // class AMDILSIPointerManager
+} // end llvm namespace
+#endif // _AMDIL_SI_POINTER_MANAGER_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,198 @@
+//===-- AMDILSubtarget.cpp ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AMD IL specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILSubtarget.h"
+#include "AMDIL.h"
+#include "AMDILDevices.h"
+#include "AMDILKernelManager.h"
+#include "AMDILUtilityFunctions.h"
+#include "AMDILGenSubtarget.inc"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/SubtargetFeature.h"
+
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_MC_DESC
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "AMDILGenSubtarget.inc"
+
+// Construct the subtarget from the target triple, CPU name and feature
+// string.  The feature string is pre-processed here: the custom "+mwgs"
+// (workgroup sizes) and "+cal" (CAL version) entries are consumed locally,
+// everything else is forwarded to the tblgen-generated
+// ParseSubtargetFeatures().
+AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS )
+{
+  // Zero the capability-override table (element size * element count).
+  memset(CapsOverride, 0, sizeof(*CapsOverride)
+         * AMDILDeviceInfo::MaxNumberCapabilities);
+  // Default card
+  std::string GPU = "rv770";
+  mIs64bit = false;
+  mFlatAddress = false;
+  mVersion = -1U;
+  mMetadata30 = false;
+  SmallVector<StringRef, DEFAULT_VEC_SLOTS> Features;
+  SplitString(FS, Features, ",");
+  // Default workgroup size is 64x1x1.
+  mDefaultSize[0] = 64;
+  mDefaultSize[1] = 1;
+  mDefaultSize[2] = 1;
+  std::string newFeatures = "";
+#if defined(_DEBUG) || defined(DEBUG)
+  bool useTest = false;
+#endif
+  for (size_t x = 0; x < Features.size(); ++x) {
+    if (Features[x].startswith("+mwgs")) {
+      // Encoded as "+mwgs-<dims>-<x>-<y>-<z>"; clamp to at most 3 dims.
+      SmallVector<StringRef, DEFAULT_VEC_SLOTS> sizes;
+      SplitString(Features[x], sizes, "-");
+      size_t mDim = ::atoi(sizes[1].data());
+      if (mDim > 3) {
+        mDim = 3;
+      }
+      for (size_t y = 0; y < mDim; ++y) {
+        mDefaultSize[y] = ::atoi(sizes[y+2].data());
+      }
+#if defined(_DEBUG) || defined(DEBUG)
+    } else if (!Features[x].compare("test")) {
+      // Debug-only: force the "test" device selection below.
+      useTest = true;
+#endif
+    } else if (Features[x].startswith("+cal")) {
+      // Encoded as "+cal=<version>".
+      SmallVector<StringRef, DEFAULT_VEC_SLOTS> version;
+      SplitString(Features[x], version, "=");
+      mVersion = ::atoi(version[1].data());
+    } else {
+      // Anything unrecognized is passed through to the generated parser.
+      GPU = CPU;
+      if (newFeatures.length() > 0) newFeatures += ',';
+      newFeatures += Features[x];
+    }
+  }
+  // If we don't have a version then set it to
+  // -1 which enables everything. This is for
+  // offline devices.  (Only reachable when "+cal=0" was parsed; the
+  // default initialization above is already -1U.)
+  if (!mVersion) {
+    mVersion = (uint32_t)-1;
+  }
+  // Every workgroup dimension must be at least 1.
+  for (int x = 0; x < 3; ++x) {
+    if (!mDefaultSize[x]) {
+      mDefaultSize[x] = 1;
+    }
+  }
+#if defined(_DEBUG) || defined(DEBUG)
+  if (useTest) {
+    GPU = "kauai";
+  }
+#endif
+  ParseSubtargetFeatures(GPU, newFeatures);
+#if defined(_DEBUG) || defined(DEBUG)
+  if (useTest) {
+    GPU = "test";
+  }
+#endif
+  mDevName = GPU;
+  mDevice = getDeviceFromName(mDevName, this, mIs64bit);
+}
+// The subtarget owns the device object created in the constructor.
+AMDILSubtarget::~AMDILSubtarget()
+{
+  delete mDevice;
+}
+// True if the given capability was explicitly overridden.
+bool
+AMDILSubtarget::isOverride(AMDILDeviceInfo::Caps caps) const
+{
+  assert(caps < AMDILDeviceInfo::MaxNumberCapabilities &&
+         "Caps index is out of bounds!");
+  return CapsOverride[caps];
+}
+// Whether this build targets Apple platforms; hard-wired to false here.
+bool
+AMDILSubtarget::isApple() const
+{
+  return false;
+}
+
+// True when the flat-address feature overrides the default address spaces.
+bool
+AMDILSubtarget::overridesFlatAS() const
+{
+  return mFlatAddress;
+}
+
+// True when compiling for a 64-bit pointer target.
+bool
+AMDILSubtarget::is64bit() const
+{
+  return mIs64bit;
+}
+
+// AMDIL does not emit ELF objects.
+bool
+AMDILSubtarget::isTargetELF() const
+{
+  return false;
+}
+
+// True when the 3.0 metadata format is supported.
+bool
+AMDILSubtarget::supportMetadata30() const
+{
+  return mMetadata30;
+}
+
+// Default workgroup size for dimension 'dim'.  Only dimensions 0..2 are
+// stored; any other dimension reports 1.
+size_t
+AMDILSubtarget::getDefaultSize(uint32_t dim) const
+{
+  // mDefaultSize has exactly 3 entries, so dim == 3 must also take the
+  // fallback path; the previous 'dim > 3' test read one past the array.
+  if (dim >= 3) {
+    return 1;
+  } else {
+    return mDefaultSize[dim];
+  }
+}
+// Return the CAL version the backend targets (-1U means "enable all").
+uint32_t
+AMDILSubtarget::calVersion() const
+{
+  return mVersion;
+}
+
+// Accessors for the cached kernel manager.  mKM is declared 'mutable' so
+// the setter can be called on a const subtarget; ownership stays with the
+// caller.
+AMDILKernelManager*
+AMDILSubtarget::getKernelManager() const
+{
+  return mKM;
+}
+void
+AMDILSubtarget::setKernelManager(AMDILKernelManager *km) const
+{
+  mKM = km;
+}
+// Data layout string for the target.  Before a device object exists,
+// fall back to a generic 32- or 64-bit pointer layout; afterwards defer
+// to the device.
+std::string
+AMDILSubtarget::getDataLayout() const
+{
+  if (!mDevice) {
+    if (is64bit()) {
+      return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
+                         "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+                         "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+                         "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+                         "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
+    } else {
+      return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+                         "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+                         "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+                         "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+                         "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
+    }
+  }
+  return mDevice->getDataLayout();
+}
+
+// Name of the GPU device selected in the constructor.
+std::string
+AMDILSubtarget::getDeviceName() const
+{
+  return mDevName;
+}
+// Device capability object (owned by this subtarget).
+const AMDILDevice *
+AMDILSubtarget::device() const
+{
+  return mDevice;
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,77 @@
+//===-- AMDILSubtarget.h --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AMDIL specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILSUBTARGET_H_
+#define _AMDILSUBTARGET_H_
+
+#include "AMDIL.h"
+#include "AMDILDevice.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDILGenSubtarget.inc"
+
+#include <string>
+#include <cstdlib>
+// Maximum constant-buffer size in bytes (64KB).
+#define MAX_CB_SIZE (1 << 16)
+namespace llvm
+{
+class Module;
+class AMDILKernelManager;
+class AMDILDevice;
+// AMDIL subtarget: parses the feature string and owns the AMDILDevice
+// object describing the selected GPU's capabilities.
+class AMDILSubtarget : public AMDILGenSubtargetInfo
+{
+private:
+  // Per-capability override flags, indexed by AMDILDeviceInfo::Caps.
+  bool CapsOverride[AMDILDeviceInfo::MaxNumberCapabilities];
+  // Cached kernel manager; mutable so a const subtarget can set it.
+  mutable AMDILKernelManager *mKM;
+  // Device capability object created from the device name (owned).
+  const AMDILDevice *mDevice;
+  // Default workgroup sizes for dimensions x/y/z.
+  size_t mDefaultSize[3];
+  // NOTE(review): mMinimumSize and mIs32on64bit are not referenced in the
+  // visible implementation file — confirm they are used elsewhere.
+  size_t mMinimumSize[3];
+  std::string mDevName;
+  // Targeted CAL version; -1U enables everything (offline devices).
+  uint32_t mVersion;
+  bool mIs64bit;
+  bool mIs32on64bit;
+  bool mMetadata30;
+  bool mFlatAddress;
+public:
+  AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS);
+  virtual ~AMDILSubtarget();
+  // True if the given capability was explicitly overridden.
+  bool isOverride(AMDILDeviceInfo::Caps) const;
+  bool isApple() const;
+  bool is64bit() const;
+  bool supportMetadata30() const;
+  bool overridesFlatAS() const;
+
+  // Helper functions to simplify if statements
+  bool isTargetELF() const;
+  AMDILKernelManager* getKernelManager() const;
+  void setKernelManager(AMDILKernelManager *gm) const;
+  const AMDILDevice* device() const;
+  std::string getDataLayout() const;
+  std::string getDeviceName() const;
+  // Default workgroup size for the given dimension (0..2).
+  virtual size_t getDefaultSize(uint32_t dim) const;
+  // Return the version of CAL that the backend should target.
+  uint32_t calVersion() const;
+  // ParseSubtargetFeatures - Parses features string setting specified
+  // subtarget options.  Definition of function is
+  // auto generated by tblgen.
+  void
+  ParseSubtargetFeatures(
+    llvm::StringRef CPU,
+    llvm::StringRef FS);
+
+};
+
+} // end namespace llvm
+
+#endif // _AMDILSUBTARGET_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,932 @@
+//===-- AMDILSwizzleEncoder.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation of the AMDIL Swizzle Encoder. The swizzle encoder goes
+// through all instructions in a machine function and all operands and encodes swizzle
+// information in the operands. The AsmParser can then use the swizzle information to
+// print out the swizzles correctly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "SwizzleEncoder"
+#if !defined(NDEBUG)
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME (false)
+#endif
+#include "AMDILSwizzleEncoder.h"
+#include "AMDILAlgorithms.tpp"
+#include "AMDILUtilityFunctions.h"
+#include "AMDILRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+/// Encode all of the swizzles for the instructions in the machine function.
+static void encodeSwizzles(MachineFunction &MF, bool mDebug);
+/// Get the swizzle id for the src swizzle that corresponds to the
+/// current operand.
+static OpSwizzle getSrcSwizzleID(MachineInstr *MI, unsigned opNum);
+
+/// Get the swizzle id for the dst swizzle that corresponds to the
+/// current instruction.
+static OpSwizzle getDstSwizzleID(MachineInstr *MI);
+
+/// Determine if the custom source swizzle or the
+/// default swizzle for the specified operand should be used.
+static bool isCustomSrcInst(MachineInstr *MI, unsigned opNum);
+
+/// Get the custom source swizzle that corresponds to the specified
+/// operand for the instruction.
+static OpSwizzle getCustomSrcSwizzle(MachineInstr *MI, unsigned opNum);
+
+/// Determine if the custom destination swizzle or the
+/// default swizzle should be used for the instruction.
+static bool isCustomDstInst(MachineInstr *MI);
+
+/// Get the custom destination swizzle that corresponds to the
+/// instruction.
+static OpSwizzle getCustomDstSwizzle(MachineInstr *MI);
+
+/// Determine if the instruction is a custom vector instruction
+/// that needs a unique swizzle type.
+static bool isCustomVectorInst(MachineInstr *MI);
+
+/// Encode the new swizzle for the vector instruction.
+static void encodeVectorInst(MachineInstr *MI, bool mDebug);
+/// Helper function to dump the operand for the machine instruction
+/// and the relevant target flags.
+static void dumpOperand(MachineInstr *MI, unsigned opNum);
+namespace llvm
+{
+// Factory used by the AMDIL target to instantiate this pass.
+FunctionPass*
+createAMDILSwizzleEncoder(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+{
+  return new AMDILSwizzleEncoder(TM, OptLevel);
+}
+}
+
+// The pass only records the optimization level and whether debug output is
+// enabled; the TargetMachine argument is not stored here.
+AMDILSwizzleEncoder::AMDILSwizzleEncoder(TargetMachine &tm,
+    CodeGenOpt::Level OptLevel) :
+  MachineFunctionPass(ID)
+{
+  mDebug = DEBUGME;
+  opt = OptLevel;
+}
+
+// Human-readable pass name reported to the pass manager.
+const char* AMDILSwizzleEncoder::getPassName() const
+{
+  return "AMD IL Swizzle Encoder Pass";
+}
+
+// Encode swizzle information into the target flags of every operand.
+// Always reports the function as modified.
+bool AMDILSwizzleEncoder::runOnMachineFunction(MachineFunction &MF)
+{
+  // Encode swizzles in instruction operands.
+  encodeSwizzles(MF, mDebug);
+  return true;
+}
+
+/// Dump the operand swizzle information to the dbgs() stream.
+/// The swizzle is decoded from the operand's target-flags byte.
+void dumpOperand(MachineInstr *MI, unsigned opNum)
+{
+  OpSwizzle swizID;
+  swizID.u8all = MI->getOperand(opNum).getTargetFlags();
+  dbgs() << "\t" << (swizID.bits.dst ? "Dst" : "Src")
+         << " Operand: " << opNum << " SwizID: "
+         << (unsigned)swizID.bits.swizzle
+         << " Swizzle: " << (swizID.bits.dst
+                             ? getDstSwizzle(swizID.bits.swizzle)
+                             : getSrcSwizzle(swizID.bits.swizzle)) << "\n";
+
+}
+
+// This function checks for instructions that don't have
+// normal swizzle patterns to their source operands. These have to be
+// handled on a case by case basis.
+bool isCustomSrcInst(MachineInstr *MI, unsigned opNum)
+{
+  unsigned opcode = MI->getOpcode();
+  switch (opcode) {
+  default:
+    break;
+    // LDS/GDS/scratch/constant-buffer and arena memory operations: all
+    // operands use custom source swizzles.
+  case AMDIL::LDSLOAD:
+  case AMDIL::LDSLOAD_i8:
+  case AMDIL::LDSLOAD_u8:
+  case AMDIL::LDSLOAD_i16:
+  case AMDIL::LDSLOAD_u16:
+  case AMDIL::LDSSTORE:
+  case AMDIL::LDSSTORE_i8:
+  case AMDIL::LDSSTORE_i16:
+  case AMDIL::GDSLOAD:
+  case AMDIL::GDSSTORE:
+  case AMDIL::SCRATCHLOAD:
+  case AMDIL::CBLOAD:
+  case AMDIL::UAVARENALOAD_i8:
+  case AMDIL::UAVARENALOAD_i16:
+  case AMDIL::UAVARENALOAD_i32:
+  case AMDIL::UAVARENASTORE_i8:
+  case AMDIL::UAVARENASTORE_i16:
+  case AMDIL::UAVARENASTORE_i32:
+  case AMDIL::LDSLOAD64:
+  case AMDIL::LDSLOAD64_i8:
+  case AMDIL::LDSLOAD64_u8:
+  case AMDIL::LDSLOAD64_i16:
+  case AMDIL::LDSLOAD64_u16:
+  case AMDIL::LDSSTORE64:
+  case AMDIL::LDSSTORE64_i8:
+  case AMDIL::LDSSTORE64_i16:
+  case AMDIL::GDSLOAD64:
+  case AMDIL::GDSSTORE64:
+  case AMDIL::SCRATCHLOAD64:
+  case AMDIL::CBLOAD64:
+    return true;
+    // 64-bit conditional moves: only the condition operand (1) is custom.
+  case AMDIL::CMOVLOG_f64:
+  case AMDIL::CMOVLOG_i64:
+    return (opNum == 1) ? true : false;
+    // Semaphore/append, 64-bit pack/unpack, call/return, hi/lo combines
+    // and 64-bit control flow: all operands are custom.
+  case AMDIL::SEMAPHORE_INIT:
+  case AMDIL::SEMAPHORE_WAIT:
+  case AMDIL::SEMAPHORE_SIGNAL:
+  case AMDIL::APPEND_CONSUME:
+  case AMDIL::APPEND_ALLOC:
+  case AMDIL::APPEND64_CONSUME:
+  case AMDIL::APPEND64_ALLOC:
+  case AMDIL::LLO:
+  case AMDIL::LLO_v2i64:
+  case AMDIL::LHI:
+  case AMDIL::LHI_v2i64:
+  case AMDIL::LCREATE:
+  case AMDIL::LCREATE_v2i64:
+  case AMDIL::CALL:
+  case AMDIL::RETURN:
+  case AMDIL::RETDYN:
+  case AMDIL::DHI:
+  case AMDIL::DLO:
+  case AMDIL::DCREATE:
+  case AMDIL::DHI_v2f64:
+  case AMDIL::DLO_v2f64:
+  case AMDIL::DCREATE_v2f64:
+  case AMDIL::HILO_BITOR_v2i32:
+  case AMDIL::HILO_BITOR_v4i16:
+  case AMDIL::HILO_BITOR_v2i64:
+  case AMDIL::CONTINUE_LOGICALNZ_f64:
+  case AMDIL::BREAK_LOGICALNZ_f64:
+  case AMDIL::IF_LOGICALNZ_f64:
+  case AMDIL::CONTINUE_LOGICALZ_f64:
+  case AMDIL::BREAK_LOGICALZ_f64:
+  case AMDIL::IF_LOGICALZ_f64:
+  case AMDIL::CONTINUE_LOGICALNZ_i64:
+  case AMDIL::BREAK_LOGICALNZ_i64:
+  case AMDIL::IF_LOGICALNZ_i64:
+  case AMDIL::CONTINUE_LOGICALZ_i64:
+  case AMDIL::BREAK_LOGICALZ_i64:
+  case AMDIL::IF_LOGICALZ_i64:
+    return true;
+    // Bit-insert: only the two value operands (1 and 2) are custom.
+  case AMDIL::UBIT_INSERT_i32:
+  case AMDIL::UBIT_INSERT_v2i32:
+  case AMDIL::UBIT_INSERT_v4i32:
+    return (opNum == 1 || opNum == 2);
+  };
+  return false;
+}
+
+// This function returns the OpSwizzle with the custom swizzle set
+// correctly for source operands.
+OpSwizzle getCustomSrcSwizzle(MachineInstr *MI, unsigned opNum)
+{
+  OpSwizzle opSwiz;
+  opSwiz.u8all = 0;
+  unsigned opcode = MI->getOpcode();
+  unsigned reg = (MI->getOperand(opNum).isReg()
+                  ? MI->getOperand(opNum).getReg() : 0);
+  switch (opcode) {
+  default:
+    break;
+    // Memory ops: the address operand (1) broadcasts .x, all other
+    // operands keep the default swizzle.
+  case AMDIL::SCRATCHLOAD:
+  case AMDIL::CBLOAD:
+  case AMDIL::LDSLOAD:
+  case AMDIL::LDSLOAD_i8:
+  case AMDIL::LDSLOAD_u8:
+  case AMDIL::LDSLOAD_i16:
+  case AMDIL::LDSLOAD_u16:
+  case AMDIL::GDSLOAD:
+  case AMDIL::GDSSTORE:
+  case AMDIL::LDSSTORE:
+  case AMDIL::LDSSTORE_i8:
+  case AMDIL::LDSSTORE_i16:
+  case AMDIL::SCRATCHLOAD64:
+  case AMDIL::CBLOAD64:
+  case AMDIL::LDSLOAD64:
+  case AMDIL::LDSLOAD64_i8:
+  case AMDIL::LDSLOAD64_u8:
+  case AMDIL::LDSLOAD64_i16:
+  case AMDIL::LDSLOAD64_u16:
+  case AMDIL::GDSLOAD64:
+  case AMDIL::GDSSTORE64:
+  case AMDIL::LDSSTORE64:
+  case AMDIL::LDSSTORE64_i8:
+  case AMDIL::LDSSTORE64_i16:
+    opSwiz.bits.swizzle = (opNum == 1)
+                          ? AMDIL_SRC_XXXX: AMDIL_SRC_DFLT;
+    break;
+    // Arena ops: operand 1 broadcasts its single component; other
+    // operands use the default swizzle.
+  case AMDIL::UAVARENALOAD_i8:
+  case AMDIL::UAVARENALOAD_i16:
+  case AMDIL::UAVARENALOAD_i32:
+  case AMDIL::UAVARENASTORE_i8:
+  case AMDIL::UAVARENASTORE_i16:
+  case AMDIL::UAVARENASTORE_i32:
+    if (isXComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_XXXX;
+    } else if (isYComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_YYYY;
+    } else if (isZComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_ZZZZ;
+    } else if (isWComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_WWWW;
+    }
+    if (opNum != 1) {
+      opSwiz.bits.swizzle = AMDIL_SRC_DFLT;
+    }
+    break;
+    // These always use the default source swizzle.
+  case AMDIL::SEMAPHORE_INIT:
+  case AMDIL::SEMAPHORE_WAIT:
+  case AMDIL::SEMAPHORE_SIGNAL:
+  case AMDIL::APPEND_CONSUME:
+  case AMDIL::APPEND_ALLOC:
+  case AMDIL::APPEND64_CONSUME:
+  case AMDIL::APPEND64_ALLOC:
+  case AMDIL::CALL:
+  case AMDIL::RETURN:
+  case AMDIL::RETDYN:
+    opSwiz.bits.swizzle = AMDIL_SRC_DFLT;
+    break;
+    // 64-bit select condition: broadcast the component pair's low lane.
+  case AMDIL::CMOVLOG_f64:
+  case AMDIL::CMOVLOG_i64:
+    assert(opNum == 1 && "Only operand number 1 is custom!");
+    if (isZWComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_ZZZZ;
+    } else {
+      opSwiz.bits.swizzle = AMDIL_SRC_XXXX;
+    }
+    break;
+    // Extract high half of a double / low half of a long; the offset is
+    // then adjusted by the destination register's component.
+  case AMDIL::DHI:
+  case AMDIL::LLO:
+    if (isZWComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_Z000;
+    } else {
+      opSwiz.bits.swizzle = AMDIL_SRC_X000;
+    }
+    reg = MI->getOperand(0).getReg();
+    if (isYComponentReg(reg)) {
+      opSwiz.bits.swizzle += 1;
+    } else if (isZComponentReg(reg)
+               || isZWComponentReg(reg)) {
+      opSwiz.bits.swizzle += 2;
+    } else if (isWComponentReg(reg)) {
+      opSwiz.bits.swizzle += 3;
+    }
+    break;
+  case AMDIL::DHI_v2f64:
+  case AMDIL::LLO_v2i64:
+    opSwiz.bits.swizzle = AMDIL_SRC_XZXZ;
+    break;
+    // Extract low half of a double / high half of a long.
+  case AMDIL::DLO:
+  case AMDIL::LHI:
+    if (isZWComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_W000;
+    } else {
+      opSwiz.bits.swizzle = AMDIL_SRC_Y000;
+    }
+    reg = MI->getOperand(0).getReg();
+    if (isYComponentReg(reg)) {
+      opSwiz.bits.swizzle += 1;
+    } else if (isZComponentReg(reg)) {
+      opSwiz.bits.swizzle += 2;
+    } else if (isWComponentReg(reg)) {
+      opSwiz.bits.swizzle += 3;
+    } else if (isZWComponentReg(reg)) {
+      opSwiz.bits.swizzle += 2;
+    }
+    break;
+  case AMDIL::DLO_v2f64:
+  case AMDIL::LHI_v2i64:
+    opSwiz.bits.swizzle = AMDIL_SRC_YWYW;
+    break;
+    // Build a double from two 32-bit halves; opNum selects lo/hi lane.
+  case AMDIL::DCREATE: {
+    unsigned swiz = AMDIL_SRC_X000;
+    if (isWComponentReg(reg)) {
+      swiz = AMDIL_SRC_W000;
+    } else if (isYComponentReg(reg)) {
+      swiz = AMDIL_SRC_Y000;
+    } else if (isZComponentReg(reg)) {
+      swiz = AMDIL_SRC_Z000;
+    }
+    reg = MI->getOperand(0).getReg();
+    if (isZWComponentReg(reg)) {
+      swiz += 2;
+    }
+    opSwiz.bits.swizzle = swiz + (opNum == 1);
+  }
+  break;
+  case AMDIL::DCREATE_v2f64:
+    opSwiz.bits.swizzle = (opNum == 1)
+                          ? AMDIL_SRC_0X0Y : AMDIL_SRC_X0Y0;
+    break;
+    // Build a long from two 32-bit halves; the source component shifts
+    // the swizzle in steps of 4, the destination pair in steps of 2.
+  case AMDIL::LCREATE: {
+    unsigned swiz1 = (opNum == 1) ? AMDIL_SRC_X000 : AMDIL_SRC_0X00;
+    if (MI->getOperand(opNum).isReg()) {
+      reg = MI->getOperand(opNum).getReg();
+      if (isWComponentReg(reg)) {
+        swiz1 += 12;
+      } else if (isYComponentReg(reg)) {
+        swiz1 += 4;
+      } else if (isZComponentReg(reg)) {
+        swiz1 += 8;
+      }
+    }
+    reg = MI->getOperand(0).getReg();
+    if (isZWComponentReg(reg)) {
+      swiz1 += 2;
+    }
+    opSwiz.bits.swizzle = swiz1;
+  }
+  break;
+  case AMDIL::LCREATE_v2i64:
+    if (isXYComponentReg(reg)) {
+      opSwiz.bits.swizzle = opNum + AMDIL_SRC_YWYW;
+    } else {
+      opSwiz.bits.swizzle = opNum + AMDIL_SRC_YZW0;
+    }
+    break;
+    // 64-bit control flow conditions share the scalar-select logic with
+    // SWITCH below (intentional fallthrough after the assert).
+  case AMDIL::CONTINUE_LOGICALNZ_f64:
+  case AMDIL::BREAK_LOGICALNZ_f64:
+  case AMDIL::IF_LOGICALNZ_f64:
+  case AMDIL::CONTINUE_LOGICALZ_f64:
+  case AMDIL::BREAK_LOGICALZ_f64:
+  case AMDIL::IF_LOGICALZ_f64:
+  case AMDIL::CONTINUE_LOGICALNZ_i64:
+  case AMDIL::BREAK_LOGICALNZ_i64:
+  case AMDIL::IF_LOGICALNZ_i64:
+  case AMDIL::CONTINUE_LOGICALZ_i64:
+  case AMDIL::BREAK_LOGICALZ_i64:
+  case AMDIL::IF_LOGICALZ_i64:
+    assert(opNum == 0
+           && "Only operand numbers 0 is custom!");
+    // FALLTHROUGH
+  case AMDIL::SWITCH:
+    if (isXYComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_XXXX;
+    } else if (isZWComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_ZZZZ;
+    } else {
+      assert(!"Found a case we don't handle!");
+    }
+    break;
+  case AMDIL::UBIT_INSERT_i32:
+    assert((opNum == 1 || opNum == 2)
+           && "Only operand numbers 1 or 2 is custom!");
+    if (isXComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_XXXX;
+    } else if (isYComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_YYYY;
+    } else if (isZComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_ZZZZ;
+    } else if (isWComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_WWWW;
+    }
+    break;
+  case AMDIL::UBIT_INSERT_v2i32:
+    assert((opNum == 1 || opNum == 2)
+           && "Only operand numbers 1 or 2 is custom!");
+    if (isXYComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_XYXY;
+    } else if (isZWComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_ZWZW;
+    }
+    break;
+  case AMDIL::UBIT_INSERT_v4i32:
+    assert((opNum == 1 || opNum == 2)
+           && "Only operand numbers 1 or 2 is custom!");
+    opSwiz.bits.swizzle = AMDIL_SRC_DFLT;
+    break;
+  case AMDIL::HILO_BITOR_v4i16:
+    opSwiz.bits.swizzle = AMDIL_SRC_XZXZ + (opNum - 1);
+    break;
+  case AMDIL::HILO_BITOR_v2i32:
+    if (isXComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_X000;
+    } else if (isYComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_Y000;
+    } else if (isZComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_Z000;
+    } else if (isWComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_W000;
+    }
+    reg = MI->getOperand(0).getReg();
+    if (isYComponentReg(reg)) {
+      opSwiz.bits.swizzle += 1;
+    } else if (isZComponentReg(reg)) {
+      opSwiz.bits.swizzle += 2;
+    } else if (isWComponentReg(reg)) {
+      opSwiz.bits.swizzle += 3;
+    }
+    break;
+  case AMDIL::HILO_BITOR_v2i64: {
+    unsigned offset = 0;
+
+    if (isXYComponentReg(reg)) {
+      offset = AMDIL_SRC_XY00;
+    } else if (isZWComponentReg(reg)) {
+      offset = AMDIL_SRC_ZW00;
+    }
+    reg = MI->getOperand(0).getReg();
+    if (isZWComponentReg(reg)) {
+      offset += 1;
+    }
+    opSwiz.bits.swizzle = offset;
+  }
+  break;
+  };
+  return opSwiz;
+}
+
+// This function checks for instructions that don't have
+// normal swizzle patterns to their destination operand.
+// These have to be handled on a case by case basis.
+bool isCustomDstInst(MachineInstr *MI)
+{
+  unsigned opcode = MI->getOpcode();
+  switch (opcode) {
+  default:
+    break;
+    // Memory, semaphore/append and hi/lo-combine instructions need
+    // custom destination swizzles.
+  case AMDIL::UAVARENASTORE_i8:
+  case AMDIL::UAVARENASTORE_i16:
+  case AMDIL::UAVARENASTORE_i32:
+  case AMDIL::UAVARENALOAD_i8:
+  case AMDIL::UAVARENALOAD_i16:
+  case AMDIL::UAVARENALOAD_i32:
+  case AMDIL::LDSLOAD:
+  case AMDIL::LDSLOAD_i8:
+  case AMDIL::LDSLOAD_u8:
+  case AMDIL::LDSLOAD_i16:
+  case AMDIL::LDSLOAD_u16:
+  case AMDIL::LDSSTORE:
+  case AMDIL::LDSSTORE_i8:
+  case AMDIL::LDSSTORE_i16:
+  case AMDIL::GDSLOAD:
+  case AMDIL::GDSSTORE:
+  case AMDIL::SEMAPHORE_INIT:
+  case AMDIL::SEMAPHORE_WAIT:
+  case AMDIL::SEMAPHORE_SIGNAL:
+  case AMDIL::APPEND_CONSUME:
+  case AMDIL::APPEND_ALLOC:
+  case AMDIL::HILO_BITOR_v4i16:
+  case AMDIL::HILO_BITOR_v2i64:
+    // 64 bit IO Instructions
+  case AMDIL::LDSLOAD64:
+  case AMDIL::LDSLOAD64_i8:
+  case AMDIL::LDSLOAD64_u8:
+  case AMDIL::LDSLOAD64_i16:
+  case AMDIL::LDSLOAD64_u16:
+  case AMDIL::LDSSTORE64:
+  case AMDIL::LDSSTORE64_i8:
+  case AMDIL::LDSSTORE64_i16:
+  case AMDIL::GDSLOAD64:
+  case AMDIL::GDSSTORE64:
+  case AMDIL::APPEND64_CONSUME:
+  case AMDIL::APPEND64_ALLOC:
+    return true;
+  }
+
+  return false;
+}
+// This function returns the OpSwizzle with the custom swizzle set
+// correctly for destination operands.
+OpSwizzle getCustomDstSwizzle(MachineInstr *MI)
+{
+  OpSwizzle opSwiz;
+  opSwiz.u8all = 0;
+  unsigned opcode = MI->getOpcode();
+  opSwiz.bits.dst = 1;
+  unsigned reg = MI->getOperand(0).isReg() ?
+                 MI->getOperand(0).getReg() : 0;
+  switch (opcode) {
+    // Memory/semaphore/append ops: the dst bit is cleared and the
+    // destination is encoded as a source-style broadcast swizzle —
+    // presumably because the printer emits these operands in source
+    // position; confirm against the AsmPrinter.
+  case AMDIL::LDSLOAD:
+  case AMDIL::LDSLOAD_i8:
+  case AMDIL::LDSLOAD_u8:
+  case AMDIL::LDSLOAD_i16:
+  case AMDIL::LDSLOAD_u16:
+  case AMDIL::LDSSTORE:
+  case AMDIL::LDSSTORE_i8:
+  case AMDIL::LDSSTORE_i16:
+  case AMDIL::UAVARENALOAD_i8:
+  case AMDIL::UAVARENALOAD_i16:
+  case AMDIL::UAVARENALOAD_i32:
+  case AMDIL::UAVARENASTORE_i8:
+  case AMDIL::UAVARENASTORE_i16:
+  case AMDIL::UAVARENASTORE_i32:
+  case AMDIL::GDSLOAD:
+  case AMDIL::GDSSTORE:
+  case AMDIL::SEMAPHORE_INIT:
+  case AMDIL::SEMAPHORE_WAIT:
+  case AMDIL::SEMAPHORE_SIGNAL:
+  case AMDIL::APPEND_CONSUME:
+  case AMDIL::APPEND_ALLOC:
+    // 64 bit IO instructions
+  case AMDIL::LDSLOAD64:
+  case AMDIL::LDSLOAD64_i8:
+  case AMDIL::LDSLOAD64_u8:
+  case AMDIL::LDSLOAD64_i16:
+  case AMDIL::LDSLOAD64_u16:
+  case AMDIL::LDSSTORE64:
+  case AMDIL::LDSSTORE64_i8:
+  case AMDIL::LDSSTORE64_i16:
+  case AMDIL::GDSLOAD64:
+  case AMDIL::GDSSTORE64:
+  case AMDIL::APPEND64_CONSUME:
+  case AMDIL::APPEND64_ALLOC:
+    opSwiz.bits.dst = 0;
+    if (isXComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_XXXX;
+    } else if (isYComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_YYYY;
+    } else if (isZComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_ZZZZ;
+    } else if (isWComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_SRC_WWWW;
+    }
+    break;
+    // Hi/lo combines write either the .xy or .zw pair.
+  case AMDIL::HILO_BITOR_v4i16:
+  case AMDIL::HILO_BITOR_v2i64:
+    if (isXYComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_DST_XY__;
+    } else {
+      opSwiz.bits.swizzle = AMDIL_DST___ZW;
+    }
+    break;
+  default:
+    // Unreachable in debug builds; release builds fall back to a
+    // single-component destination mask.
+    assert(0
+           && "getCustomDstSwizzle hit an opcode it doesnt' understand!");
+    if (isXComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_DST_X___;
+    } else if (isYComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_DST__Y__;
+    } else if (isZComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_DST___Z_;
+    } else if (isWComponentReg(reg)) {
+      opSwiz.bits.swizzle = AMDIL_DST____W;
+    }
+  };
+  return opSwiz;
+}
+
+// Compute the source swizzle for operand 'opNum': custom instructions are
+// delegated to getCustomSrcSwizzle, otherwise the swizzle is derived from
+// the operand register's component class.
+OpSwizzle getSrcSwizzleID(MachineInstr *MI, unsigned opNum)
+{
+  assert(opNum < MI->getNumOperands() &&
+         "Must pass in a valid operand number.");
+  OpSwizzle curSwiz;
+  curSwiz.u8all = 0;
+  curSwiz.bits.dst = 0; // We need to reset the dst bit.
+  unsigned reg = 0;
+  if (MI->getOperand(opNum).isReg()) {
+    reg = MI->getOperand(opNum).getReg();
+  }
+  if (isCustomSrcInst(MI, opNum)) {
+    curSwiz = getCustomSrcSwizzle(MI, opNum);
+  } else if (isXComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_SRC_XXXX;
+  } else if (isYComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_SRC_YYYY;
+  } else if (isZComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_SRC_ZZZZ;
+  } else if (isWComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_SRC_WWWW;
+  } else if (isXYComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_SRC_XYXY;
+  } else if (isZWComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_SRC_ZWZW;
+  } else if (reg == AMDIL::R1011 && isMove(MI->getOpcode())) {
+    // Special case: moves from R1011 derive the swizzle from the
+    // destination register's width instead.
+    reg = MI->getOperand(0).getReg();
+    if (isXComponentReg(reg) || isYComponentReg(reg)
+        || isZComponentReg(reg) || isWComponentReg(reg)) {
+      curSwiz.bits.swizzle = AMDIL_SRC_XXXX;
+    } else if (isXYComponentReg(reg) || isZWComponentReg(reg)) {
+      curSwiz.bits.swizzle = AMDIL_SRC_XYXY;
+    }
+  } else {
+    curSwiz.bits.swizzle = AMDIL_SRC_DFLT;
+  }
+  return curSwiz;
+}
+
+// Compute the destination swizzle (write mask) for operand 0, based on
+// the register's component class or the custom-instruction table.
+OpSwizzle getDstSwizzleID(MachineInstr *MI)
+{
+  OpSwizzle curSwiz;
+  // NOTE(review): unlike getSrcSwizzleID, u8all is not cleared before the
+  // bitfields are set — confirm dst+swizzle cover the whole byte.
+  curSwiz.bits.dst = 1;
+  curSwiz.bits.swizzle = AMDIL_DST_DFLT;
+  unsigned reg = 0;
+  if (MI->getOperand(0).isReg()) {
+    reg = MI->getOperand(0).getReg();
+  }
+  if (isCustomDstInst(MI)) {
+    curSwiz = getCustomDstSwizzle(MI);
+  } else if (isXComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_DST_X___;
+  } else if (isYComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_DST__Y__;
+  } else if (isZComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_DST___Z_;
+  } else if (isWComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_DST____W;
+  } else if (isXYComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_DST_XY__;
+  } else if (isZWComponentReg(reg)) {
+    curSwiz.bits.swizzle = AMDIL_DST___ZW;
+  } else {
+    curSwiz.bits.swizzle = AMDIL_DST_DFLT;
+  }
+
+  return curSwiz;
+}
+
+/// All vector instructions except for VCREATE_* need to be handled
+/// with custom swizzle packing code.
+bool isCustomVectorInst(MachineInstr *MI)
+{
+  unsigned opcode = MI->getOpcode();
+  // Relies on tblgen emitting VCONCAT_* and VEXTRACT_*..VINSERT_* as
+  // contiguous opcode ranges in the generated enum.
+  return (opcode >= AMDIL::VCONCAT_v2f32 && opcode <= AMDIL::VCONCAT_v4i8)
+         || (opcode >= AMDIL::VEXTRACT_v2f32 && opcode <= AMDIL::VINSERT_v4i8);
+}
+
+void encodeVectorInst(MachineInstr *MI, bool mDebug)
+{
+  assert(isCustomVectorInst(MI) && "Only a vector instruction can be"
+         " used to generate a new vector instruction!");
+  unsigned opcode = MI->getOpcode();
+  // For all of the opcodes, the destination swizzle is the same.
+  OpSwizzle swizID = getDstSwizzleID(MI);
+  OpSwizzle srcID;
+  srcID.u8all = 0;
+  MI->getOperand(0).setTargetFlags(swizID.u8all);
+  unsigned offset = 0;
+  unsigned reg = MI->getOperand(0).getReg();
+  switch (opcode) {
+  case AMDIL::VCONCAT_v2f32:
+  case AMDIL::VCONCAT_v2i16:
+  case AMDIL::VCONCAT_v2i32:
+  case AMDIL::VCONCAT_v2i8:
+    if (isZWComponentReg(reg)) {
+      offset = 2;
+    }
+    for (unsigned x = 1; x < 3; ++x) {
+      reg = MI->getOperand(x).getReg();
+      unsigned offset2 = 0;
+      if (isXComponentReg(reg)) {
+        offset2 = 0;
+      } else if (isYComponentReg(reg)) {
+        offset2 = 4;
+      } else if (isZComponentReg(reg)) {
+        offset2 = 8;
+      } else if (isWComponentReg(reg)) {
+        offset2 = 12;
+      }
+      srcID.bits.swizzle = AMDIL_SRC_X000 + offset + (x - 1) + offset2;
+      MI->getOperand(x).setTargetFlags(srcID.u8all);
+    }
+    break;
+  case AMDIL::VCONCAT_v2f64:
+  case AMDIL::VCONCAT_v2i64:
+  case AMDIL::VCONCAT_v4f32:
+  case AMDIL::VCONCAT_v4i16:
+  case AMDIL::VCONCAT_v4i32:
+  case AMDIL::VCONCAT_v4i8:
+    for (unsigned x = 1; x < 3; ++x) {
+      reg = MI->getOperand(x).getReg();
+      if (isZWComponentReg(reg)) {
+        srcID.bits.swizzle = AMDIL_SRC_ZW00 + (x - 1);
+      } else {
+        srcID.bits.swizzle = AMDIL_SRC_XY00 + (x - 1);
+      }
+      MI->getOperand(x).setTargetFlags(srcID.u8all);
+    }
+    break;
+  case AMDIL::VEXTRACT_v2f32:
+  case AMDIL::VEXTRACT_v2i16:
+  case AMDIL::VEXTRACT_v2i32:
+  case AMDIL::VEXTRACT_v2i8:
+    assert(MI->getOperand(2).getImm() <= 2
+           && "Invalid immediate value encountered for this formula!");
+    if (isXComponentReg(reg)) {
+      offset = 0;
+    } else if (isYComponentReg(reg)) {
+      offset = 1;
+    } else if (isZComponentReg(reg)) {
+      offset = 2;
+    } else if (isWComponentReg(reg)) {
+      offset = 3;
+    }
+    assert(MI->getOperand(2).getImm() <= 4
+           && "Invalid immediate value encountered for this formula!");
+    reg = MI->getOperand(1).getReg();
+    if (isZWComponentReg(reg)) {
+      srcID.bits.swizzle = AMDIL_SRC_Z000;
+    } else {
+      srcID.bits.swizzle = AMDIL_SRC_X000;
+    }
+    srcID.bits.swizzle += offset + (MI->getOperand(2).getImm()-1) * 4;
+    MI->getOperand(1).setTargetFlags(srcID.u8all);
+    MI->getOperand(2).setTargetFlags(0);
+    break;
+  case AMDIL::VEXTRACT_v4f32:
+  case AMDIL::VEXTRACT_v4i16:
+  case AMDIL::VEXTRACT_v4i32:
+  case AMDIL::VEXTRACT_v4i8:
+    if (isXComponentReg(reg)) {
+      offset = 0;
+    } else if (isYComponentReg(reg)) {
+      offset = 1;
+    } else if (isZComponentReg(reg)) {
+      offset = 2;
+    } else if (isWComponentReg(reg)) {
+      offset = 3;
+    } else if (isXYComponentReg(reg)) {
+      offset = 0;
+    } else if (isZWComponentReg(reg)) {
+      offset = 2;
+    }
+    assert(MI->getOperand(2).getImm() <= 4
+           && "Invalid immediate value encountered for this formula!");
+    srcID.bits.swizzle = ((MI->getOperand(2).getImm() - 1) * 4) + 1 + offset;
+    MI->getOperand(1).setTargetFlags(srcID.u8all);
+    MI->getOperand(2).setTargetFlags(0);
+    break;
+  case AMDIL::VEXTRACT_v2i64:
+  case AMDIL::VEXTRACT_v2f64:
+    assert(MI->getOperand(2).getImm() <= 2
+           && "Invalid immediate value encountered for this formula!");
+    if (isZWComponentReg(reg)) {
+      offset = 1;
+    }
+    srcID.bits.swizzle = AMDIL_SRC_XY00
+                         + ((MI->getOperand(2).getImm() - 1) * 2) + offset;
+    MI->getOperand(1).setTargetFlags(srcID.u8all);
+    MI->getOperand(2).setTargetFlags(0);
+    break;
+  case AMDIL::VINSERT_v2f32:
+  case AMDIL::VINSERT_v2i32:
+  case AMDIL::VINSERT_v2i16:
+  case AMDIL::VINSERT_v2i8: {
+    unsigned swizVal = (unsigned)MI->getOperand(4).getImm();
+    OpSwizzle src2ID;
+    src2ID.u8all = 0;
+    if (reg >= AMDIL::Rzw1 && reg < AMDIL::SDP) {
+      offset = 2;
+    }
+
+    unsigned offset1 = 0;
+    reg = MI->getOperand(1).getReg();
+    if (isZWComponentReg(reg)) {
+      offset1 = 8;
+    }
+
+    unsigned offset2 = 0;
+    reg = MI->getOperand(2).getReg();
+    if (isYComponentReg(reg)) {
+      offset2 = 4;
+    } else if (isZComponentReg(reg)) {
+      offset2 = 8;
+    } else if (isWComponentReg(reg)) {
+      offset2 = 12;
+    }
+    if (((swizVal >> 8) & 0xFF) == 1) {
+      srcID.bits.swizzle = AMDIL_SRC_X000 + offset1 + offset;
+      src2ID.bits.swizzle = AMDIL_SRC_0X00 + offset2 + offset;
+    } else {
+      srcID.bits.swizzle = AMDIL_SRC_0Y00 + offset1 + offset;
+      src2ID.bits.swizzle = AMDIL_SRC_X000 + offset2 + offset;
+    }
+    MI->getOperand(1).setTargetFlags(srcID.u8all);
+    MI->getOperand(2).setTargetFlags(src2ID.u8all);
+    MI->getOperand(3).setTargetFlags(0);
+    MI->getOperand(4).setTargetFlags(0);
+  }
+  break;
+  case AMDIL::VINSERT_v4f32:
+  case AMDIL::VINSERT_v4i16:
+  case AMDIL::VINSERT_v4i32:
+  case AMDIL::VINSERT_v4i8: {
+    unsigned swizVal = (unsigned)MI->getOperand(4).getImm();
+    OpSwizzle src2ID;
+    src2ID.u8all = 0;
+    if (reg >= AMDIL::Rzw1 && reg < AMDIL::SDP) {
+      offset = 2;
+    }
+    unsigned offset2 = 0;
+    reg = MI->getOperand(2).getReg();
+    if (isYComponentReg(reg)) {
+      offset2 = 4;
+    } else if (isZComponentReg(reg)) {
+      offset2 = 8;
+    } else if (isWComponentReg(reg)) {
+      offset2 = 12;
+    } else if (isZWComponentReg(reg)) {
+      offset2 = 2;
+    }
+    if ((swizVal >> 8 & 0xFF) == 1) {
+      srcID.bits.swizzle = (!offset) ? AMDIL_SRC_X0ZW : AMDIL_SRC_XYZ0;
+      src2ID.bits.swizzle = AMDIL_SRC_0X00 + offset2 + offset;
+    } else if ((swizVal >> 16 & 0xFF) == 1) {
+      srcID.bits.swizzle = AMDIL_SRC_XY0W;
+      src2ID.bits.swizzle = AMDIL_SRC_00X0 + offset2;
+    } else if ((swizVal >> 24 & 0xFF) == 1) {
+      srcID.bits.swizzle = AMDIL_SRC_XYZ0;
+      src2ID.bits.swizzle = AMDIL_SRC_000X + offset2;
+    } else {
+      srcID.bits.swizzle = (!offset) ? AMDIL_SRC_0YZW : AMDIL_SRC_XY0W;
+      src2ID.bits.swizzle = AMDIL_SRC_X000 + offset2 + offset;
+    }
+    MI->getOperand(1).setTargetFlags(srcID.u8all);
+    MI->getOperand(2).setTargetFlags(src2ID.u8all);
+    MI->getOperand(3).setTargetFlags(0);
+    MI->getOperand(4).setTargetFlags(0);
+  }
+  break;
+  case AMDIL::VINSERT_v2f64:
+  case AMDIL::VINSERT_v2i64: {
+    unsigned swizVal = (unsigned)MI->getOperand(4).getImm();
+    OpSwizzle src2ID;
+    src2ID.u8all = 0;
+    reg = MI->getOperand(2).getReg();
+    if (isZWComponentReg(reg)) {
+      offset = 2;
+    }
+    if (((swizVal >> 8) & 0xFF) == 1) {
+      srcID.bits.swizzle = AMDIL_SRC_XY00;
+      src2ID.bits.swizzle = AMDIL_SRC_00XY + offset;
+    } else {
+      srcID.bits.swizzle = AMDIL_SRC_00ZW;
+      src2ID.bits.swizzle = AMDIL_SRC_XY00 + offset;
+    }
+    MI->getOperand(1).setTargetFlags(srcID.u8all);
+    MI->getOperand(2).setTargetFlags(src2ID.u8all);
+    MI->getOperand(3).setTargetFlags(0);
+    MI->getOperand(4).setTargetFlags(0);
+  }
+  break;
+  };
+  if (mDebug) {
+    for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+      dumpOperand(MI, i);
+    }
+    dbgs() << "\n";
+  }
+}
+
+// This function loops through all of the instructions, skipping function
+// calls, and encodes the swizzles in the operand.
+//
+// Top-level driver of the swizzle encoder: walks every MachineInstr in
+// every basic block of \p MF and stores a swizzle id in each operand's
+// target-specific flag byte.
+//  - RETDYN / RETURN / DBG_VALUE are skipped entirely.
+//  - Custom vector instructions (VCONCAT/VEXTRACT/VINSERT/...) have
+//    per-opcode encodings and are delegated to encodeVectorInst().
+//  - Every other operand gets a destination swizzle if it is a register
+//    def, otherwise a source swizzle.
+// \p mDebug enables dumping of each instruction and operand as encoded.
+void encodeSwizzles(MachineFunction &MF, bool mDebug)
+{
+  for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+       MFI != MFE; ++MFI) {
+    // Relies on the implicit ilist iterator-to-pointer conversion that is
+    // available in this LLVM branch.
+    MachineBasicBlock *MBB = MFI;
+    for (MachineBasicBlock::iterator MBI = MBB->begin(), MBE = MBB->end();
+         MBI != MBE; ++MBI) {
+      MachineInstr *MI = MBI;
+      // Returns and debug values carry no swizzle information.
+      if (MI->getOpcode() == AMDIL::RETDYN
+          || MI->getOpcode() == AMDIL::RETURN
+          || MI->getOpcode() == AMDIL::DBG_VALUE) {
+        continue;
+      }
+      if (mDebug) {
+        dbgs() << "Encoding instruction: ";
+        MI->print(dbgs());
+      }
+      // Vector concat/extract/insert opcodes need special per-opcode
+      // swizzle handling; encodeVectorInst() does its own debug dump.
+      if (isCustomVectorInst(MI)) {
+        encodeVectorInst(MI, mDebug);
+        continue;
+      }
+      for (unsigned a = 0, z = MI->getNumOperands(); a < z; ++a) {
+        OpSwizzle swizID;
+        if (MI->getOperand(a).isReg() && MI->getOperand(a).isDef()) {
+          swizID = getDstSwizzleID(MI);
+        } else {
+          swizID = getSrcSwizzleID(MI, a);
+        }
+        // The encoded swizzle lives in the operand's target flag byte.
+        MI->getOperand(a).setTargetFlags(swizID.u8all);
+        if (mDebug) {
+          dumpOperand(MI, a);
+        }
+      }
+      if (mDebug) {
+        dbgs() << "\n";
+      }
+    }
+  }
+}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,44 @@
+//===-- AMDILSwizzleEncoder.h ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDIL Swizzle Encoder is a class that encodes swizzle information in
+// the machine operand's target-specific flag. This encoding can then be
+// used to optimize the swizzles of a specific instruction to better pack
+// the registers, which helps register allocation in SC.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDIL_SWIZZLE_ENCODER_H_
+#define _AMDIL_SWIZZLE_ENCODER_H_
+#include "AMDIL.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include <set>
+#include <map>
+namespace llvm
+{
+struct AMDILRegisterInfo;
+/// Machine function pass that writes swizzle encodings into the
+/// target-specific flag byte of every machine operand (implementation in
+/// AMDILSwizzleEncoder.cpp).
+class AMDILSwizzleEncoder : public MachineFunctionPass
+{
+public:
+  AMDILSwizzleEncoder(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+  const char* getPassName() const;
+  bool runOnMachineFunction(MachineFunction &MF);
+  static char ID;
+private:
+  bool mDebug; ///< Flag to specify whether to dump debug information.
+  CodeGenOpt::Level opt; ///< Optimization level the pass was created with.
+}; // class AMDILSwizzleEncoder
+// NOTE(review): defining the pass ID here means every translation unit
+// that includes this header emits its own definition -- an ODR violation
+// if the header is ever included from more than one .cpp. LLVM convention
+// is to define the ID in the pass's .cpp file; confirm this header has
+// exactly one includer.
+char AMDILSwizzleEncoder::ID = 0;
+} // end llvm namespace
+#endif // _AMDIL_SWIZZLE_ENCODER_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,39 @@
+//===-- AMDILTNDevice.cpp -------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Trinity (TN) APU device, based on Cayman.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILSubtarget.h"
+#include "AMDILTNDevice.h"
+#include "AMDILNIDevice.h"
+using namespace llvm;
+
+// Trinity is a Cayman-derived APU: reuse the Cayman configuration, then
+// tag the device with its own flag and apply any capability overrides.
+AMDILTrinityDevice::AMDILTrinityDevice(AMDILSubtarget *ST)
+  : AMDILCaymanDevice(ST)
+{
+  std::string name = ST->getDeviceName();
+  // Only the exact device name "trinity" selects the TN flag; for any
+  // other name mDeviceFlag keeps whatever value the base class chain set
+  // (presumably Cayman's -- confirm against AMDILCaymanDevice).
+  if(name == "trinity") {
+    mDeviceFlag = OCL_DEVICE_TRINITY;
+  }
+  setCaps();
+}
+
+// No Trinity-specific resources to release; the base classes clean up.
+AMDILTrinityDevice::~AMDILTrinityDevice()
+{
+}
+
+// Hook for Trinity-specific capability adjustments relative to Cayman.
+void
+AMDILTrinityDevice::setCaps()
+{
+  // Trinity inherits everything from Cayman. If there are any
+  // differences (like disabling FP64), set them here.
+}
+

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,39 @@
+//===-- AMDILTNDevice.h ---------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDILTNDEVICE_H_
+#define _AMDILTNDEVICE_H_
+#include "AMDILNIDevice.h"
+#include "AMDILSubtarget.h"
+namespace llvm
+{
+class AMDILSubtarget;
+//===---------------------------------------------------------------------===//
+// Trinity (TN) generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+// Trinity devices (devastator and scrapper) are APUs based on Cayman.
+// Like Cayman, they have a 4 wide ALU. They do support FP64, but this
+// may not be advertised at the OpenCL API layer depending on
+// performance.
+/// Device class for the Trinity (TN) APU; derives all behavior from the
+/// Cayman device and overrides setCaps() for TN-specific tweaks.
+class AMDILTrinityDevice: public AMDILCaymanDevice
+{
+public:
+  // NOTE(review): single-argument constructor is not marked explicit,
+  // allowing implicit AMDILSubtarget* -> AMDILTrinityDevice conversion;
+  // confirm no caller depends on that before tightening.
+  AMDILTrinityDevice(AMDILSubtarget*);
+  virtual ~AMDILTrinityDevice();
+private:
+  virtual void setCaps(); ///< Trinity-specific capability overrides.
+}; // AMDILTrinityDevice
+
+} // namespace llvm
+#endif // _AMDILTNDEVICE_H_

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.cpp?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.cpp (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.cpp Tue Aug 14 16:38:58 2012
@@ -0,0 +1,227 @@
+//===-- AMDILTargetMachine.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AMDIL specific subclasses of TargetMachine and
+// TargetPassConfig that set up the AMDIL code generation pipeline.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILTargetMachine.h"
+#include "AMDILDevices.h"
+#include "AMDILFrameLowering.h"
+#include "AMDILMCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+// Factory registered with the target registry that builds the MCAsmInfo
+// for a given triple. Every OS (including unknown) currently maps to the
+// single AMDILMCAsmInfo implementation; the switch exists only as a hook
+// for future per-OS variants.
+static MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT)
+{
+  Triple TheTriple(TT);
+  switch (TheTriple.getOS()) {
+  default:
+  case Triple::UnknownOS:
+    return new AMDILMCAsmInfo(TheTriple);
+  }
+}
+
+extern "C" void LLVMInitializeAMDILTarget()
+{
+  // Register the target
+  RegisterTargetMachine<TheAMDILTargetMachine> X(TheAMDILTarget);
+
+  // Register the target asm info
+  RegisterMCAsmInfoFn A(TheAMDILTarget, createMCAsmInfo);
+
+  // Register the code emitter
+  //TargetRegistry::RegisterCodeEmitter(TheAMDILTarget,
+  //createAMDILMCCodeEmitter);
+}
+
+// Thin concrete subclass constructor: forwards every argument unchanged
+// to the generic AMDILTargetMachine base.
+TheAMDILTargetMachine::TheAMDILTargetMachine(const Target &T,
+    StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options,
+    Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL)
+  : AMDILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL)
+{
+}
+
+/// AMDILTargetMachine ctor - builds the target machine and the per-target
+/// objects it owns: the subtarget (from TT/CPU/FS), the data layout the
+/// subtarget supplies, frame lowering (note: AMDIL's stack grows UP, with
+/// device-specific alignment), instruction/lowering/intrinsic info and the
+/// ELF writer info.
+AMDILTargetMachine::AMDILTargetMachine(const Target &T,
+                                       StringRef TT, StringRef CPU, StringRef FS,
+                                       const TargetOptions &Options,
+                                       Reloc::Model RM, CodeModel::Model CM,
+                                       CodeGenOpt::Level OL)
+  :
+  LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+  Subtarget(TT, CPU, FS),
+  DataLayout(Subtarget.getDataLayout()),
+  FrameLowering(TargetFrameLowering::StackGrowsUp,
+                Subtarget.device()->getStackAlignment(), 0),
+  InstrInfo(*this), //JITInfo(*this),
+  TLInfo(*this),
+  IntrinsicInfo(this),
+  ELFWriterInfo(false, true)
+{
+  // Default to verbose assembly output and disable MC .loc directives.
+  setAsmVerbosityDefault(true);
+  setMCUseLoc(false);
+}
+
+// Returns the lowering-info object. The const_cast strips the constness
+// added by this const member function so a mutable pointer can be handed
+// back, matching the accessor's non-const return type.
+AMDILTargetLowering*
+AMDILTargetMachine::getTargetLowering() const
+{
+  return const_cast<AMDILTargetLowering*>(&TLInfo);
+}
+
+// Accessor for the AMDIL instruction info.
+const AMDILInstrInfo*
+AMDILTargetMachine::getInstrInfo() const
+{
+  return &InstrInfo;
+}
+// Accessor for the AMDIL frame-lowering object.
+const AMDILFrameLowering*
+AMDILTargetMachine::getFrameLowering() const
+{
+  return &FrameLowering;
+}
+
+// Accessor for the AMDIL subtarget description.
+const AMDILSubtarget*
+AMDILTargetMachine::getSubtargetImpl() const
+{
+  return &Subtarget;
+}
+
+// Accessor for the register info, which is owned by the instruction info.
+const AMDILRegisterInfo*
+AMDILTargetMachine::getRegisterInfo() const
+{
+  return &InstrInfo.getRegisterInfo();
+}
+
+// Accessor for the target data layout (type sizes and alignments).
+const TargetData*
+AMDILTargetMachine::getTargetData() const
+{
+  return &DataLayout;
+}
+
+// Returns the ELF writer info when the subtarget targets ELF, otherwise
+// a null pointer.
+const AMDILELFWriterInfo*
+AMDILTargetMachine::getELFWriterInfo() const
+{
+  return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+}
+
+// Hands the pass manager an AMDIL-specific pass configuration object,
+// which defines the codegen pipeline hooks implemented below.
+TargetPassConfig*
+AMDILTargetMachine::createPassConfig(PassManagerBase &PM)
+{
+  return new AMDILPassConfig(this, PM);
+}
+
+// Accessor for the AMDIL intrinsic info table.
+const AMDILIntrinsicInfo*
+AMDILTargetMachine::getIntrinsicInfo() const
+{
+  return &IntrinsicInfo;
+}
+// Target hook run before instruction selection. Currently a no-op: the
+// vector coarsening pass is commented out pending big-endian support.
+// See TargetPassConfig for the meaning of the boolean return value in
+// this LLVM branch.
+bool
+AMDILPassConfig::addPreISel()
+{
+  // Vector Coarsening as the current implementation does not support
+  // big endian yet.
+  /*  ---- TODO: WHAT TO DO WITH THIS???
+  if (getAMDILTargetMachine().getOptLevel() != CodeGenOpt::None &&
+      getAMDILTargetMachine().getTargetData()->isLittleEndian())
+    addPass(createVectorCoarseningPass());
+  */
+
+  return true;
+}
+// Installs the AMDIL pre-selection IR passes, in order: barrier detect,
+// printf conversion, inlining, peephole optimization, then the ISel DAG
+// pass itself. All passes are created at the target machine's opt level.
+bool
+AMDILPassConfig::addInstSelector()
+{
+  addPass(createAMDILBarrierDetect(getAMDILTargetMachine(),
+                                   getAMDILTargetMachine().getOptLevel()));
+  addPass(createAMDILPrintfConvert(getAMDILTargetMachine(),
+                                   getAMDILTargetMachine().getOptLevel()));
+  addPass(createAMDILInlinePass(getAMDILTargetMachine(),
+                                getAMDILTargetMachine().getOptLevel()));
+  addPass(createAMDILPeepholeOpt(getAMDILTargetMachine(),
+                                 getAMDILTargetMachine().getOptLevel()));
+  addPass(createAMDILISelDag(getAMDILTargetMachine(),
+                             getAMDILTargetMachine().getOptLevel()));
+  return false;
+}
+// Passes that run just before register allocation: the machine peephole
+// and the pointer manager. Also picks a less aggressive pre-RA scheduler
+// when compiling without optimization.
+bool
+AMDILPassConfig::addPreRegAlloc
+
+{
+  // If debugging, reduce code motion. Use less aggressive pre-RA scheduler
+  if (getOptLevel() == CodeGenOpt::None) {
+    llvm::RegisterScheduler::setDefault(&llvm::createSourceListDAGScheduler);
+  }
+
+  addPass(createAMDILMachinePeephole(getAMDILTargetMachine(),
+                                     getAMDILTargetMachine().getOptLevel()));
+  addPass(createAMDILPointerManager(getAMDILTargetMachine(),
+                                    getAMDILTargetMachine().getOptLevel()));
+  return false;
+}
+
+// No AMDIL passes run directly after register allocation.
+bool
+AMDILPassConfig::addPostRegAlloc()
+{
+  return false;  // -print-machineinstr should print after this.
+}
+
+/// addPreEmitPass - This pass may be implemented by targets that want to run
+/// passes immediately before machine code is emitted.  This should return
+/// true if -print-machineinstrs should print out the code after the passes.
+/// Order matters here: CFG preparation and structurization run first, then
+/// the literal manager, I/O expansion, and finally the swizzle encoder.
+bool
+AMDILPassConfig::addPreEmitPass()
+{
+  addPass(createAMDILCFGPreparationPass());
+  addPass(createAMDILCFGStructurizerPass());
+  addPass(createAMDILLiteralManager(getAMDILTargetMachine(),
+                                    getAMDILTargetMachine().getOptLevel()));
+  addPass(createAMDILIOExpansion(getAMDILTargetMachine(),
+                                 getAMDILTargetMachine().getOptLevel()));
+  addPass(createAMDILSwizzleEncoder(getAMDILTargetMachine(),
+                                    getAMDILTargetMachine().getOptLevel()));
+  return true;
+}
+
+// Prints a marker line for the target machine state to \p O, but only
+// when debug mode has been enabled via setDebug(true).
+void
+AMDILTargetMachine::dump(OSTREAM_TYPE &O)
+{
+  if (!mDebugMode) {
+    return;
+  }
+  O << ";AMDIL Target Machine State Dump: \n";
+}
+
+// Enables or disables target-machine debug dumping (see dump()).
+void
+AMDILTargetMachine::setDebug(bool debugMode)
+{
+  mDebugMode = debugMode;
+}
+
+// Returns whether target-machine debug dumping is enabled.
+bool
+AMDILTargetMachine::getDebug() const
+{
+  return mDebugMode;
+}
+
+extern "C" void LLVMInitializeAMDILTargetMC() {}

Added: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.h?rev=161895&view=auto
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.h (added)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.h Tue Aug 14 16:38:58 2012
@@ -0,0 +1,115 @@
+//===-- AMDILTargetMachine.h ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AMDIL specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILTARGETMACHINE_H_
+#define AMDILTARGETMACHINE_H_
+
+
+#include "AMDIL.h"
+#include "AMDILELFWriterInfo.h"
+#include "AMDILFrameLowering.h"
+#include "AMDILInstrInfo.h"
+#include "AMDILISelLowering.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILSubtarget.h"
+
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+namespace llvm
+{
+class raw_ostream;
+
+class AMDILTargetMachine : public LLVMTargetMachine
+{
+private:
+  AMDILSubtarget Subtarget;
+  const TargetData DataLayout;       // Calculates type size & alignment
+  AMDILFrameLowering FrameLowering;
+  AMDILInstrInfo InstrInfo;
+  AMDILTargetLowering TLInfo;
+  AMDILIntrinsicInfo IntrinsicInfo;
+  AMDILELFWriterInfo ELFWriterInfo;
+  bool mDebugMode;
+
+protected:
+
+public:
+  AMDILTargetMachine(const Target &T,
+                     StringRef TT, StringRef CPU, StringRef FS,
+                     const TargetOptions &Options,
+                     Reloc::Model RM, CodeModel::Model CM,
+