[llvm] r271571 - AMDGPU: Cleanup load tests

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 2 12:54:27 PDT 2016


Author: arsenm
Date: Thu Jun  2 14:54:26 2016
New Revision: 271571

URL: http://llvm.org/viewvc/llvm-project?rev=271571&view=rev
Log:
AMDGPU: Cleanup load tests

There are a lot of different kinds of loads to test for,
and these were scattered around inconsistently with
some redundancy. Try to comprehensively test all loads
in a consistent way.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/load-constant-f64.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-constant-i1.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-constant-i16.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-constant-i64.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-global-f32.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-global-f64.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-global-i1.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-global-i16.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-global-i64.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-global-i8.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-local-f32.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-local-f64.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-local-i1.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-local-i64.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll
Removed:
    llvm/trunk/test/CodeGen/AMDGPU/global-extload-i1.ll
    llvm/trunk/test/CodeGen/AMDGPU/global-extload-i16.ll
    llvm/trunk/test/CodeGen/AMDGPU/global-extload-i32.ll
    llvm/trunk/test/CodeGen/AMDGPU/global-extload-i8.ll
    llvm/trunk/test/CodeGen/AMDGPU/load-i1.ll
    llvm/trunk/test/CodeGen/AMDGPU/load.ll
    llvm/trunk/test/CodeGen/AMDGPU/load.vec.ll
    llvm/trunk/test/CodeGen/AMDGPU/load64.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/extload.ll
    llvm/trunk/test/CodeGen/AMDGPU/fpext.ll
    llvm/trunk/test/CodeGen/AMDGPU/kernel-args.ll
    llvm/trunk/test/CodeGen/AMDGPU/local-memory.ll
    llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll

Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=271571&r1=271570&r2=271571&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Thu Jun  2 14:54:26 2016
@@ -63,6 +63,16 @@ R600TargetLowering::R600TargetLowering(T
     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
   }
 
+  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
+  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
+
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
+
+
   setOperationAction(ISD::STORE, MVT::i8, Custom);
   setOperationAction(ISD::STORE, MVT::i32, Custom);
   setOperationAction(ISD::STORE, MVT::v2i32, Custom);
@@ -71,6 +81,10 @@ R600TargetLowering::R600TargetLowering(T
   setTruncStoreAction(MVT::i32, MVT::i8, Custom);
   setTruncStoreAction(MVT::i32, MVT::i16, Custom);
 
+  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
+  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
+  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
+
   // Set condition code actions
   setCondCodeAction(ISD::SETO,   MVT::f32, Expand);
   setCondCodeAction(ISD::SETUO,  MVT::f32, Expand);

Modified: llvm/trunk/test/CodeGen/AMDGPU/extload.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/extload.ll?rev=271571&r1=271570&r2=271571&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/extload.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/extload.ll Thu Jun  2 14:54:26 2016
@@ -1,14 +1,16 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}anyext_load_i8:
-; SI: buffer_load_dword v{{[0-9]+}}
-; SI: buffer_store_dword v{{[0-9]+}}
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FIXME: This seems to not ever actually become an extload
+; FUNC-LABEL: {{^}}global_anyext_load_i8:
+; GCN: buffer_load_dword v{{[0-9]+}}
+; GCN: buffer_store_dword v{{[0-9]+}}
 
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]],
 ; EG: VTX_READ_32 [[VAL]]
-define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
+define void @global_anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
   %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
   %load = load i32, i32 addrspace(1)* %cast
   %x = bitcast i32 %load to <4 x i8>
@@ -17,13 +19,13 @@ define void @anyext_load_i8(i8 addrspace
   ret void
 }
 
-; FUNC-LABEL: {{^}}anyext_load_i16:
-; SI: buffer_load_dword v{{[0-9]+}}
-; SI: buffer_store_dword v{{[0-9]+}}
+; FUNC-LABEL: {{^}}global_anyext_load_i16:
+; GCN: buffer_load_dword v{{[0-9]+}}
+; GCN: buffer_store_dword v{{[0-9]+}}
 
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]],
 ; EG: VTX_READ_32 [[VAL]]
-define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
+define void @global_anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
   %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
   %load = load i32, i32 addrspace(1)* %cast
   %x = bitcast i32 %load to <2 x i16>
@@ -32,13 +34,13 @@ define void @anyext_load_i16(i16 addrspa
   ret void
 }
 
-; FUNC-LABEL: {{^}}anyext_load_lds_i8:
-; SI: ds_read_b32 v{{[0-9]+}}
-; SI: ds_write_b32 v{{[0-9]+}}
+; FUNC-LABEL: {{^}}local_anyext_load_i8:
+; GCN: ds_read_b32 v{{[0-9]+}}
+; GCN: ds_write_b32 v{{[0-9]+}}
 
 ; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]]
 ; EG: LDS_WRITE * [[VAL]]
-define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
+define void @local_anyext_load_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
   %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
   %load = load i32, i32 addrspace(3)* %cast
   %x = bitcast i32 %load to <4 x i8>
@@ -47,13 +49,13 @@ define void @anyext_load_lds_i8(i8 addrs
   ret void
 }
 
-; FUNC-LABEL: {{^}}anyext_load_lds_i16:
-; SI: ds_read_b32 v{{[0-9]+}}
-; SI: ds_write_b32 v{{[0-9]+}}
+; FUNC-LABEL: {{^}}local_anyext_load_i16:
+; GCN: ds_read_b32 v{{[0-9]+}}
+; GCN: ds_write_b32 v{{[0-9]+}}
 
 ; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]]
 ; EG: LDS_WRITE * [[VAL]]
-define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
+define void @local_anyext_load_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
   %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
   %load = load i32, i32 addrspace(3)* %cast
   %x = bitcast i32 %load to <2 x i16>

Modified: llvm/trunk/test/CodeGen/AMDGPU/fpext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fpext.ll?rev=271571&r1=271570&r2=271571&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fpext.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fpext.ll Thu Jun  2 14:54:26 2016
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; FUNC-LABEL: {{^}}fpext_f32_to_f64:
@@ -18,6 +18,16 @@ define void @fpext_v2f32_to_v2f64(<2 x d
   ret void
 }
 
+; FUNC-LABEL: {{^}}fpext_v3f32_to_v3f64:
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+define void @fpext_v3f32_to_v3f64(<3 x double> addrspace(1)* %out, <3 x float> %in) {
+  %result = fpext <3 x float> %in to <3 x double>
+  store <3 x double> %result, <3 x double> addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}fpext_v4f32_to_v4f64:
 ; SI: v_cvt_f64_f32_e32
 ; SI: v_cvt_f64_f32_e32

Removed: llvm/trunk/test/CodeGen/AMDGPU/global-extload-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-extload-i1.ll?rev=271570&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-extload-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-extload-i1.ll (removed)
@@ -1,302 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; FIXME: Evergreen broken
-
-; FUNC-LABEL: {{^}}zextload_global_i1_to_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1, i1 addrspace(1)* %in
-  %ext = zext i1 %a to i32
-  store i32 %ext, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i1_to_i32:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1, i1 addrspace(1)* %in
-  %ext = sext i1 %a to i32
-  store i32 %ext, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
-; SI: s_endpgm
-define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
-  %ext = zext <1 x i1> %load to <1 x i32>
-  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
-; SI: s_endpgm
-define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
-  %ext = sext <1 x i1> %load to <1 x i32>
-  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
-; SI: s_endpgm
-define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
-  %ext = zext <2 x i1> %load to <2 x i32>
-  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
-; SI: s_endpgm
-define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
-  %ext = sext <2 x i1> %load to <2 x i32>
-  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
-; SI: s_endpgm
-define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
-  %ext = zext <4 x i1> %load to <4 x i32>
-  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
-; SI: s_endpgm
-define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
-  %ext = sext <4 x i1> %load to <4 x i32>
-  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
-; SI: s_endpgm
-define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
-  %ext = zext <8 x i1> %load to <8 x i32>
-  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
-; SI: s_endpgm
-define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
-  %ext = sext <8 x i1> %load to <8 x i32>
-  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
-; SI: s_endpgm
-define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
-  %ext = zext <16 x i1> %load to <16 x i32>
-  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
-; SI: s_endpgm
-define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
-  %ext = sext <16 x i1> %load to <16 x i32>
-  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
-  ret void
-}
-
-; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
-; XSI: s_endpgm
-; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
-;   %ext = zext <32 x i1> %load to <32 x i32>
-;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
-;   ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
-; XSI: s_endpgm
-; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
-;   %ext = sext <32 x i1> %load to <32 x i32>
-;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
-;   ret void
-; }
-
-; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
-; XSI: s_endpgm
-; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
-;   %ext = zext <64 x i1> %load to <64 x i32>
-;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
-;   ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
-; XSI: s_endpgm
-; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
-;   %ext = sext <64 x i1> %load to <64 x i32>
-;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
-;   ret void
-; }
-
-; FUNC-LABEL: {{^}}zextload_global_i1_to_i64:
-; SI-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
-; SI-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
-; SI: buffer_store_dwordx2
-define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1, i1 addrspace(1)* %in
-  %ext = zext i1 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i1_to_i64:
-; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
-; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
-; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
-; SI: buffer_store_dwordx2
-define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1, i1 addrspace(1)* %in
-  %ext = sext i1 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
-; SI: s_endpgm
-define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
-  %ext = zext <1 x i1> %load to <1 x i64>
-  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
-; SI: s_endpgm
-define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
-  %ext = sext <1 x i1> %load to <1 x i64>
-  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
-; SI: s_endpgm
-define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
-  %ext = zext <2 x i1> %load to <2 x i64>
-  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
-; SI: s_endpgm
-define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
-  %ext = sext <2 x i1> %load to <2 x i64>
-  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
-; SI: s_endpgm
-define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
-  %ext = zext <4 x i1> %load to <4 x i64>
-  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
-; SI: s_endpgm
-define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
-  %ext = sext <4 x i1> %load to <4 x i64>
-  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
-; SI: s_endpgm
-define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
-  %ext = zext <8 x i1> %load to <8 x i64>
-  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
-; SI: s_endpgm
-define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
-  %ext = sext <8 x i1> %load to <8 x i64>
-  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
-; SI: s_endpgm
-define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
-  %ext = zext <16 x i1> %load to <16 x i64>
-  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
-; SI: s_endpgm
-define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
-  %ext = sext <16 x i1> %load to <16 x i64>
-  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
-  ret void
-}
-
-; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
-; XSI: s_endpgm
-; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
-;   %ext = zext <32 x i1> %load to <32 x i64>
-;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
-;   ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
-; XSI: s_endpgm
-; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
-;   %ext = sext <32 x i1> %load to <32 x i64>
-;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
-;   ret void
-; }
-
-; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
-; XSI: s_endpgm
-; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
-;   %ext = zext <64 x i1> %load to <64 x i64>
-;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
-;   ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
-; XSI: s_endpgm
-; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
-;   %ext = sext <64 x i1> %load to <64 x i64>
-;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
-;   ret void
-; }

Removed: llvm/trunk/test/CodeGen/AMDGPU/global-extload-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-extload-i16.ll?rev=271570&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-extload-i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-extload-i16.ll (removed)
@@ -1,302 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; FIXME: cypress is broken because the bigger testcases spill and it's not implemented
-
-; FUNC-LABEL: {{^}}zextload_global_i16_to_i32:
-; SI: buffer_load_ushort
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16, i16 addrspace(1)* %in
-  %ext = zext i16 %a to i32
-  store i32 %ext, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i16_to_i32:
-; SI: buffer_load_sshort
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16, i16 addrspace(1)* %in
-  %ext = sext i16 %a to i32
-  store i32 %ext, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32:
-; SI: buffer_load_ushort
-; SI: s_endpgm
-define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
-  %ext = zext <1 x i16> %load to <1 x i32>
-  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32:
-; SI: buffer_load_sshort
-; SI: s_endpgm
-define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
-  %ext = sext <1 x i16> %load to <1 x i32>
-  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
-; SI: s_endpgm
-define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
-  %ext = zext <2 x i16> %load to <2 x i32>
-  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
-; SI: s_endpgm
-define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
-  %ext = sext <2 x i16> %load to <2 x i32>
-  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
-; SI: s_endpgm
-define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
-  %ext = zext <4 x i16> %load to <4 x i32>
-  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
-; SI: s_endpgm
-define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
-  %ext = sext <4 x i16> %load to <4 x i32>
-  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
-; SI: s_endpgm
-define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
-  %ext = zext <8 x i16> %load to <8 x i32>
-  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
-; SI: s_endpgm
-define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
-  %ext = sext <8 x i16> %load to <8 x i32>
-  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
-; SI: s_endpgm
-define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
-  %ext = zext <16 x i16> %load to <16 x i32>
-  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
-; SI: s_endpgm
-define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
-  %ext = sext <16 x i16> %load to <16 x i32>
-  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
-; SI: s_endpgm
-define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
-  %ext = zext <32 x i16> %load to <32 x i32>
-  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
-; SI: s_endpgm
-define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
-  %ext = sext <32 x i16> %load to <32 x i32>
-  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
-; SI: s_endpgm
-define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
-  %ext = zext <64 x i16> %load to <64 x i32>
-  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
-; SI: s_endpgm
-define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
-  %ext = sext <64 x i16> %load to <64 x i32>
-  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
-; SI-DAG: buffer_load_ushort v[[LO:[0-9]+]],
-; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
-define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16, i16 addrspace(1)* %in
-  %ext = zext i16 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
-; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
-; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: buffer_store_dwordx2
-define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16, i16 addrspace(1)* %in
-  %ext = sext i16 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
-; SI: s_endpgm
-define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
-  %ext = zext <1 x i16> %load to <1 x i64>
-  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
-; SI: s_endpgm
-define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
-  %ext = sext <1 x i16> %load to <1 x i64>
-  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
-; SI: s_endpgm
-define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
-  %ext = zext <2 x i16> %load to <2 x i64>
-  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
-; SI: s_endpgm
-define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
-  %ext = sext <2 x i16> %load to <2 x i64>
-  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
-; SI: s_endpgm
-define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
-  %ext = zext <4 x i16> %load to <4 x i64>
-  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
-; SI: s_endpgm
-define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
-  %ext = sext <4 x i16> %load to <4 x i64>
-  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
-; SI: s_endpgm
-define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
-  %ext = zext <8 x i16> %load to <8 x i64>
-  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
-; SI: s_endpgm
-define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
-  %ext = sext <8 x i16> %load to <8 x i64>
-  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
-; SI: s_endpgm
-define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
-  %ext = zext <16 x i16> %load to <16 x i64>
-  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
-; SI: s_endpgm
-define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
-  %ext = sext <16 x i16> %load to <16 x i64>
-  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
-; SI: s_endpgm
-define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
-  %ext = zext <32 x i16> %load to <32 x i64>
-  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
-; SI: s_endpgm
-define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
-  %ext = sext <32 x i16> %load to <32 x i64>
-  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
-; SI: s_endpgm
-define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
-  %ext = zext <64 x i16> %load to <64 x i64>
-  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
-; SI: s_endpgm
-define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
-  %ext = sext <64 x i16> %load to <64 x i64>
-  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
-  ret void
-}

Removed: llvm/trunk/test/CodeGen/AMDGPU/global-extload-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-extload-i32.ll?rev=271570&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-extload-i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-extload-i32.ll (removed)
@@ -1,308 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
-; SI-DAG: buffer_load_dword v[[LO:[0-9]+]],
-; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
-define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %a = load i32, i32 addrspace(1)* %in
-  %ext = zext i32 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
-; SI: buffer_load_dword [[LOAD:v[0-9]+]],
-; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: buffer_store_dwordx2
-define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %a = load i32, i32 addrspace(1)* %in
-  %ext = sext i32 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
-; SI: buffer_load_dword
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
-  %ext = zext <1 x i32> %load to <1 x i64>
-  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
-; SI: buffer_load_dword
-; SI: v_ashrrev_i32
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
-  %ext = sext <1 x i32> %load to <1 x i64>
-  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
-; SI: buffer_load_dwordx2
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
-define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
-  %ext = zext <2 x i32> %load to <2 x i64>
-  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
-; SI: buffer_load_dwordx2
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-; SI: s_endpgm
-define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
-  %ext = sext <2 x i32> %load to <2 x i64>
-  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
-define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
-  %ext = zext <4 x i32> %load to <4 x i64>
-  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
-; SI: buffer_load_dwordx4
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI: s_endpgm
-define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
-  %ext = sext <4 x i32> %load to <4 x i64>
-  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI: s_endpgm
-define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
-  %ext = zext <8 x i32> %load to <8 x i64>
-  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI: s_endpgm
-define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
-  %ext = sext <8 x i32> %load to <8 x i64>
-  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-; SI: s_endpgm
-define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
-  %ext = sext <16 x i32> %load to <16 x i64>
-  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
-define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
-  %ext = zext <16 x i32> %load to <16 x i64>
-  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI: s_endpgm
-define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
-  %ext = sext <32 x i32> %load to <32 x i64>
-  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI: s_endpgm
-define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
-  %ext = zext <32 x i32> %load to <32 x i64>
-  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
-  ret void
-}

Removed: llvm/trunk/test/CodeGen/AMDGPU/global-extload-i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-extload-i8.ll?rev=271570&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-extload-i8.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-extload-i8.ll (removed)
@@ -1,299 +0,0 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}zextload_global_i8_to_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8, i8 addrspace(1)* %in
-  %ext = zext i8 %a to i32
-  store i32 %ext, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i8_to_i32:
-; SI: buffer_load_sbyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8, i8 addrspace(1)* %in
-  %ext = sext i8 %a to i32
-  store i32 %ext, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
-; SI: s_endpgm
-define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
-  %ext = zext <1 x i8> %load to <1 x i32>
-  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
-; SI: s_endpgm
-define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
-  %ext = sext <1 x i8> %load to <1 x i32>
-  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
-; SI: s_endpgm
-define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
-  %ext = zext <2 x i8> %load to <2 x i32>
-  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
-; SI: s_endpgm
-define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
-  %ext = sext <2 x i8> %load to <2 x i32>
-  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
-; SI: s_endpgm
-define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
-  %ext = zext <4 x i8> %load to <4 x i32>
-  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
-; SI: s_endpgm
-define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
-  %ext = sext <4 x i8> %load to <4 x i32>
-  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
-; SI: s_endpgm
-define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
-  %ext = zext <8 x i8> %load to <8 x i32>
-  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
-; SI: s_endpgm
-define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
-  %ext = sext <8 x i8> %load to <8 x i32>
-  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
-; SI: s_endpgm
-define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
-  %ext = zext <16 x i8> %load to <16 x i32>
-  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
-; SI: s_endpgm
-define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
-  %ext = sext <16 x i8> %load to <16 x i32>
-  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
-  ret void
-}
-
-; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
-; XSI: s_endpgm
-; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
-;   %ext = zext <32 x i8> %load to <32 x i32>
-;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
-;   ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
-; XSI: s_endpgm
-; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
-;   %ext = sext <32 x i8> %load to <32 x i32>
-;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
-;   ret void
-; }
-
-; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
-; XSI: s_endpgm
-; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
-;   %ext = zext <64 x i8> %load to <64 x i32>
-;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
-;   ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
-; XSI: s_endpgm
-; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
-;   %ext = sext <64 x i8> %load to <64 x i32>
-;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
-;   ret void
-; }
-
-; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
-; SI-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
-; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
-define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8, i8 addrspace(1)* %in
-  %ext = zext i8 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
-; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
-; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: buffer_store_dwordx2
-define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8, i8 addrspace(1)* %in
-  %ext = sext i8 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
-; SI: s_endpgm
-define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
-  %ext = zext <1 x i8> %load to <1 x i64>
-  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
-; SI: s_endpgm
-define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
-  %ext = sext <1 x i8> %load to <1 x i64>
-  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
-; SI: s_endpgm
-define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
-  %ext = zext <2 x i8> %load to <2 x i64>
-  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
-; SI: s_endpgm
-define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
-  %ext = sext <2 x i8> %load to <2 x i64>
-  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
-; SI: s_endpgm
-define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
-  %ext = zext <4 x i8> %load to <4 x i64>
-  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
-; SI: s_endpgm
-define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
-  %ext = sext <4 x i8> %load to <4 x i64>
-  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
-; SI: s_endpgm
-define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
-  %ext = zext <8 x i8> %load to <8 x i64>
-  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
-; SI: s_endpgm
-define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
-  %ext = sext <8 x i8> %load to <8 x i64>
-  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
-; SI: s_endpgm
-define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
-  %ext = zext <16 x i8> %load to <16 x i64>
-  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
-; SI: s_endpgm
-define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
-  %ext = sext <16 x i8> %load to <16 x i64>
-  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
-; SI: s_endpgm
-define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
-  %ext = zext <32 x i8> %load to <32 x i64>
-  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
-; SI: s_endpgm
-define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
-  %ext = sext <32 x i8> %load to <32 x i64>
-  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
-  ret void
-}
-
-; ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
-; ; XSI: s_endpgm
-; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
-;   %ext = zext <64 x i8> %load to <64 x i64>
-;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
-;   ret void
-; }
-
-; ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
-; ; XSI: s_endpgm
-; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
-;   %ext = sext <64 x i8> %load to <64 x i64>
-;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
-;   ret void
-; }

Modified: llvm/trunk/test/CodeGen/AMDGPU/kernel-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/kernel-args.ll?rev=271571&r1=271570&r2=271571&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/kernel-args.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/kernel-args.ll Thu Jun  2 14:54:26 2016
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
@@ -475,3 +475,55 @@ entry:
 ;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
 ;   ret void
 ; }
+
+; FUNC-LABEL: {{^}}i1_arg:
+; SI: buffer_load_ubyte
+; SI: v_and_b32_e32
+; SI: buffer_store_byte
+; SI: s_endpgm
+define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
+  store i1 %x, i1 addrspace(1)* %out, align 1
+  ret void
+}
+
+; FUNC-LABEL: {{^}}i1_arg_zext_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
+  %ext = zext i1 %x to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}i1_arg_zext_i64:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
+  %ext = zext i1 %x to i64
+  store i64 %ext, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}i1_arg_sext_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
+  %ext = sext i1 %x to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}i1_arg_sext_i64:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32
+; SI: v_ashrrev_i32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
+  %ext = sext i1 %x to i64
+  store i64 %ext, i64 addrspace(1)* %out, align 8
+  ret void
+}

Added: llvm/trunk/test/CodeGen/AMDGPU/load-constant-f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-f64.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-f64.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-f64.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}constant_load_f64:
+; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
+; GCN-NOHSA: buffer_store_dwordx2
+; GCN-HSA: flat_store_dwordx2
+define void @constant_load_f64(double addrspace(1)* %out, double addrspace(2)* %in) #0 {
+  %ld = load double, double addrspace(2)* %in
+  store double %ld, double addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i1.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i1.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i1.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,371 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}constant_load_i1:
+; GCN: buffer_load_ubyte
+; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
+; GCN: buffer_store_byte
+
+; EG: VTX_READ_8
+; EG: AND_INT
+define void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
+  %load = load i1, i1 addrspace(2)* %in
+  store i1 %load, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v2i1:
+define void @constant_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+  store <2 x i1> %load, <2 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v3i1:
+define void @constant_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+  store <3 x i1> %load, <3 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v4i1:
+define void @constant_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+  store <4 x i1> %load, <4 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v8i1:
+define void @constant_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+  store <8 x i1> %load, <8 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v16i1:
+define void @constant_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+  store <16 x i1> %load, <16 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v32i1:
+define void @constant_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+  store <32 x i1> %load, <32 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v64i1:
+define void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+  store <64 x i1> %load, <64 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i1_to_i32:
+; GCN: buffer_load_ubyte
+; GCN: buffer_store_dword
+define void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
+  %a = load i1, i1 addrspace(2)* %in
+  %ext = zext i1 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i1_to_i32:
+; GCN: buffer_load_ubyte
+; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
+; GCN: buffer_store_dword
+
+; EG: VTX_READ_8
+; EG: BFE_INT
+define void @constant_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
+  %a = load i1, i1 addrspace(2)* %in
+  %ext = sext i1 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i32:
+define void @constant_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+  %ext = zext <1 x i1> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i32:
+define void @constant_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+  %ext = sext <1 x i1> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i32:
+define void @constant_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+  %ext = zext <2 x i1> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i32:
+define void @constant_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+  %ext = sext <2 x i1> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i32:
+define void @constant_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+  %ext = zext <3 x i1> %load to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i32:
+define void @constant_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+  %ext = sext <3 x i1> %load to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i32:
+define void @constant_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+  %ext = zext <4 x i1> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i32:
+define void @constant_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+  %ext = sext <4 x i1> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i32:
+define void @constant_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+  %ext = zext <8 x i1> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i32:
+define void @constant_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+  %ext = sext <8 x i1> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i32:
+define void @constant_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+  %ext = zext <16 x i1> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i32:
+define void @constant_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+  %ext = sext <16 x i1> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i32:
+define void @constant_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+  %ext = zext <32 x i1> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i32:
+define void @constant_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+  %ext = sext <32 x i1> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i32:
+define void @constant_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+  %ext = zext <64 x i1> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i32:
+define void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+  %ext = sext <64 x i1> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i1_to_i64:
+; GCN-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]
+; GCN: buffer_store_dwordx2
+define void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
+  %a = load i1, i1 addrspace(2)* %in
+  %ext = zext i1 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i1_to_i64:
+; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
+; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
+; GCN: buffer_store_dwordx2
+define void @constant_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
+  %a = load i1, i1 addrspace(2)* %in
+  %ext = sext i1 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i64:
+define void @constant_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+  %ext = zext <1 x i1> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i64:
+define void @constant_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+  %ext = sext <1 x i1> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i64:
+define void @constant_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+  %ext = zext <2 x i1> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i64:
+define void @constant_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+  %ext = sext <2 x i1> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i64:
+define void @constant_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+  %ext = zext <3 x i1> %load to <3 x i64>
+  store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i64:
+define void @constant_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+  %ext = sext <3 x i1> %load to <3 x i64>
+  store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i64:
+define void @constant_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+  %ext = zext <4 x i1> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i64:
+define void @constant_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+  %ext = sext <4 x i1> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i64:
+define void @constant_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+  %ext = zext <8 x i1> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i64:
+define void @constant_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+  %ext = sext <8 x i1> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i64:
+define void @constant_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+  %ext = zext <16 x i1> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i64:
+define void @constant_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+  %ext = sext <16 x i1> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i64:
+define void @constant_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+  %ext = zext <32 x i1> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i64:
+define void @constant_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+  %ext = sext <32 x i1> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i64:
+define void @constant_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+  %ext = zext <64 x i1> %load to <64 x i64>
+  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i64:
+define void @constant_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+  %ext = sext <64 x i1> %load to <64 x i64>
+  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i16.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i16.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i16.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,761 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}constant_load_i16:
+; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
+; GCN-HSA: flat_load_ushort
+
+; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
+entry:
+  %ld = load i16, i16 addrspace(2)* %in
+  store i16 %ld, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v2i16:
+; GCN: s_load_dword s
+
+; EG: VTX_READ_32
+define void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) {
+entry:
+  %ld = load <2 x i16>, <2 x i16> addrspace(2)* %in
+  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v3i16:
+; GCN: s_load_dwordx2 s
+
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_16
+define void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
+  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v4i16:
+; GCN: s_load_dwordx2
+
+; EG: VTX_READ_64
+define void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) {
+entry:
+  %ld = load <4 x i16>, <4 x i16> addrspace(2)* %in
+  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v8i16:
+; GCN: s_load_dwordx4
+
+; EG: VTX_READ_128
+define void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) {
+entry:
+  %ld = load <8 x i16>, <8 x i16> addrspace(2)* %in
+  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v16i16:
+; GCN: s_load_dwordx8
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) {
+entry:
+  %ld = load <16 x i16>, <16 x i16> addrspace(2)* %in
+  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_store_dword
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_store_dword
+
+; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+define void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
+  %a = load i16, i16 addrspace(2)* %in
+  %ext = zext i16 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i16_to_i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_store_dword
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_store_dword
+
+; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
+; EG: 16
+define void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
+  %a = load i16, i16 addrspace(2)* %in
+  %ext = sext i16 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+  %ext = zext <1 x i16> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+  %ext = sext <1 x i16> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+  %ext = zext <2 x i16> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+
+; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: 16
+; EG-DAG: 16
+define void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+  %ext = sext <2 x i16> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_constant_zextload_v3i16_to_v3i32:
+; GCN: s_load_dwordx2
+define void @constant_constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
+  %ext = zext <3 x i16> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_constant_sextload_v3i16_to_v3i32:
+; GCN: s_load_dwordx2
+define void @constant_constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
+  %ext = sext <3 x i16> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_constant_zextload_v4i16_to_v4i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+define void @constant_constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+  %ext = zext <4 x i16> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+
+; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
+; EG-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
+; EG-DAG: 16
+; EG-DAG: 16
+; EG-DAG: 16
+; EG-DAG: 16
+define void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+  %ext = sext <4 x i16> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+  %ext = zext <8 x i16> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+  %ext = sext <8 x i16> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+  %ext = zext <16 x i16> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i32:
+define void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+  %ext = sext <16 x i16> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+  %ext = zext <32 x i16> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+  %ext = sext <32 x i16> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; Zero-extends a <64 x i16> constant-memory load to <64 x i32>.
+define void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
+  %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+  %ext = zext <64 x i16> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i32:
+; Sign-extends a <64 x i16> constant-memory load to <64 x i32>.
+define void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
+  %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+  %ext = sext <64 x i16> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i16_to_i64:
+; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
+; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; Zero-extends a scalar i16 constant-memory load to i64; the checks above
+; expect a zero high half and a 64-bit store.
+define void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
+  %a = load i16, i16 addrspace(2)* %in
+  %ext = zext i16 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i16_to_i64:
+; GCN-NOHSA-DAG: buffer_load_sshort v[[LO:[0-9]+]],
+; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
+; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; Sign-extends a scalar i16 constant-memory load to i64; the checks above
+; expect the high half to be produced by an arithmetic shift right by 31.
+define void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
+  %a = load i16, i16 addrspace(2)* %in
+  %ext = sext i16 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i64:
+; Zero-extends a <1 x i16> constant-memory load to <1 x i64>.
+define void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+  %ext = zext <1 x i16> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i64:
+; Sign-extends a <1 x i16> constant-memory load to <1 x i64>.
+define void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+  %ext = sext <1 x i16> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i64:
+; Zero-extends a <2 x i16> constant-memory load to <2 x i64>.
+define void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+  %ext = zext <2 x i16> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i64:
+; Sign-extends a <2 x i16> constant-memory load to <2 x i64>.
+define void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+  %ext = sext <2 x i16> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i64:
+; Zero-extends a <4 x i16> constant-memory load to <4 x i64>.
+define void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+  %ext = zext <4 x i16> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i64:
+; Sign-extends a <4 x i16> constant-memory load to <4 x i64>.
+define void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+  %ext = sext <4 x i16> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i64:
+; Zero-extends an <8 x i16> constant-memory load to <8 x i64>.
+define void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+  %ext = zext <8 x i16> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i64:
+; Sign-extends an <8 x i16> constant-memory load to <8 x i64>.
+define void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+  %ext = sext <8 x i16> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i64:
+; Zero-extends a <16 x i16> constant-memory load to <16 x i64>.
+define void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+  %ext = zext <16 x i16> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i64:
+; Sign-extends a <16 x i16> constant-memory load to <16 x i64>.
+define void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+  %ext = sext <16 x i16> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i64:
+; Zero-extends a <32 x i16> constant-memory load to <32 x i64>.
+define void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+  %ext = zext <32 x i16> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i64:
+; Sign-extends a <32 x i16> constant-memory load to <32 x i64>.
+define void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+  %ext = sext <32 x i16> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; ; XFUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i64:
+; define void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
+;   %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+;   %ext = zext <64 x i16> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; ; XFUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i64:
+; define void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
+;   %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+;   %ext = sext <64 x i16> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,378 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}constant_load_i32:
+; GCN: s_load_dword s{{[0-9]+}}
+
+; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+; Scalar i32 load from the constant address space (2), stored back to global
+; memory; the checks above pin the selected load instruction per target.
+define void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
+entry:
+  %ld = load i32, i32 addrspace(2)* %in
+  store i32 %ld, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v2i32:
+; GCN: s_load_dwordx2
+
+; EG: VTX_READ_64
+; <2 x i32> load from constant memory, stored to global memory.
+define void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
+  store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v3i32:
+; GCN: s_load_dwordx4
+
+; EG: VTX_READ_128
+; <3 x i32> load from constant memory; the checks above expect it to be
+; widened to a dwordx4 / 128-bit read.
+define void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <3 x i32>, <3 x i32> addrspace(2)* %in
+  store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v4i32:
+; GCN: s_load_dwordx4
+
+; EG: VTX_READ_128
+; <4 x i32> load from constant memory, stored to global memory.
+define void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
+  store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v8i32:
+; GCN: s_load_dwordx8
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; <8 x i32> load from constant memory, stored to global memory.
+define void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
+  store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v16i32:
+; GCN: s_load_dwordx16
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; <16 x i32> load from constant memory, stored to global memory.
+define void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
+  store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i32_to_i64:
+; GCN-DAG: s_load_dword s[[SLO:[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 v[[SHI:[0-9]+]], 0{{$}}
+; GCN: store_dwordx2
+
+; EG: MEM_RAT
+; EG: MEM_RAT
+; Zero-extends a scalar i32 constant-memory load to i64; checks above expect
+; a zero high half and a 64-bit store.
+define void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
+  %ld = load i32, i32 addrspace(2)* %in
+  %ext = zext i32 %ld to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i32_to_i64:
+; GCN: s_load_dword s[[SLO:[0-9]+]]
+; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[SLO]], 31
+; GCN: store_dwordx2
+
+; EG: MEM_RAT
+; EG: MEM_RAT
+; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
+; EG: 31
+; Sign-extends a scalar i32 constant-memory load to i64; checks above expect
+; the high half from a scalar arithmetic shift right by 31.
+define void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
+  %ld = load i32, i32 addrspace(2)* %in
+  %ext = sext i32 %ld to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
+; GCN: s_load_dword
+; GCN: store_dwordx2
+; Zero-extends a <1 x i32> constant-memory load to <1 x i64>.
+define void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
+  %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
+  %ext = zext <1 x i32> %ld to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i32_to_v1i64:
+; GCN: s_load_dword s[[LO:[0-9]+]]
+; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
+; GCN: store_dwordx2
+; Sign-extends a <1 x i32> constant-memory load to <1 x i64>.
+define void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
+  %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
+  %ext = sext <1 x i32> %ld to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
+; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; GCN: store_dwordx4
+; Zero-extends a <2 x i32> constant-memory load to <2 x i64>.
+define void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
+  %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
+  %ext = zext <2 x i32> %ld to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i32_to_v2i64:
+; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+
+; GCN-DAG: s_ashr_i32
+; GCN-DAG: s_ashr_i32
+
+; GCN: store_dwordx4
+; Sign-extends a <2 x i32> constant-memory load to <2 x i64>.
+define void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
+  %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
+  %ext = sext <2 x i32> %ld to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i32_to_v4i64:
+; GCN: s_load_dwordx4
+
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; Zero-extends a <4 x i32> constant-memory load to <4 x i64>.
+define void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
+  %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
+  %ext = zext <4 x i32> %ld to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i32_to_v4i64:
+; GCN: s_load_dwordx4
+
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; Sign-extends a <4 x i32> constant-memory load to <4 x i64>.
+define void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
+  %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
+  %ext = sext <4 x i32> %ld to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i32_to_v8i64:
+; GCN: s_load_dwordx8
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; Zero-extends an <8 x i32> constant-memory load to <8 x i64>.
+define void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
+  %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
+  %ext = zext <8 x i32> %ld to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i32_to_v8i64:
+; GCN: s_load_dwordx8
+
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; Sign-extends an <8 x i32> constant-memory load to <8 x i64>.
+define void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
+  %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
+  %ext = sext <8 x i32> %ld to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i32_to_v16i64:
+; GCN: s_load_dwordx16
+
+
+; GCN-DAG: s_ashr_i32
+
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; Sign-extends a <16 x i32> constant-memory load to <16 x i64>.
+define void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
+  %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
+  %ext = sext <16 x i32> %ld to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i32_to_v16i64:
+; GCN: s_load_dwordx16
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; Zero-extends a <16 x i32> constant-memory load to <16 x i64>.
+define void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
+  %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
+  %ext = zext <16 x i32> %ld to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i32_to_v32i64:
+
+; GCN: s_load_dwordx16
+; GCN: s_load_dwordx16
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; Sign-extends a <32 x i32> constant-memory load to <32 x i64>; the checks
+; above expect two dwordx16 loads and sixteen dwordx4 stores.
+define void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
+  %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
+  %ext = sext <32 x i32> %ld to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i32_to_v32i64:
+; GCN: s_load_dwordx16
+; GCN: s_load_dwordx16
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; Zero-extends a <32 x i32> constant-memory load to <32 x i64>.
+define void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
+  %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
+  %ext = zext <32 x i32> %ld to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i64.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i64.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i64.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,90 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}constant_load_i64:
+; GCN: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; EG: VTX_READ_64
+; Scalar i64 load from constant memory, stored to global memory.
+define void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspace(2)* %in) #0 {
+  %ld = load i64, i64 addrspace(2)* %in
+  store i64 %ld, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v2i64:
+; GCN: s_load_dwordx4
+
+; EG: VTX_READ_128
+; <2 x i64> load from constant memory, stored to global memory.
+define void @constant_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <2 x i64>, <2 x i64> addrspace(2)* %in
+  store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v3i64:
+; GCN-DAG: s_load_dwordx4 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; SI-DAG: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x4{{$}}
+; VI-DAG: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x10{{$}}
+
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; <3 x i64> load from constant memory; checks above expect a dwordx4 plus a
+; dwordx2 read (the dwordx2 offset differs between SI and VI encodings).
+define void @constant_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <3 x i64>, <3 x i64> addrspace(2)* %in
+  store <3 x i64> %ld, <3 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v4i64:
+; GCN: s_load_dwordx8
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; <4 x i64> load from constant memory, stored to global memory.
+define void @constant_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <4 x i64>, <4 x i64> addrspace(2)* %in
+  store <4 x i64> %ld, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v8i64:
+; GCN: s_load_dwordx16
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; <8 x i64> load from constant memory, stored to global memory.
+define void @constant_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <8 x i64>, <8 x i64> addrspace(2)* %in
+  store <8 x i64> %ld, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v16i64:
+; GCN: s_load_dwordx16
+; GCN: s_load_dwordx16
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; <16 x i64> load from constant memory; checks above expect two dwordx16
+; scalar loads.
+define void @constant_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <16 x i64>, <16 x i64> addrspace(2)* %in
+  store <16 x i64> %ld, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i8.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,605 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}constant_load_i8:
+; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+entry:
+  %ld = load i8, i8 addrspace(2)* %in
+  store i8 %ld, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v2i8:
+; GCN-NOHSA: buffer_load_ushort v
+; GCN-HSA: flat_load_ushort v
+
+; EG: VTX_READ_16
+define void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
+  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v3i8:
+; GCN: s_load_dword s
+
+; EG-DAG: VTX_READ_32
+define void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v4i8:
+; GCN: s_load_dword s
+
+; EG: VTX_READ_32
+define void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <4 x i8>, <4 x i8> addrspace(2)* %in
+  store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v8i8:
+; GCN: s_load_dwordx2
+
+; EG: VTX_READ_64
+define void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <8 x i8>, <8 x i8> addrspace(2)* %in
+  store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v16i8:
+; GCN: s_load_dwordx4
+
+; EG: VTX_READ_128
+define void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <16 x i8>, <16 x i8> addrspace(2)* %in
+  store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i8_to_i32:
+; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+define void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+  %a = load i8, i8 addrspace(2)* %in
+  %ext = zext i8 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i8_to_i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
+; EG: 8
+define void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+  %ld = load i8, i8 addrspace(2)* %in
+  %ext = sext i8 %ld to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:
+define void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+  %ext = zext <1 x i8> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i32:
+define void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+  %ext = sext <1 x i8> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+define void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+  %ext = zext <2 x i8> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: 8
+; EG-DAG: 8
+define void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+  %ext = sext <2 x i8> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v3i8_to_v3i32:
+; GCN: s_load_dword s
+
+; GCN-DAG: s_bfe_u32
+; GCN-DAG: s_bfe_u32
+; GCN-DAG: s_and_b32
+define void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+  %ext = zext <3 x i8> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v3i8_to_v3i32:
+; GCN: s_load_dword s
+
+; GCN-DAG: s_bfe_i32
+; GCN-DAG: s_bfe_i32
+; GCN-DAG: s_bfe_i32
+define void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+entry:
+  %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+  %ext = sext <3 x i8> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+define void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+  %ext = zext <4 x i8> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
+; EG-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
+; EG-DAG: 8
+; EG-DAG: 8
+; EG-DAG: 8
+; EG-DAG: 8
+define void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+  %ext = sext <4 x i8> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+define void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+  %ext = zext <8 x i8> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+define void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+  %ext = sext <8 x i8> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i32:
+define void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+  %ext = zext <16 x i8> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i32:
+define void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+  %ext = sext <16 x i8> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i32:
+define void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+  %ext = zext <32 x i8> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i32:
+define void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+  %ext = sext <32 x i8> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i32:
+define void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+  %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+  %ext = zext <64 x i8> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i32:
+define void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+  %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+  %ext = sext <64 x i8> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i8_to_i64:
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+
+; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
+define void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+  %a = load i8, i8 addrspace(2)* %in
+  %ext = zext i8 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i8_to_i64:
+; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
+; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+  %a = load i8, i8 addrspace(2)* %in
+  %ext = sext i8 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i64:
+define void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+  %ext = zext <1 x i8> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i64:
+define void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+  %ext = sext <1 x i8> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:
+define void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+  %ext = zext <2 x i8> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:
+define void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+  %ext = sext <2 x i8> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:
+define void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+  %ext = zext <4 x i8> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:
+define void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+  %ext = sext <4 x i8> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:
+define void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+  %ext = zext <8 x i8> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:
+define void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+  %ext = sext <8 x i8> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:
+define void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+  %ext = zext <16 x i8> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:
+define void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+  %ext = sext <16 x i8> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i64:
+define void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+  %ext = zext <32 x i8> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i64:
+define void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+  %ext = sext <32 x i8> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
+; define void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+;   %ext = zext <64 x i8> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
+; define void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+;   %ext = sext <64 x i8> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; FUNC-LABEL: {{^}}constant_zextload_i8_to_i16:
+; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
+; GCN-NOHSA: buffer_store_short v[[VAL]]
+
+; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
+; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
+define void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+  %a = load i8, i8 addrspace(2)* %in
+  %ext = zext i8 %a to i16
+  store i16 %ext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i8_to_i16:
+; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
+; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],
+
+; GCN-NOHSA: buffer_store_short v[[VAL]]
+; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
+define void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+  %a = load i8, i8 addrspace(2)* %in
+  %ext = sext i8 %a to i16
+  store i16 %ext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
+define void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+  %ext = zext <1 x i8> %load to <1 x i16>
+  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i16:
+define void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+  %ext = sext <1 x i8> %load to <1 x i16>
+  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:
+define void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+  %ext = zext <2 x i8> %load to <2 x i16>
+  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i16:
+define void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+  %ext = sext <2 x i8> %load to <2 x i16>
+  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:
+define void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+  %ext = zext <4 x i8> %load to <4 x i16>
+  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i16:
+define void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+  %ext = sext <4 x i8> %load to <4 x i16>
+  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:
+define void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+  %ext = zext <8 x i8> %load to <8 x i16>
+  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i16:
+define void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+  %ext = sext <8 x i8> %load to <8 x i16>
+  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:
+define void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+  %ext = zext <16 x i8> %load to <16 x i16>
+  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i16:
+define void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+  %ext = sext <16 x i8> %load to <16 x i16>
+  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i16:
+define void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+  %ext = zext <32 x i8> %load to <32 x i16>
+  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i16:
+define void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+  %ext = sext <32 x i8> %load to <32 x i16>
+  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
+; define void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+;   %ext = zext <64 x i8> %load to <64 x i16>
+;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
+; define void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+;   %ext = sext <64 x i8> %load to <64 x i16>
+;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
+;   ret void
+; }
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-global-f32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-f32.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-f32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-f32.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,93 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}global_load_f32:
+; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
+; GCN-HSA: flat_load_dword
+
+; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @global_load_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+entry:
+  %tmp0 = load float, float addrspace(1)* %in
+  store float %tmp0, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2f32:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+
+; R600: VTX_READ_64
+define void @global_load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
+entry:
+  %tmp0 = load <2 x float>, <2 x float> addrspace(1)* %in
+  store <2 x float> %tmp0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3f32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; R600: VTX_READ_128
+define void @global_load_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
+entry:
+  %tmp0 = load <3 x float>, <3 x float> addrspace(1)* %in
+  store <3 x float> %tmp0, <3 x float> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4f32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; R600: VTX_READ_128
+define void @global_load_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
+entry:
+  %tmp0 = load <4 x float>, <4 x float> addrspace(1)* %in
+  store <4 x float> %tmp0, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8f32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; R600: VTX_READ_128
+; R600: VTX_READ_128
+define void @global_load_v8f32(<8 x float> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
+entry:
+  %tmp0 = load <8 x float>, <8 x float> addrspace(1)* %in
+  store <8 x float> %tmp0, <8 x float> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16f32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; R600: VTX_READ_128
+; R600: VTX_READ_128
+; R600: VTX_READ_128
+; R600: VTX_READ_128
+define void @global_load_v16f32(<16 x float> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
+entry:
+  %tmp0 = load <16 x float>, <16 x float> addrspace(1)* %in
+  store <16 x float> %tmp0, <16 x float> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-global-f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-f64.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-f64.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-f64.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,94 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}global_load_f64:
+; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; GCN-NOHSA: buffer_store_dwordx2 [[VAL]]
+
+; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
+define void @global_load_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+  %ld = load double, double addrspace(1)* %in
+  store double %ld, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+define void @global_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <2 x i64>, <2 x i64> addrspace(1)* %in
+  store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3f64:
+; GCN-NOHSA-DAG: buffer_load_dwordx4
+; GCN-NOHSA-DAG: buffer_load_dwordx2
+; GCN-HSA-DAG: flat_load_dwordx4
+; GCN-HSA-DAG: flat_load_dwordx2
+define void @global_load_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <3 x double>, <3 x double> addrspace(1)* %in
+  store <3 x double> %ld, <3 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4f64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+define void @global_load_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <4 x double>, <4 x double> addrspace(1)* %in
+  store <4 x double> %ld, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8f64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+define void @global_load_v8f64(<8 x double> addrspace(1)* %out, <8 x double> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <8 x double>, <8 x double> addrspace(1)* %in
+  store <8 x double> %ld, <8 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16f64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+define void @global_load_v16f64(<16 x double> addrspace(1)* %out, <16 x double> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <16 x double>, <16 x double> addrspace(1)* %in
+  store <16 x double> %ld, <16 x double> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-global-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-i1.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-i1.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i1.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,371 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}global_load_i1:
+; GCN: buffer_load_ubyte
+; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
+; GCN: buffer_store_byte
+
+; EG: VTX_READ_8
+; EG: AND_INT
+define void @global_load_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+  %load = load i1, i1 addrspace(1)* %in
+  store i1 %load, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i1:
+define void @global_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
+  store <2 x i1> %load, <2 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3i1:
+define void @global_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
+  store <3 x i1> %load, <3 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4i1:
+define void @global_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
+  store <4 x i1> %load, <4 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8i1:
+define void @global_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
+  store <8 x i1> %load, <8 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16i1:
+define void @global_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
+  store <16 x i1> %load, <16 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v32i1:
+define void @global_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
+  store <32 x i1> %load, <32 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v64i1:
+define void @global_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
+  store <64 x i1> %load, <64 x i1> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
+; GCN: buffer_load_ubyte
+; GCN: buffer_store_dword
+define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+  %a = load i1, i1 addrspace(1)* %in
+  %ext = zext i1 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i1_to_i32:
+; GCN: buffer_load_ubyte
+; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
+; GCN: buffer_store_dword
+
+; EG: VTX_READ_8
+; EG: BFE_INT
+define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+  %a = load i1, i1 addrspace(1)* %in
+  %ext = sext i1 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i32:
+define void @global_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
+  %ext = zext <1 x i1> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i32:
+define void @global_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
+  %ext = sext <1 x i1> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i32:
+define void @global_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
+  %ext = zext <2 x i1> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i32:
+define void @global_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
+  %ext = sext <2 x i1> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i32:
+define void @global_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
+  %ext = zext <3 x i1> %load to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i32:
+define void @global_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
+  %ext = sext <3 x i1> %load to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i32:
+define void @global_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
+  %ext = zext <4 x i1> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i32:
+define void @global_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
+  %ext = sext <4 x i1> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i32:
+define void @global_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
+  %ext = zext <8 x i1> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i32:
+define void @global_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
+  %ext = sext <8 x i1> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i32:
+define void @global_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
+  %ext = zext <16 x i1> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i32:
+define void @global_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
+  %ext = sext <16 x i1> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i32:
+define void @global_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
+  %ext = zext <32 x i1> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i32:
+define void @global_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
+  %ext = sext <32 x i1> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i32:
+define void @global_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
+  %ext = zext <64 x i1> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i32:
+define void @global_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
+  %ext = sext <64 x i1> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i1_to_i64:
+; GCN-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]{{$}}
+; GCN: buffer_store_dwordx2
+define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+  %a = load i1, i1 addrspace(1)* %in
+  %ext = zext i1 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i1_to_i64:
+; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
+; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
+; GCN: buffer_store_dwordx2
+define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+  %a = load i1, i1 addrspace(1)* %in
+  %ext = sext i1 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i64:
+define void @global_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
+  %ext = zext <1 x i1> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i64:
+define void @global_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
+  %ext = sext <1 x i1> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i64:
+define void @global_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
+  %ext = zext <2 x i1> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i64:
+define void @global_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
+  %ext = sext <2 x i1> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i64:
+define void @global_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
+  %ext = zext <3 x i1> %load to <3 x i64>
+  store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i64:
+define void @global_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
+  %ext = sext <3 x i1> %load to <3 x i64>
+  store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i64:
+define void @global_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
+  %ext = zext <4 x i1> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i64:
+define void @global_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
+  %ext = sext <4 x i1> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i64:
+define void @global_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
+  %ext = zext <8 x i1> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i64:
+define void @global_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
+  %ext = sext <8 x i1> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i64:
+define void @global_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
+  %ext = zext <16 x i1> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i64:
+define void @global_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
+  %ext = sext <16 x i1> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i64:
+define void @global_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
+  %ext = zext <32 x i1> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i64:
+define void @global_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
+  %ext = sext <32 x i1> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i64:
+define void @global_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
+  %ext = zext <64 x i1> %load to <64 x i64>
+  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i64:
+define void @global_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
+  %ext = sext <64 x i1> %load to <64 x i64>
+  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-global-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-i16.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-i16.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i16.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,774 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FIXME: r600 is broken because the bigger testcases spill and it's not implemented
+
+; FUNC-LABEL: {{^}}global_load_i16:
+; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
+; GCN-HSA: flat_load_ushort
+
+; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+entry:
+  %ld = load i16, i16 addrspace(1)* %in
+  store i16 %ld, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i16:
+; GCN-NOHSA: buffer_load_dword v
+; GCN-HSA: flat_load_dword v
+
+; EG: VTX_READ_32
+define void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+entry:
+  %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
+  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3i16:
+; GCN-NOHSA: buffer_load_dwordx2 v
+; GCN-HSA: flat_load_dwordx2 v
+
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_16
+define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
+  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4i16:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+
+; EG: VTX_READ_64
+define void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+entry:
+  %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
+  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8i16:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+define void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
+entry:
+  %ld = load <8 x i16>, <8 x i16> addrspace(1)* %in
+  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16i16:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
+entry:
+  %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in
+  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_store_dword
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_store_dword
+
+; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+define void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+  %a = load i16, i16 addrspace(1)* %in
+  %ext = zext i16 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i16_to_i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_store_dword
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_store_dword
+
+; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
+; EG: 16
+define void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+  %a = load i16, i16 addrspace(1)* %in
+  %ext = sext i16 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
+  %ext = zext <1 x i16> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
+  %ext = sext <1 x i16> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
+  %ext = zext <2 x i16> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+
+; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: 16
+; EG-DAG: 16
+define void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
+  %ext = sext <2 x i16> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_global_zextload_v3i16_to_v3i32:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+define void @global_global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
+  %ext = zext <3 x i16> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_global_sextload_v3i16_to_v3i32:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+define void @global_global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
+  %ext = sext <3 x i16> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_global_zextload_v4i16_to_v4i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+define void @global_global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
+  %ext = zext <4 x i16> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+
+; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
+; EG-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
+; EG-DAG: 16
+; EG-DAG: 16
+; EG-DAG: 16
+; EG-DAG: 16
+define void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
+  %ext = sext <4 x i16> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
+  %ext = zext <8 x i16> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
+  %ext = sext <8 x i16> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
+  %ext = zext <16 x i16> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i32:
+define void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
+  %ext = sext <16 x i16> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
+  %ext = zext <32 x i16> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
+  %ext = sext <32 x i16> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
+  %ext = zext <64 x i16> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i32:
+define void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
+  %ext = sext <64 x i16> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i16_to_i64:
+; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
+; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+  %a = load i16, i16 addrspace(1)* %in
+  %ext = zext i16 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i16_to_i64:
+; GCN-NOHSA-DAG: buffer_load_sshort v[[LO:[0-9]+]],
+; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
+; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+  %a = load i16, i16 addrspace(1)* %in
+  %ext = sext i16 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i64:
+define void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
+  %ext = zext <1 x i16> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i64:
+define void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
+  %ext = sext <1 x i16> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i64:
+define void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
+  %ext = zext <2 x i16> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i64:
+define void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
+  %ext = sext <2 x i16> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i64:
+define void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
+  %ext = zext <4 x i16> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i64:
+define void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
+  %ext = sext <4 x i16> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i64:
+define void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
+  %ext = zext <8 x i16> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i64:
+define void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
+  %ext = sext <8 x i16> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i64:
+define void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
+  %ext = zext <16 x i16> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i64:
+define void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
+  %ext = sext <16 x i16> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i64:
+define void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
+  %ext = zext <32 x i16> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i64:
+define void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
+  %ext = sext <32 x i16> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; ; XFUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i64:
+; define void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
+;   %ext = zext <64 x i16> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; ; XFUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i64:
+; define void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
+;   %ext = sext <64 x i16> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i32.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,523 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}global_load_i32:
+; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
+; GCN-HSA: flat_load_dword
+
+; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+entry:
+  %ld = load i32, i32 addrspace(1)* %in
+  store i32 %ld, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i32:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+
+; EG: VTX_READ_64
+define void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
+  store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3i32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+define void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
+  store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4i32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+define void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8i32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
+  store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16i32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
+  store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i32_to_i64:
+; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]],
+; GCN-HSA-DAG: flat_load_dword v[[LO:[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
+
+; EG: MEM_RAT
+; EG: MEM_RAT
+define void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %ld = load i32, i32 addrspace(1)* %in
+  %ext = zext i32 %ld to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i32_to_i64:
+; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
+; GCN-HSA: flat_load_dword v[[LO:[0-9]+]]
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+
+; EG: MEM_RAT
+; EG: MEM_RAT
+; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
+; EG: 31
+define void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %ld = load i32, i32 addrspace(1)* %in
+  %ext = sext i32 %ld to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64:
+; GCN-NOHSA: buffer_load_dword
+; GCN-NOHSA: buffer_store_dwordx2
+
+; GCN-HSA: flat_load_dword
+; GCN-HSA: flat_store_dwordx2
+define void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
+  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
+  %ext = zext <1 x i32> %ld to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64:
+; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
+; GCN-HSA: flat_load_dword v[[LO:[0-9]+]]
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
+  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
+  %ext = sext <1 x i32> %ld to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_load_dwordx2
+; GCN-HSA: flat_store_dwordx4
+define void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
+  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
+  %ext = zext <2 x i32> %ld to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
+  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
+  %ext = sext <2 x i32> %ld to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+define void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  %ext = zext <4 x i32> %ld to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  %ext = sext <4 x i32> %ld to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
+  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
+  %ext = zext <8 x i32> %ld to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
+  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
+  %ext = sext <8 x i32> %ld to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
+  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
+  %ext = sext <16 x i32> %ld to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+define void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
+  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
+  %ext = zext <16 x i32> %ld to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64:
+
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+define void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
+  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
+  %ext = sext <32 x i32> %ld to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
+  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
+  %ext = zext <32 x i32> %ld to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-global-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-i64.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-i64.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i64.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,125 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}global_load_i64:
+; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; GCN-NOHSA: buffer_store_dwordx2 [[VAL]]
+
+; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
+
+; EG: VTX_READ_64
+define void @global_load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+  %ld = load i64, i64 addrspace(1)* %in
+  store i64 %ld, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+define void @global_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <2 x i64>, <2 x i64> addrspace(1)* %in
+  store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3i64:
+; GCN-NOHSA-DAG: buffer_load_dwordx4
+; GCN-NOHSA-DAG: buffer_load_dwordx2
+; GCN-HSA-DAG: flat_load_dwordx4
+; GCN-HSA-DAG: flat_load_dwordx2
+
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+define void @global_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <3 x i64>, <3 x i64> addrspace(1)* %in
+  store <3 x i64> %ld, <3 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <4 x i64>, <4 x i64> addrspace(1)* %in
+  store <4 x i64> %ld, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <8 x i64>, <8 x i64> addrspace(1)* %in
+  store <8 x i64> %ld, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <16 x i64>, <16 x i64> addrspace(1)* %in
+  store <16 x i64> %ld, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-global-i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-global-i8.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-global-i8.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-global-i8.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,579 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}global_load_i8:
+; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @global_load_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+entry:
+  %ld = load i8, i8 addrspace(1)* %in
+  store i8 %ld, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i8:
+; GCN-NOHSA: buffer_load_ushort v
+; GCN-HSA: flat_load_ushort v
+
+; EG: VTX_READ_16
+define void @global_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
+  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3i8:
+; GCN-NOHSA: buffer_load_dword v
+; GCN-HSA: flat_load_dword v
+
+; EG-DAG: VTX_READ_32
+define void @global_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
+  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4i8:
+; GCN-NOHSA: buffer_load_dword v
+; GCN-HSA: flat_load_dword v
+
+; EG: VTX_READ_32
+define void @global_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in
+  store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8i8:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+
+; EG: VTX_READ_64
+define void @global_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <8 x i8>, <8 x i8> addrspace(1)* %in
+  store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16i8:
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+define void @global_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <16 x i8>, <16 x i8> addrspace(1)* %in
+  store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i8_to_i32:
+; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+define void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+  %a = load i8, i8 addrspace(1)* %in
+  %ext = zext i8 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i8_to_i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
+; EG: 8
+define void @global_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+  %ld = load i8, i8 addrspace(1)* %in
+  %ext = sext i8 %ld to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32:
+define void @global_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+  %ext = zext <1 x i8> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i32:
+define void @global_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+  %ext = sext <1 x i8> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+define void @global_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+  %ext = zext <2 x i8> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: 8
+; EG-DAG: 8
+define void @global_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+  %ext = sext <2 x i8> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v3i8_to_v3i32:
+; GCN-NOHSA: buffer_load_dword v
+; GCN-HSA: flat_load_dword v
+
+; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
+; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
+; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
+define void @global_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
+  %ext = zext <3 x i8> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v3i8_to_v3i32:
+; GCN-NOHSA: buffer_load_dword v
+; GCN-HSA: flat_load_dword v
+
+; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
+; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
+define void @global_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
+entry:
+  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
+  %ext = sext <3 x i8> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+define void @global_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+  %ext = zext <4 x i8> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
+; EG-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
+; EG-DAG: 8
+; EG-DAG: 8
+; EG-DAG: 8
+; EG-DAG: 8
+define void @global_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+  %ext = sext <4 x i8> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i32:
+define void @global_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+  %ext = zext <8 x i8> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i32:
+define void @global_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+  %ext = sext <8 x i8> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i32:
+define void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+  %ext = zext <16 x i8> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i32:
+define void @global_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+  %ext = sext <16 x i8> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i32:
+define void @global_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+  %ext = zext <32 x i8> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i32:
+define void @global_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+  %ext = sext <32 x i8> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i32:
+define void @global_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+  %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+  %ext = zext <64 x i8> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i32:
+define void @global_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+  %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+  %ext = sext <64 x i8> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i8_to_i64:
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+
+; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
+define void @global_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+  %a = load i8, i8 addrspace(1)* %in
+  %ext = zext i8 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i8_to_i64:
+; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
+; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @global_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+  %a = load i8, i8 addrspace(1)* %in
+  %ext = sext i8 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i64:
+define void @global_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+  %ext = zext <1 x i8> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i64:
+define void @global_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+  %ext = sext <1 x i8> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64:
+define void @global_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+  %ext = zext <2 x i8> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64:
+define void @global_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+  %ext = sext <2 x i8> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64:
+define void @global_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+  %ext = zext <4 x i8> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64:
+define void @global_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+  %ext = sext <4 x i8> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64:
+define void @global_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+  %ext = zext <8 x i8> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64:
+define void @global_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+  %ext = sext <8 x i8> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64:
+define void @global_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+  %ext = zext <16 x i8> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64:
+define void @global_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+  %ext = sext <16 x i8> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i64:
+define void @global_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+  %ext = zext <32 x i8> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i64:
+define void @global_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+  %ext = sext <32 x i8> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64:
+; define void @global_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+;   %ext = zext <64 x i8> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64:
+; define void @global_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+;   %ext = sext <64 x i8> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; FUNC-LABEL: {{^}}global_zextload_i8_to_i16:
+; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
+; GCN-NOHSA: buffer_store_short v[[VAL]]
+
+; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
+; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
+define void @global_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+  %a = load i8, i8 addrspace(1)* %in
+  %ext = zext i8 %a to i16
+  store i16 %ext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i8_to_i16:
+; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
+; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],
+
+; GCN-NOHSA: buffer_store_short v[[VAL]]
+; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
+define void @global_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+  %a = load i8, i8 addrspace(1)* %in
+  %ext = sext i8 %a to i16
+  store i16 %ext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16:
+define void @global_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+  %ext = zext <1 x i8> %load to <1 x i16>
+  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i16:
+define void @global_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+  %ext = sext <1 x i8> %load to <1 x i16>
+  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16:
+define void @global_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+  %ext = zext <2 x i8> %load to <2 x i16>
+  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i16:
+define void @global_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+  %ext = sext <2 x i8> %load to <2 x i16>
+  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16:
+define void @global_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+  %ext = zext <4 x i8> %load to <4 x i16>
+  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i16:
+define void @global_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+  %ext = sext <4 x i8> %load to <4 x i16>
+  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16:
+define void @global_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+  %ext = zext <8 x i8> %load to <8 x i16>
+  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i16:
+define void @global_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+  %ext = sext <8 x i8> %load to <8 x i16>
+  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16:
+define void @global_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+  %ext = zext <16 x i8> %load to <16 x i16>
+  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i16:
+define void @global_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+  %ext = sext <16 x i8> %load to <16 x i16>
+  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i16:
+define void @global_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+  %ext = zext <32 x i8> %load to <32 x i16>
+  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i16:
+define void @global_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+  %ext = sext <32 x i8> %load to <32 x i16>
+  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16:
+; define void @global_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+;   %ext = zext <64 x i8> %load to <64 x i16>
+;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16:
+; define void @global_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+;   %ext = sext <64 x i8> %load to <64 x i16>
+;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
+;   ret void
+; }
+
+attributes #0 = { nounwind }

Removed: llvm/trunk/test/CodeGen/AMDGPU/load-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-i1.ll?rev=271570&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-i1.ll (removed)
@@ -1,149 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}global_copy_i1_to_i1:
-; SI: buffer_load_ubyte
-; SI: v_and_b32_e32 v{{[0-9]+}}, 1
-; SI: buffer_store_byte
-; SI: s_endpgm
-
-; EG: VTX_READ_8
-; EG: AND_INT
-define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1, i1 addrspace(1)* %in
-  store i1 %load, i1 addrspace(1)* %out, align 1
-  ret void
-}
-
-; FUNC-LABEL: {{^}}local_copy_i1_to_i1:
-; SI: ds_read_u8
-; SI: v_and_b32_e32 v{{[0-9]+}}, 1
-; SI: ds_write_b8
-; SI: s_endpgm
-
-; EG: LDS_UBYTE_READ_RET
-; EG: AND_INT
-; EG: LDS_BYTE_WRITE
-define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind {
-  %load = load i1, i1 addrspace(3)* %in
-  store i1 %load, i1 addrspace(3)* %out, align 1
-  ret void
-}
-
-; FUNC-LABEL: {{^}}constant_copy_i1_to_i1:
-; SI: buffer_load_ubyte
-; SI: v_and_b32_e32 v{{[0-9]+}}, 1
-; SI: buffer_store_byte
-; SI: s_endpgm
-
-; EG: VTX_READ_8
-; EG: AND_INT
-define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind {
-  %load = load i1, i1 addrspace(2)* %in
-  store i1 %load, i1 addrspace(1)* %out, align 1
-  ret void
-}
-
-; FUNC-LABEL: {{^}}global_sextload_i1_to_i32:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32
-; SI: buffer_store_dword
-; SI: s_endpgm
-
-; EG: VTX_READ_8
-; EG: BFE_INT
-define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1, i1 addrspace(1)* %in
-  %ext = sext i1 %load to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-
-define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1, i1 addrspace(1)* %in
-  %ext = zext i1 %load to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; FUNC-LABEL: {{^}}global_sextload_i1_to_i64:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1, i1 addrspace(1)* %in
-  %ext = sext i1 %load to i64
-  store i64 %ext, i64 addrspace(1)* %out, align 4
-  ret void
-}
-
-; FUNC-LABEL: {{^}}global_zextload_i1_to_i64:
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1, i1 addrspace(1)* %in
-  %ext = zext i1 %load to i64
-  store i64 %ext, i64 addrspace(1)* %out, align 4
-  ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg:
-; SI: buffer_load_ubyte
-; SI: v_and_b32_e32
-; SI: buffer_store_byte
-; SI: s_endpgm
-define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
-  store i1 %x, i1 addrspace(1)* %out, align 1
-  ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_zext_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
-  %ext = zext i1 %x to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_zext_i64:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
-  %ext = zext i1 %x to i64
-  store i64 %ext, i64 addrspace(1)* %out, align 8
-  ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_sext_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
-  %ext = sext i1 %x to i32
-  store i32 %ext, i32addrspace(1)* %out, align 4
-  ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_sext_i64:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32
-; SI: v_ashrrev_i32
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
-  %ext = sext i1 %x to i64
-  store i64 %ext, i64 addrspace(1)* %out, align 8
-  ret void
-}

Added: llvm/trunk/test/CodeGen/AMDGPU/load-local-f32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-f32.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-f32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-f32.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,109 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}load_f32_local:
+; GCN: s_mov_b32 m0
+; GCN: ds_read_b32
+
+; EG: LDS_READ_RET
+define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) #0 {
+entry:
+  %tmp0 = load float, float addrspace(3)* %in
+  store float %tmp0, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}load_v2f32_local:
+; GCN: s_mov_b32 m0
+; GCN: ds_read_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) #0 {
+entry:
+  %tmp0 = load <2 x float>, <2 x float> addrspace(3)* %in
+  store <2 x float> %tmp0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; FIXME: should only do one b64 load
+; FUNC-LABEL: {{^}}local_load_v3f32:
+; GCN: ds_read2_b64
+; GCN: s_waitcnt
+; GCN-DAG: ds_write_b64
+; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:8{{$}}
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v3f32(<3 x float> addrspace(3)* %out, <3 x float> addrspace(3)* %in) #0 {
+entry:
+  %tmp0 = load <3 x float>, <3 x float> addrspace(3)* %in
+  store <3 x float> %tmp0, <3 x float> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4f32:
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v4f32(<4 x float> addrspace(3)* %out, <4 x float> addrspace(3)* %in) #0 {
+entry:
+  %tmp0 = load <4 x float>, <4 x float> addrspace(3)* %in
+  store <4 x float> %tmp0, <4 x float> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8f32:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v8f32(<8 x float> addrspace(3)* %out, <8 x float> addrspace(3)* %in) #0 {
+entry:
+  %tmp0 = load <8 x float>, <8 x float> addrspace(3)* %in
+  store <8 x float> %tmp0, <8 x float> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16f32:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v16f32(<16 x float> addrspace(3)* %out, <16 x float> addrspace(3)* %in) #0 {
+entry:
+  %tmp0 = load <16 x float>, <16 x float> addrspace(3)* %in
+  store <16 x float> %tmp0, <16 x float> addrspace(3)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-local-f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-f64.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-f64.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-f64.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,154 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}local_load_f64:
+; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}}
+; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]]
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_f64(double addrspace(3)* %out, double addrspace(3)* %in) #0 {
+  %ld = load double, double addrspace(3)* %in
+  store double %ld, double addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2f64:
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v2f64(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <2 x double>, <2 x double> addrspace(3)* %in
+  store <2 x double> %ld, <2 x double> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3f64:
+; GCN-DAG: ds_read2_b64
+; GCN-DAG: ds_read_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v3f64(<3 x double> addrspace(3)* %out, <3 x double> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <3 x double>, <3 x double> addrspace(3)* %in
+  store <3 x double> %ld, <3 x double> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4f64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v4f64(<4 x double> addrspace(3)* %out, <4 x double> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <4 x double>, <4 x double> addrspace(3)* %in
+  store <4 x double> %ld, <4 x double> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8f64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v8f64(<8 x double> addrspace(3)* %out, <8 x double> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <8 x double>, <8 x double> addrspace(3)* %in
+  store <8 x double> %ld, <8 x double> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16f64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v16f64(<16 x double> addrspace(3)* %out, <16 x double> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <16 x double>, <16 x double> addrspace(3)* %in
+  store <16 x double> %ld, <16 x double> addrspace(3)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-local-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i1.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i1.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i1.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,371 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}local_load_i1:
+; GCN: ds_read_u8
+; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
+; GCN: ds_write_b8
+
+; EG: LDS_UBYTE_READ_RET
+; EG: AND_INT
+; EG: LDS_BYTE_WRITE
+define void @local_load_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+  %load = load i1, i1 addrspace(3)* %in
+  store i1 %load, i1 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2i1:
+define void @local_load_v2i1(<2 x i1> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
+  store <2 x i1> %load, <2 x i1> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3i1:
+define void @local_load_v3i1(<3 x i1> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
+  store <3 x i1> %load, <3 x i1> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4i1:
+define void @local_load_v4i1(<4 x i1> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
+  store <4 x i1> %load, <4 x i1> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8i1:
+define void @local_load_v8i1(<8 x i1> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
+  store <8 x i1> %load, <8 x i1> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16i1:
+define void @local_load_v16i1(<16 x i1> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
+  store <16 x i1> %load, <16 x i1> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v32i1:
+define void @local_load_v32i1(<32 x i1> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
+  store <32 x i1> %load, <32 x i1> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v64i1:
+define void @local_load_v64i1(<64 x i1> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
+  store <64 x i1> %load, <64 x i1> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i1_to_i32:
+; GCN: ds_read_u8
+; GCN: ds_write_b32
+define void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+  %a = load i1, i1 addrspace(3)* %in
+  %ext = zext i1 %a to i32
+  store i32 %ext, i32 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i1_to_i32:
+; GCN: ds_read_u8
+; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
+; GCN: ds_write_b32
+
+; EG: LDS_UBYTE_READ_RET
+; EG: BFE_INT
+define void @local_sextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+  %a = load i1, i1 addrspace(3)* %in
+  %ext = sext i1 %a to i32
+  store i32 %ext, i32 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i32:
+define void @local_zextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
+  %ext = zext <1 x i1> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i32:
+define void @local_sextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
+  %ext = sext <1 x i1> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i32:
+define void @local_zextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
+  %ext = zext <2 x i1> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i32:
+define void @local_sextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
+  %ext = sext <2 x i1> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i32:
+define void @local_zextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
+  %ext = zext <3 x i1> %load to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i32:
+define void @local_sextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
+  %ext = sext <3 x i1> %load to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i32:
+define void @local_zextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
+  %ext = zext <4 x i1> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i32:
+define void @local_sextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
+  %ext = sext <4 x i1> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i32:
+define void @local_zextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
+  %ext = zext <8 x i1> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i32:
+define void @local_sextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
+  %ext = sext <8 x i1> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i32:
+define void @local_zextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
+  %ext = zext <16 x i1> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i32:
+define void @local_sextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
+  %ext = sext <16 x i1> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i32:
+define void @local_zextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
+  %ext = zext <32 x i1> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i32:
+define void @local_sextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
+  %ext = sext <32 x i1> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i32:
+define void @local_zextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
+  %ext = zext <64 x i1> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i32:
+define void @local_sextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
+  %ext = sext <64 x i1> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i1_to_i64:
+; GCN-DAG: ds_read_u8 [[LOAD:v[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; GCN: ds_write_b64
+define void @local_zextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+  %a = load i1, i1 addrspace(3)* %in
+  %ext = zext i1 %a to i64
+  store i64 %ext, i64 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i1_to_i64:
+; GCN: ds_read_u8 [[LOAD:v[0-9]+]],
+; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
+; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
+; GCN: ds_write_b64
+define void @local_sextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+  %a = load i1, i1 addrspace(3)* %in
+  %ext = sext i1 %a to i64
+  store i64 %ext, i64 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i64:
+define void @local_zextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
+  %ext = zext <1 x i1> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i64:
+define void @local_sextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+  %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
+  %ext = sext <1 x i1> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i64:
+define void @local_zextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
+  %ext = zext <2 x i1> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i64:
+define void @local_sextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+  %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
+  %ext = sext <2 x i1> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i64:
+define void @local_zextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
+  %ext = zext <3 x i1> %load to <3 x i64>
+  store <3 x i64> %ext, <3 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i64:
+define void @local_sextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+  %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
+  %ext = sext <3 x i1> %load to <3 x i64>
+  store <3 x i64> %ext, <3 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i64:
+define void @local_zextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
+  %ext = zext <4 x i1> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i64:
+define void @local_sextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+  %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
+  %ext = sext <4 x i1> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i64:
+define void @local_zextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
+  %ext = zext <8 x i1> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i64:
+define void @local_sextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+  %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
+  %ext = sext <8 x i1> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i64:
+define void @local_zextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
+  %ext = zext <16 x i1> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i64:
+define void @local_sextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+  %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
+  %ext = sext <16 x i1> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i64:
+define void @local_zextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
+  %ext = zext <32 x i1> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i64:
+define void @local_sextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+  %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
+  %ext = sext <32 x i1> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i64:
+define void @local_zextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
+  %ext = zext <64 x i1> %load to <64 x i64>
+  store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i64:
+define void @local_sextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+  %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
+  %ext = sext <64 x i1> %load to <64 x i64>
+  store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,608 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}local_load_i16:
+; GCN: ds_read_u16 v{{[0-9]+}}
+
+; EG: LDS_USHORT_READ_RET
+define void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
+entry:
+  %ld = load i16, i16 addrspace(3)* %in
+  store i16 %ld, i16 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2i16:
+; GCN: ds_read_b32
+
+; EG: LDS_READ_RET
+define void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
+entry:
+  %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in
+  store <2 x i16> %ld, <2 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3i16:
+; GCN: ds_read_b64
+; GCN-DAG: ds_write_b32
+; GCN-DAG: ds_write_b16
+
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: LDS_READ_RET
+define void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
+  store <3 x i16> %ld, <3 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4i16:
+; GCN: ds_read_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
+entry:
+  %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in
+  store <4 x i16> %ld, <4 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8i16:
+; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1{{$}}
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
+entry:
+  %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in
+  store <8 x i16> %ld, <8 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16i16:
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1{{$}}
+
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
+entry:
+  %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in
+  store <16 x i16> %ld, <16 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i16_to_i32:
+; GCN: ds_read_u16
+; GCN: ds_write_b32
+
+; EG: LDS_USHORT_READ_RET
+define void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+  %a = load i16, i16 addrspace(3)* %in
+  %ext = zext i16 %a to i32
+  store i32 %ext, i32 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i16_to_i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i16
+
+; EG: LDS_USHORT_READ_RET
+; EG: BFE_INT
+define void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+  %a = load i16, i16 addrspace(3)* %in
+  %ext = sext i16 %a to i32
+  store i32 %ext, i32 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32:
+; GCN: ds_read_u16
+define void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
+  %ext = zext <1 x i16> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32:
+; GCN: ds_read_i16
+define void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
+  %ext = sext <1 x i16> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+
+; EG: LDS_USHORT_READ_RET
+; EG: LDS_USHORT_READ_RET
+define void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
+  %ext = zext <2 x i16> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+define void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
+  %ext = sext <2 x i16> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v3i16_to_v3i32:
+; GCN: ds_read_b64
+; GCN-DAG: ds_write_b32
+; GCN-DAG: ds_write_b64
+define void @local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
+  %ext = zext <3 x i16> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v3i16_to_v3i32:
+; GCN: ds_read_b64
+; GCN-DAG: ds_write_b32
+; GCN-DAG: ds_write_b64
+define void @local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
+  %ext = sext <3 x i16> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+
+; EG: LDS_USHORT_READ_RET
+; EG: LDS_USHORT_READ_RET
+; EG: LDS_USHORT_READ_RET
+; EG: LDS_USHORT_READ_RET
+define void @local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
+  %ext = zext <4 x i16> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+define void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
+  %ext = sext <4 x i16> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32:
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+define void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
+  %ext = zext <8 x i16> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32:
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+define void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
+  %ext = sext <8 x i16> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32:
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+define void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
+  %ext = zext <16 x i16> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32:
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+define void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
+  %ext = sext <16 x i16> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32:
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+
+define void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
+  %ext = zext <32 x i16> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32:
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+define void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
+  %ext = sext <32 x i16> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32:
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+
+define void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+  %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
+  %ext = zext <64 x i16> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32:
+define void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+  %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
+  %ext = sext <64 x i16> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i16_to_i64:
+; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
+define void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+  %a = load i16, i16 addrspace(3)* %in
+  %ext = zext i16 %a to i64
+  store i64 %ext, i64 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i16_to_i64:
+; GCN: ds_read_i16 v[[LO:[0-9]+]],
+; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
+define void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+  %a = load i16, i16 addrspace(3)* %in
+  %ext = sext i16 %a to i64
+  store i64 %ext, i64 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64:
+define void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
+  %ext = zext <1 x i16> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64:
+define void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
+  %ext = sext <1 x i16> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64:
+define void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
+  %ext = zext <2 x i16> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64:
+define void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
+  %ext = sext <2 x i16> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64:
+define void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
+  %ext = zext <4 x i16> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64:
+define void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
+  %ext = sext <4 x i16> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64:
+define void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
+  %ext = zext <8 x i16> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64:
+define void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
+  %ext = sext <8 x i16> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64:
+define void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
+  %ext = zext <16 x i16> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64:
+define void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
+  %ext = sext <16 x i16> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64:
+define void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
+  %ext = zext <32 x i16> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64:
+define void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+  %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
+  %ext = sext <32 x i16> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64:
+; define void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+;   %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
+;   %ext = zext <64 x i16> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64:
+; define void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+;   %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
+;   %ext = sext <64 x i16> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+;   ret void
+; }
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i32.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,182 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}local_load_i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0, -1
+; GCN: ds_read_b32
+
+; EG: LDS_READ_RET
+define void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
+entry:
+  %ld = load i32, i32 addrspace(3)* %in
+  store i32 %ld, i32 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2i32:
+; GCN: ds_read_b64
+define void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
+  store <2 x i32> %ld, <2 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3i32:
+; GCN-DAG: ds_read_b64
+; GCN-DAG: ds_read_b32
+define void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <3 x i32>, <3 x i32> addrspace(3)* %in
+  store <3 x i32> %ld, <3 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4i32:
+; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1{{$}}
+
+define void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
+  store <4 x i32> %ld, <4 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8i32:
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1{{$}}
+define void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
+  store <8 x i32> %ld, <8 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16i32:
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:3 offset1:4{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:5{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}}
+define void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
+  store <16 x i32> %ld, <16 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i32_to_i64:
+define void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
+  %ld = load i32, i32 addrspace(3)* %in
+  %ext = zext i32 %ld to i64
+  store i64 %ext, i64 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i32_to_i64:
+define void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
+  %ld = load i32, i32 addrspace(3)* %in
+  %ext = sext i32 %ld to i64
+  store i64 %ext, i64 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64:
+define void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
+  %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
+  %ext = zext <1 x i32> %ld to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64:
+define void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
+  %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
+  %ext = sext <1 x i32> %ld to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64:
+define void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
+  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
+  %ext = zext <2 x i32> %ld to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64:
+define void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
+  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
+  %ext = sext <2 x i32> %ld to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64:
+define void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
+  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
+  %ext = zext <4 x i32> %ld to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64:
+define void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
+  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
+  %ext = sext <4 x i32> %ld to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64:
+define void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
+  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
+  %ext = zext <8 x i32> %ld to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64:
+define void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
+  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
+  %ext = sext <8 x i32> %ld to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64:
+define void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
+  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
+  %ext = sext <16 x i32> %ld to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64:
+define void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
+  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
+  %ext = zext <16 x i32> %ld to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64:
+define void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
+  %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
+  %ext = sext <32 x i32> %ld to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64:
+define void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
+  %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
+  %ext = zext <32 x i32> %ld to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-local-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i64.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i64.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i64.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,154 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}local_load_i64:
+; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}}
+; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]]
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_i64(i64 addrspace(3)* %out, i64 addrspace(3)* %in) #0 {
+  %ld = load i64, i64 addrspace(3)* %in
+  store i64 %ld, i64 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2i64:
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v2i64(<2 x i64> addrspace(3)* %out, <2 x i64> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <2 x i64>, <2 x i64> addrspace(3)* %in
+  store <2 x i64> %ld, <2 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3i64:
+; GCN-DAG: ds_read2_b64
+; GCN-DAG: ds_read_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <3 x i64>, <3 x i64> addrspace(3)* %in
+  store <3 x i64> %ld, <3 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4i64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v4i64(<4 x i64> addrspace(3)* %out, <4 x i64> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <4 x i64>, <4 x i64> addrspace(3)* %in
+  store <4 x i64> %ld, <4 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8i64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v8i64(<8 x i64> addrspace(3)* %out, <8 x i64> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <8 x i64>, <8 x i64> addrspace(3)* %in
+  store <8 x i64> %ld, <8 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16i64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v16i64(<16 x i64> addrspace(3)* %out, <16 x i64> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <16 x i64>, <16 x i64> addrspace(3)* %in
+  store <16 x i64> %ld, <16 x i64> addrspace(3)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll?rev=271571&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i8.ll Thu Jun  2 14:54:26 2016
@@ -0,0 +1,562 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}local_load_i8:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u8
+
+; EG: LDS_UBYTE_READ_RET
+define void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+entry:
+  %ld = load i8, i8 addrspace(3)* %in
+  store i8 %ld, i8 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2i8:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u16
+
+; EG: LDS_USHORT_READ_RET
+define void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <2 x i8>, <2 x i8> addrspace(3)* %in
+  store <2 x i8> %ld, <2 x i8> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3i8:
+; GCN: ds_read_b32
+
+; EG: LDS_READ_RET
+define void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
+  store <3 x i8> %ld, <3 x i8> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4i8:
+; GCN: ds_read_b32
+
+; EG: LDS_READ_RET
+define void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <4 x i8>, <4 x i8> addrspace(3)* %in
+  store <4 x i8> %ld, <4 x i8> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8i8:
+; GCN: ds_read_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <8 x i8>, <8 x i8> addrspace(3)* %in
+  store <8 x i8> %ld, <8 x i8> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16i8:
+; GCN: ds_read2_b64  v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
+; GCN: ds_write2_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]{{\]}} offset0:1{{$}}
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in
+  store <16 x i8> %ld, <16 x i8> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i8_to_i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u8
+
+; EG: LDS_UBYTE_READ_RET
+define void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+  %a = load i8, i8 addrspace(3)* %in
+  %ext = zext i8 %a to i32
+  store i32 %ext, i32 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i8_to_i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i8
+
+; EG: LDS_UBYTE_READ_RET
+; EG: BFE_INT
+define void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+  %ld = load i8, i8 addrspace(3)* %in
+  %ext = sext i8 %ld to i32
+  store i32 %ext, i32 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32:
+define void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+  %ext = zext <1 x i8> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32:
+define void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+  %ext = sext <1 x i8> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; EG: LDS_UBYTE_READ_RET
+; EG: LDS_UBYTE_READ_RET
+define void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+  %ext = zext <2 x i8> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i8
+; GCN: ds_read_i8
+
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+define void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+  %ext = sext <2 x i8> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32:
+; GCN: ds_read_b32
+
+; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
+; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
+; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
+define void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
+  %ext = zext <3 x i8> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_b32
+
+; GCN-DAG: v_bfe_i32
+; GCN-DAG: v_bfe_i32
+; GCN-DAG: v_bfe_i32
+; GCN-DAG: v_bfe_i32
+
+; GCN-DAG: ds_write_b64
+; GCN-DAG: ds_write_b32
+
+define void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
+entry:
+  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
+  %ext = sext <3 x i8> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; EG: LDS_UBYTE_READ_RET
+; EG: LDS_UBYTE_READ_RET
+; EG: LDS_UBYTE_READ_RET
+; EG: LDS_UBYTE_READ_RET
+define void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+  %ext = zext <4 x i8> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i8
+; GCN: ds_read_i8
+; GCN: ds_read_i8
+; GCN: ds_read_i8
+
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+define void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+  %ext = sext <4 x i8> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32:
+define void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+  %ext = zext <8 x i8> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32:
+define void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+  %ext = sext <8 x i8> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32:
+define void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+  %ext = zext <16 x i8> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32:
+define void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+  %ext = sext <16 x i8> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32:
+define void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+  %ext = zext <32 x i8> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32:
+define void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+  %ext = sext <32 x i8> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32:
+define void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+  %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+  %ext = zext <64 x i8> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32:
+define void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+  %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+  %ext = sext <64 x i8> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i8_to_i64:
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]],
+; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
+define void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+  %a = load i8, i8 addrspace(3)* %in
+  %ext = zext i8 %a to i64
+  store i64 %ext, i64 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i8_to_i64:
+; GCN: ds_read_i8 v[[LO:[0-9]+]],
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+  %a = load i8, i8 addrspace(3)* %in
+  %ext = sext i8 %a to i64
+  store i64 %ext, i64 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64:
+define void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+  %ext = zext <1 x i8> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64:
+define void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+  %ext = sext <1 x i8> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64:
+define void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+  %ext = zext <2 x i8> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64:
+define void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+  %ext = sext <2 x i8> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64:
+define void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+  %ext = zext <4 x i8> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64:
+define void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+  %ext = sext <4 x i8> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64:
+define void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+  %ext = zext <8 x i8> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64:
+define void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+  %ext = sext <8 x i8> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64:
+define void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+  %ext = zext <16 x i8> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64:
+define void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+  %ext = sext <16 x i8> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64:
+define void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+  %ext = zext <32 x i8> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64:
+define void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+  %ext = sext <32 x i8> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64:
+; define void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+;   %ext = zext <64 x i8> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64:
+; define void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+;   %ext = sext <64 x i8> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+;   ret void
+; }
+
+; FUNC-LABEL: {{^}}local_zextload_i8_to_i16:
+; GCN: ds_read_u8 v[[VAL:[0-9]+]],
+; GCN: ds_write_b16 v[[VAL:[0-9]+]]
+define void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+  %a = load i8, i8 addrspace(3)* %in
+  %ext = zext i8 %a to i16
+  store i16 %ext, i16 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i8_to_i16:
+; GCN: ds_read_i8 v[[VAL:[0-9]+]],
+; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]
+define void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+  %a = load i8, i8 addrspace(3)* %in
+  %ext = sext i8 %a to i16
+  store i16 %ext, i16 addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16:
+define void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+  %ext = zext <1 x i8> %load to <1 x i16>
+  store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16:
+define void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+  %ext = sext <1 x i8> %load to <1 x i16>
+  store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16:
+define void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+  %ext = zext <2 x i8> %load to <2 x i16>
+  store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16:
+define void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+  %ext = sext <2 x i8> %load to <2 x i16>
+  store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16:
+define void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+  %ext = zext <4 x i8> %load to <4 x i16>
+  store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:
+define void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+  %ext = sext <4 x i8> %load to <4 x i16>
+  store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16:
+define void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+  %ext = zext <8 x i8> %load to <8 x i16>
+  store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16:
+define void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+  %ext = sext <8 x i8> %load to <8 x i16>
+  store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16:
+define void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+  %ext = zext <16 x i8> %load to <16 x i16>
+  store <16 x i16> %ext, <16 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16:
+define void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+  %ext = sext <16 x i8> %load to <16 x i16>
+  store <16 x i16> %ext, <16 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16:
+define void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+  %ext = zext <32 x i8> %load to <32 x i16>
+  store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16:
+define void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+  %ext = sext <32 x i8> %load to <32 x i16>
+  store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16:
+; define void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+;   %ext = zext <64 x i8> %load to <64 x i16>
+;   store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16:
+; define void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+;   %ext = sext <64 x i8> %load to <64 x i16>
+;   store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
+;   ret void
+; }
+
+attributes #0 = { nounwind }

Removed: llvm/trunk/test/CodeGen/AMDGPU/load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load.ll?rev=271570&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load.ll (removed)
@@ -1,750 +0,0 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
-; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-
-;===------------------------------------------------------------------------===;
-; GLOBAL ADDRESS SPACE
-;===------------------------------------------------------------------------===;
-
-; Load an i8 value from the global address space.
-; FUNC-LABEL: {{^}}load_i8:
-; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
-; CI-HSA: flat_load_ubyte
-define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
-  %1 = load i8, i8 addrspace(1)* %in
-  %2 = zext i8 %1 to i32
-  store i32 %2, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_i8_sext:
-; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 8
-; SI-NOHSA: buffer_load_sbyte
-; CI-HSA: flat_load_sbyte
-define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
-entry:
-  %0 = load i8, i8 addrspace(1)* %in
-  %1 = sext i8 %0 to i32
-  store i32 %1, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8:
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; SI-NOHSA: buffer_load_ubyte
-; SI-NOHSA: buffer_load_ubyte
-; CI-HSA: flat_load_ubyte
-; CI-HSA: flat_load_ubyte
-define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
-entry:
-  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
-  %1 = zext <2 x i8> %0 to <2 x i32>
-  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8_sext:
-; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: 8
-; R600-DAG: 8
-
-; SI-NOHSA: buffer_load_sbyte
-; SI-NOHSA: buffer_load_sbyte
-; CI-HSA: flat_load_sbyte
-; CI-HSA: flat_load_sbyte
-define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
-entry:
-  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
-  %1 = sext <2 x i8> %0 to <2 x i32>
-  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8:
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; SI-NOHSA: buffer_load_ubyte
-; SI-NOHSA: buffer_load_ubyte
-; SI-NOHSA: buffer_load_ubyte
-; SI-NOHSA: buffer_load_ubyte
-; CI-HSA: flat_load_ubyte
-; CI-HSA: flat_load_ubyte
-; CI-HSA: flat_load_ubyte
-; CI-HSA: flat_load_ubyte
-define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
-entry:
-  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
-  %1 = zext <4 x i8> %0 to <4 x i32>
-  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8_sext:
-; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
-; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
-; R600-DAG: 8
-; R600-DAG: 8
-; R600-DAG: 8
-; R600-DAG: 8
-; SI-NOHSA: buffer_load_sbyte
-; SI-NOHSA: buffer_load_sbyte
-; SI-NOHSA: buffer_load_sbyte
-; SI-NOHSA: buffer_load_sbyte
-; CI-HSA: flat_load_sbyte
-; CI-HSA: flat_load_sbyte
-; CI-HSA: flat_load_sbyte
-; CI-HSA: flat_load_sbyte
-define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
-entry:
-  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
-  %1 = sext <4 x i8> %0 to <4 x i32>
-  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; Load an i16 value from the global address space.
-; FUNC-LABEL: {{^}}load_i16:
-; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-NOHSA: buffer_load_ushort
-; CI-HSA: flat_load_ushort
-define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
-entry:
-  %0 = load i16	, i16	 addrspace(1)* %in
-  %1 = zext i16 %0 to i32
-  store i32 %1, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_i16_sext:
-; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 16
-; SI-NOHSA: buffer_load_sshort
-; CI-HSA: flat_load_sshort
-define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
-entry:
-  %0 = load i16, i16 addrspace(1)* %in
-  %1 = sext i16 %0 to i32
-  store i32 %1, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16:
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; SI-NOHSA: buffer_load_ushort
-; SI-NOHSA: buffer_load_ushort
-; CI-HSA: flat_load_ushort
-; CI-HSA: flat_load_ushort
-define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
-entry:
-  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
-  %1 = zext <2 x i16> %0 to <2 x i32>
-  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16_sext:
-; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: 16
-; R600-DAG: 16
-; SI-NOHSA: buffer_load_sshort
-; SI-NOHSA: buffer_load_sshort
-; CI-HSA: flat_load_sshort
-; CI-HSA: flat_load_sshort
-define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
-entry:
-  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
-  %1 = sext <2 x i16> %0 to <2 x i32>
-  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}global_load_v3i16:
-; SI-NOHSA: buffer_load_dwordx2 v
-; SI-HSA: flat_load_dwordx2 v
-
-; R600-DAG: VTX_READ_32
-; R600-DAG: VTX_READ_16
-define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
-entry:
-  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
-  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16:
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; SI-NOHSA: buffer_load_ushort
-; SI-NOHSA: buffer_load_ushort
-; SI-NOHSA: buffer_load_ushort
-; SI-NOHSA: buffer_load_ushort
-; CI-HSA: flat_load_ushort
-; CI-HSA: flat_load_ushort
-; CI-HSA: flat_load_ushort
-; CI-HSA: flat_load_ushort
-define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
-entry:
-  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
-  %1 = zext <4 x i16> %0 to <4 x i32>
-  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16_sext:
-; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
-; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
-; R600-DAG: 16
-; R600-DAG: 16
-; R600-DAG: 16
-; R600-DAG: 16
-; SI-NOHSA: buffer_load_sshort
-; SI-NOHSA: buffer_load_sshort
-; SI-NOHSA: buffer_load_sshort
-; SI-NOHSA: buffer_load_sshort
-; CI-HSA: flat_load_sshort
-; CI-HSA: flat_load_sshort
-; CI-HSA: flat_load_sshort
-; CI-HSA: flat_load_sshort
-define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
-entry:
-  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
-  %1 = sext <4 x i16> %0 to <4 x i32>
-  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; load an i32 value from the global address space.
-; FUNC-LABEL: {{^}}load_i32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI-NOHSA: buffer_load_dword v{{[0-9]+}}
-; CI-HSA: flat_load_dword
-define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
-  %0 = load i32, i32 addrspace(1)* %in
-  store i32 %0, i32 addrspace(1)* %out
-  ret void
-}
-
-; load a f32 value from the global address space.
-; FUNC-LABEL: {{^}}load_f32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI-NOHSA: buffer_load_dword v{{[0-9]+}}
-; CI-HSA: flat_load_dword
-define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-entry:
-  %0 = load float, float addrspace(1)* %in
-  store float %0, float addrspace(1)* %out
-  ret void
-}
-
-; load a v2f32 value from the global address space
-; FUNC-LABEL: {{^}}load_v2f32:
-; R600: MEM_RAT
-; R600: VTX_READ_64
-; SI-NOHSA: buffer_load_dwordx2
-; CI-HSA: flat_load_dwordx2
-define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
-entry:
-  %0 = load <2 x float>, <2 x float> addrspace(1)* %in
-  store <2 x float> %0, <2 x float> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_i64:
-; R600: VTX_READ_64
-; SI-NOHSA: buffer_load_dwordx2
-; CI-HSA: flat_load_dwordx2
-define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-entry:
-  %0 = load i64, i64 addrspace(1)* %in
-  store i64 %0, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_i64_sext:
-; R600: MEM_RAT
-; R600: MEM_RAT
-; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
-; R600: 31
-; SI-NOHSA: buffer_load_dword
-; CI-HSA: flat_load_dword
-
-define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
-  %0 = load i32, i32 addrspace(1)* %in
-  %1 = sext i32 %0 to i64
-  store i64 %1, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_i64_zext:
-; R600: MEM_RAT
-; R600: MEM_RAT
-define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
-  %0 = load i32, i32 addrspace(1)* %in
-  %1 = zext i32 %0 to i64
-  store i64 %1, i64 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v8i32:
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-
-; SI-NOHSA: buffer_load_dwordx4
-; SI-NOHSA: buffer_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
-entry:
-  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
-  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v16i32:
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-
-; SI-NOHSA: buffer_load_dwordx4
-; SI-NOHSA: buffer_load_dwordx4
-; SI-NOHSA: buffer_load_dwordx4
-; SI-NOHSA: buffer_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
-entry:
-  %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
-  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
-  ret void
-}
-
-;===------------------------------------------------------------------------===;
-; CONSTANT ADDRESS SPACE
-;===------------------------------------------------------------------------===;
-
-; Load a sign-extended i8 value
-; FUNC-LABEL: {{^}}load_const_i8_sext:
-; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 8
-; SI-NOHSA: buffer_load_sbyte v{{[0-9]+}},
-; CI-HSA: flat_load_sbyte v{{[0-9]+}},
-define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
-entry:
-  %0 = load i8, i8 addrspace(2)* %in
-  %1 = sext i8 %0 to i32
-  store i32 %1, i32 addrspace(1)* %out
-  ret void
-}
-
-; Load an aligned i8 value
-; FUNC-LABEL: {{^}}load_const_i8_aligned:
-; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
-; CI-HSA: flat_load_ubyte v{{[0-9]+}},
-define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
-entry:
-  %0 = load i8, i8 addrspace(2)* %in
-  %1 = zext i8 %0 to i32
-  store i32 %1, i32 addrspace(1)* %out
-  ret void
-}
-
-; Load an un-aligned i8 value
-; FUNC-LABEL: {{^}}load_const_i8_unaligned:
-; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
-; CI-HSA: flat_load_ubyte v{{[0-9]+}},
-define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
-entry:
-  %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
-  %1 = load i8, i8 addrspace(2)* %0
-  %2 = zext i8 %1 to i32
-  store i32 %2, i32 addrspace(1)* %out
-  ret void
-}
-
-; Load a sign-extended i16 value
-; FUNC-LABEL: {{^}}load_const_i16_sext:
-; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 16
-; SI-NOHSA: buffer_load_sshort
-; CI-HSA: flat_load_sshort
-define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
-entry:
-  %0 = load i16, i16 addrspace(2)* %in
-  %1 = sext i16 %0 to i32
-  store i32 %1, i32 addrspace(1)* %out
-  ret void
-}
-
-; Load an aligned i16 value
-; FUNC-LABEL: {{^}}load_const_i16_aligned:
-; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-NOHSA: buffer_load_ushort
-; CI-HSA: flat_load_ushort
-define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
-entry:
-  %0 = load i16, i16 addrspace(2)* %in
-  %1 = zext i16 %0 to i32
-  store i32 %1, i32 addrspace(1)* %out
-  ret void
-}
-
-; Load an un-aligned i16 value
-; FUNC-LABEL: {{^}}load_const_i16_unaligned:
-; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-NOHSA: buffer_load_ushort
-; CI-HSA: flat_load_ushort
-define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
-entry:
-  %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
-  %1 = load i16, i16 addrspace(2)* %0
-  %2 = zext i16 %1 to i32
-  store i32 %2, i32 addrspace(1)* %out
-  ret void
-}
-
-; Load an i32 value from the constant address space.
-; FUNC-LABEL: {{^}}load_const_addrspace_i32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI: s_load_dword s{{[0-9]+}}
-define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
-entry:
-  %0 = load i32, i32 addrspace(2)* %in
-  store i32 %0, i32 addrspace(1)* %out
-  ret void
-}
-
-; Load a f32 value from the constant address space.
-; FUNC-LABEL: {{^}}load_const_addrspace_f32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI: s_load_dword s{{[0-9]+}}
-define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
-  %1 = load float, float addrspace(2)* %in
-  store float %1, float addrspace(1)* %out
-  ret void
-}
-
-;===------------------------------------------------------------------------===;
-; LOCAL ADDRESS SPACE
-;===------------------------------------------------------------------------===;
-
-; Load an i8 value from the local address space.
-; FUNC-LABEL: {{^}}load_i8_local:
-; R600: LDS_UBYTE_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u8
-define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
-  %1 = load i8, i8 addrspace(3)* %in
-  %2 = zext i8 %1 to i32
-  store i32 %2, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_i8_sext_local:
-; R600: LDS_UBYTE_READ_RET
-; R600: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i8
-define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
-entry:
-  %0 = load i8, i8 addrspace(3)* %in
-  %1 = sext i8 %0 to i32
-  store i32 %1, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8_local:
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u8
-; SI: ds_read_u8
-define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
-entry:
-  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
-  %1 = zext <2 x i8> %0 to <2 x i32>
-  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8_sext_local:
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i8
-; SI: ds_read_i8
-define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
-entry:
-  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
-  %1 = sext <2 x i8> %0 to <2 x i32>
-  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8_local:
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
-entry:
-  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
-  %1 = zext <4 x i8> %0 to <4 x i32>
-  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8_sext_local:
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i8
-; SI: ds_read_i8
-; SI: ds_read_i8
-; SI: ds_read_i8
-define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
-entry:
-  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
-  %1 = sext <4 x i8> %0 to <4 x i32>
-  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; Load an i16 value from the local address space.
-; FUNC-LABEL: {{^}}load_i16_local:
-; R600: LDS_USHORT_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u16
-define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
-entry:
-  %0 = load i16	, i16	 addrspace(3)* %in
-  %1 = zext i16 %0 to i32
-  store i32 %1, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_i16_sext_local:
-; R600: LDS_USHORT_READ_RET
-; R600: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i16
-define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
-entry:
-  %0 = load i16, i16 addrspace(3)* %in
-  %1 = sext i16 %0 to i32
-  store i32 %1, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16_local:
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u16
-; SI: ds_read_u16
-define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
-entry:
-  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
-  %1 = zext <2 x i16> %0 to <2 x i32>
-  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16_sext_local:
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i16
-; SI: ds_read_i16
-define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
-entry:
-  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
-  %1 = sext <2 x i16> %0 to <2 x i32>
-  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16_local:
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
-entry:
-  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
-  %1 = zext <4 x i16> %0 to <4 x i32>
-  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16_sext_local:
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i16
-; SI: ds_read_i16
-; SI: ds_read_i16
-; SI: ds_read_i16
-define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
-entry:
-  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
-  %1 = sext <4 x i16> %0 to <4 x i32>
-  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; load an i32 value from the local address space.
-; FUNC-LABEL: {{^}}load_i32_local:
-; R600: LDS_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_b32
-define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
-entry:
-  %0 = load i32, i32 addrspace(3)* %in
-  store i32 %0, i32 addrspace(1)* %out
-  ret void
-}
-
-; load a f32 value from the local address space.
-; FUNC-LABEL: {{^}}load_f32_local:
-; R600: LDS_READ_RET
-; SI: s_mov_b32 m0
-; SI: ds_read_b32
-define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
-entry:
-  %0 = load float, float addrspace(3)* %in
-  store float %0, float addrspace(1)* %out
-  ret void
-}
-
-; load a v2f32 value from the local address space
-; FUNC-LABEL: {{^}}load_v2f32_local:
-; R600: LDS_READ_RET
-; R600: LDS_READ_RET
-; SI: s_mov_b32 m0
-; SI: ds_read_b64
-define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
-entry:
-  %0 = load <2 x float>, <2 x float> addrspace(3)* %in
-  store <2 x float> %0, <2 x float> addrspace(1)* %out
-  ret void
-}
-
-; Test loading a i32 and v2i32 value from the same base pointer.
-; FUNC-LABEL: {{^}}load_i32_v2i32_local:
-; R600: LDS_READ_RET
-; R600: LDS_READ_RET
-; R600: LDS_READ_RET
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_read2_b32
-define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
-  %scalar = load i32, i32 addrspace(3)* %in
-  %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
-  %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
-  %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
-  %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
-  %vec = add <2 x i32> %vec0, %vec1
-  store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-
- at lds = addrspace(3) global [512 x i32] undef, align 4
-
-; On SI we need to make sure that the base offset is a register and not
-; an immediate.
-; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
-; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
-; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
-; R600: LDS_READ_RET
-define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
-entry:
-  %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
-  %tmp1 = load i32, i32 addrspace(3)* %tmp0
-  %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
-  store i32 %tmp1, i32 addrspace(1)* %tmp2
-  ret void
-}

Removed: llvm/trunk/test/CodeGen/AMDGPU/load.vec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load.vec.ll?rev=271570&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load.vec.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load.vec.ll (removed)
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG  %s
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI  %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI  %s
-
-; load a v2i32 value from the global address space.
-; EG: {{^}}load_v2i32:
-; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0
-; SI: {{^}}load_v2i32:
-; SI: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
-  store <2 x i32> %a, <2 x i32> addrspace(1)* %out
-  ret void
-}
-
-; load a v4i32 value from the global address space.
-; EG: {{^}}load_v4i32:
-; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0
-; SI: {{^}}load_v4i32:
-; SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}]
-define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
-  store <4 x i32> %a, <4 x i32> addrspace(1)* %out
-  ret void
-}

Removed: llvm/trunk/test/CodeGen/AMDGPU/load64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load64.ll?rev=271570&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load64.ll (removed)
@@ -1,31 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-; load a f64 value from the global address space.
-; CHECK-LABEL: {{^}}load_f64:
-; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
-  %1 = load double, double addrspace(1)* %in
-  store double %1, double addrspace(1)* %out
-  ret void
-}
-
-; CHECK-LABEL: {{^}}load_i64:
-; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %tmp = load i64, i64 addrspace(1)* %in
-  store i64 %tmp, i64 addrspace(1)* %out, align 8
-  ret void
-}
-
-; Load a f64 value from the constant address space.
-; CHECK-LABEL: {{^}}load_const_addrspace_f64:
-; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
-; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
-  %1 = load double, double addrspace(2)* %in
-  store double %1, double addrspace(1)* %out
-  ret void
-}

Modified: llvm/trunk/test/CodeGen/AMDGPU/local-memory.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-memory.ll?rev=271571&r1=271570&r2=271571&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-memory.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-memory.ll Thu Jun  2 14:54:26 2016
@@ -43,6 +43,41 @@ entry:
   ret void
 }
 
+ at lds = addrspace(3) global [512 x i32] undef, align 4
+
+; On SI we need to make sure that the base offset is a register and not
+; an immediate.
+; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
+; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
+; GCN: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
+; R600: LDS_READ_RET
+define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+  %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
+  %tmp1 = load i32, i32 addrspace(3)* %tmp0
+  %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+  store i32 %tmp1, i32 addrspace(1)* %tmp2
+  ret void
+}
+
+; Test loading a i32 and v2i32 value from the same base pointer.
+; FUNC-LABEL: {{^}}load_i32_v2i32_local:
+; R600: LDS_READ_RET
+; R600: LDS_READ_RET
+; R600: LDS_READ_RET
+; GCN-DAG: ds_read_b32
+; GCN-DAG: ds_read2_b32
+define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
+  %scalar = load i32, i32 addrspace(3)* %in
+  %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
+  %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
+  %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
+  %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
+  %vec = add <2 x i32> %vec0, %vec1
+  store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
 declare i32 @llvm.r600.read.tidig.x() #0
 declare void @llvm.AMDGPU.barrier.local()
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll?rev=271571&r1=271570&r2=271571&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/unaligned-load-store.ll Thu Jun  2 14:54:26 2016
@@ -1,300 +1,387 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: {{^}}unaligned_load_store_i16_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind {
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}local_unaligned_load_store_i16:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 {
   %v = load i16, i16 addrspace(3)* %p, align 1
   store i16 %v, i16 addrspace(3)* %r, align 1
   ret void
 }
 
-; SI-LABEL: {{^}}unaligned_load_store_i16_global:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: s_endpgm
-define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind {
+; FUNC-LABEL: {{^}}unaligned_load_store_i16_global:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
   %v = load i16, i16 addrspace(1)* %p, align 1
   store i16 %v, i16 addrspace(1)* %r, align 1
   ret void
 }
 
-; SI-LABEL: {{^}}unaligned_load_store_i32_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
+; FUNC-LABEL: {{^}}local_unaligned_load_store_i32:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
   %v = load i32, i32 addrspace(3)* %p, align 1
   store i32 %v, i32 addrspace(3)* %r, align 1
   ret void
 }
 
-; SI-LABEL: {{^}}unaligned_load_store_i32_global:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
+; FUNC-LABEL: {{^}}global_unaligned_load_store_i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
   %v = load i32, i32 addrspace(1)* %p, align 1
   store i32 %v, i32 addrspace(1)* %r, align 1
   ret void
 }
 
-; SI-LABEL: {{^}}align2_load_store_i32_global:
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_store_short
-; SI: buffer_store_short
-define void @align2_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
+; FUNC-LABEL: {{^}}global_align2_load_store_i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_store_short
+; GCN-NOHSA: buffer_store_short
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_store_short
+; GCN-HSA: flat_store_short
+define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
   %v = load i32, i32 addrspace(1)* %p, align 2
   store i32 %v, i32 addrspace(1)* %r, align 2
   ret void
 }
 
-; SI-LABEL: {{^}}align2_load_store_i32_local:
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_write_b16
-; SI: ds_write_b16
-define void @align2_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
+; FUNC-LABEL: {{^}}local_align2_load_store_i32:
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_write_b16
+; GCN: ds_write_b16
+define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
   %v = load i32, i32 addrspace(3)* %p, align 2
   store i32 %v, i32 addrspace(3)* %r, align 2
   ret void
 }
 
 ; FIXME: Unnecessary packing and unpacking of bytes.
-; SI-LABEL: {{^}}unaligned_load_store_i64_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
+; FUNC-LABEL: {{^}}local_unaligned_load_store_i64:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
   %v = load i64, i64 addrspace(3)* %p, align 1
   store i64 %v, i64 addrspace(3)* %r, align 1
   ret void
 }
 
-; SI-LABEL: {{^}}unaligned_load_store_v2i32_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_v2i32_local(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
+; FUNC-LABEL: {{^}}local_unaligned_load_store_v2i32:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
   %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
   store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
   ret void
 }
 
-; SI-LABEL: {{^}}unaligned_load_store_i64_global:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-
-; XSI-NOT: v_or_
-; XSI-NOT: v_lshl
-
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
+; FUNC-LABEL: {{^}}unaligned_load_store_i64_global:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+
+; XGCN-NOT: v_or_
+; XGCN-NOT: v_lshl
+
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
   %v = load i64, i64 addrspace(1)* %p, align 1
   store i64 %v, i64 addrspace(1)* %r, align 1
   ret void
 }
 
-; SI-LABEL: {{^}}unaligned_load_store_v4i32_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
+; FUNC-LABEL: {{^}}local_unaligned_load_store_v4i32:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 {
   %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
   store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
   ret void
 }
 
-; FIXME: We mark v4i32 as custom, so misaligned loads are never expanded.
-; FIXME-SI-LABEL: {{^}}unaligned_load_store_v4i32_global
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
+; FUNC-LABEL: {{^}}global_unaligned_load_store_v4i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 {
   %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
   store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
   ret void
 }
 
-; SI-LABEL: {{^}}load_lds_i64_align_4:
-; SI: ds_read2_b32
-; SI: s_endpgm
-define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+; FUNC-LABEL: {{^}}local_load_i64_align_4:
+; GCN: ds_read2_b32
+define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %val = load i64, i64 addrspace(3)* %in, align 4
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
-; SI: s_endpgm
-define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+; FUNC-LABEL: {{^}}local_load_i64_align_4_with_offset
+; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
+define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
   %val = load i64, i64 addrspace(3)* %ptr, align 4
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
+; FUNC-LABEL: {{^}}local_load_i64_align_4_with_split_offset:
 ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
-; SI: s_endpgm
-define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
+; GCN: s_endpgm
+define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
   %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
   %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
@@ -303,49 +390,95 @@ define void @load_lds_i64_align_4_with_s
   ret void
 }
 
-; SI-LABEL: {{^}}load_lds_i64_align_1:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-
-define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+; FUNC-LABEL: {{^}}local_load_i64_align_1:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: store_dwordx2
+define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %val = load i64, i64 addrspace(3)* %in, align 1
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: {{^}}store_lds_i64_align_4:
-; SI: ds_write2_b32
-; SI: s_endpgm
-define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
+; FUNC-LABEL: {{^}}local_store_i64_align_4:
+; GCN: ds_write2_b32
+define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
   store i64 %val, i64 addrspace(3)* %out, align 4
   ret void
 }
 
-; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset
-; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
-; SI: s_endpgm
-define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
+; FUNC-LABEL: {{^}}local_store_i64_align_4_with_offset
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
+; GCN: s_endpgm
+define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
   %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
   store i64 0, i64 addrspace(3)* %ptr, align 4
   ret void
 }
 
-; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
+; FUNC-LABEL: {{^}}local_store_i64_align_4_with_split_offset:
 ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
-; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
-; SI: s_endpgm
-define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
+; GCN: s_endpgm
+define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
   %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
   %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
   %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
   store i64 0, i64 addrspace(3)* %out, align 4
   ret void
 }
+
+; FUNC-LABEL: {{^}}constant_load_unaligned_i16:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-HSA: flat_load_ushort
+
+; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+define void @constant_load_unaligned_i16(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
+entry:
+  %tmp0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
+  %tmp1 = load i16, i16 addrspace(2)* %tmp0
+  %tmp2 = zext i16 %tmp1 to i32
+  store i32 %tmp2, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_unaligned_i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+define void @constant_load_unaligned_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+entry:
+  %tmp0 = load i32, i32 addrspace(2)* %in, align 1
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_unaligned_f32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+define void @constant_load_unaligned_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
+  %tmp1 = load float, float addrspace(2)* %in, align 1
+  store float %tmp1, float addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }




More information about the llvm-commits mailing list